fix: include output tokens in context window usage calculation

Include the output tokens from the last response in the getContextWindowUsage
calculation. Output tokens become part of the next request's input (as
conversation history), so they must be counted to avoid overestimating
remainingTokens. This aligns with preemptive-compaction's calculation, which
already includes output tokens correctly.

🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
YeonGyu-Kim
2025-12-26 16:11:21 +09:00
parent 1b427570c8
commit 7c24f657e7

View File

@@ -112,7 +112,10 @@ export async function getContextWindowUsage(
const lastAssistant = assistantMessages[assistantMessages.length - 1] const lastAssistant = assistantMessages[assistantMessages.length - 1]
const lastTokens = lastAssistant.tokens const lastTokens = lastAssistant.tokens
const usedTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0)
const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens
return { return {