diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts index 11e477f..ce8d756 100644 --- a/src/hooks/context-window-monitor.ts +++ b/src/hooks/context-window-monitor.ts @@ -52,11 +52,10 @@ export function createContextWindowMonitorHook(ctx: PluginInput) { const lastAssistant = assistantMessages[assistantMessages.length - 1] if (lastAssistant.providerID !== "anthropic") return - const totalInputTokens = assistantMessages.reduce((sum, m) => { - const inputTokens = m.tokens?.input ?? 0 - const cacheReadTokens = m.tokens?.cache?.read ?? 0 - return sum + inputTokens + cacheReadTokens - }, 0) + // Use only the last assistant message's input tokens + // This reflects the ACTUAL current context window usage (post-compaction) + const lastTokens = lastAssistant.tokens + const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const actualUsagePercentage = totalInputTokens / ANTHROPIC_ACTUAL_LIMIT diff --git a/src/hooks/grep-output-truncator.ts b/src/hooks/grep-output-truncator.ts index 7b5405a..ec8d1f3 100644 --- a/src/hooks/grep-output-truncator.ts +++ b/src/hooks/grep-output-truncator.ts @@ -98,11 +98,11 @@ export function createGrepOutputTruncatorHook(ctx: PluginInput) { if (assistantMessages.length === 0) return - const totalInputTokens = assistantMessages.reduce((sum, m) => { - const inputTokens = m.tokens?.input ?? 0 - const cacheReadTokens = m.tokens?.cache?.read ?? 0 - return sum + inputTokens + cacheReadTokens - }, 0) + // Use only the last assistant message's input tokens + // This reflects the ACTUAL current context window usage (post-compaction) + const lastAssistant = assistantMessages[assistantMessages.length - 1] + const lastTokens = lastAssistant.tokens + const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - totalInputTokens