From 90d43dc2924a696479bfa56e82c6f889da63ceb7 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan <80092815+harshav167@users.noreply.github.com> Date: Sun, 28 Dec 2025 02:22:02 +1100 Subject: [PATCH] fix(dynamic-truncator): apply fallback truncation when context usage unavailable (#268) When getContextWindowUsage returns null (no assistant messages yet, API failure, or first request in session), the truncator was returning untruncated output. This caused context overflow crashes on early requests or when usage lookup failed. Now applies conservative truncation (50k tokens) as fallback, preventing prompt-too-long errors that crash sessions. --- src/shared/dynamic-truncator.ts | 252 +++++++++++++++++--------------- 1 file changed, 137 insertions(+), 115 deletions(-) diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts index 5288f18..84a4e6e 100644 --- a/src/shared/dynamic-truncator.ts +++ b/src/shared/dynamic-truncator.ts @@ -1,167 +1,189 @@ -import type { PluginInput } from "@opencode-ai/plugin" +import type { PluginInput } from "@opencode-ai/plugin"; -const ANTHROPIC_ACTUAL_LIMIT = 200_000 -const CHARS_PER_TOKEN_ESTIMATE = 4 -const DEFAULT_TARGET_MAX_TOKENS = 50_000 +const ANTHROPIC_ACTUAL_LIMIT = 200_000; +const CHARS_PER_TOKEN_ESTIMATE = 4; +const DEFAULT_TARGET_MAX_TOKENS = 50_000; interface AssistantMessageInfo { - role: "assistant" - tokens: { - input: number - output: number - reasoning: number - cache: { read: number; write: number } - } + role: "assistant"; + tokens: { + input: number; + output: number; + reasoning: number; + cache: { read: number; write: number }; + }; } interface MessageWrapper { - info: { role: string } & Partial + info: { role: string } & Partial; } export interface TruncationResult { - result: string - truncated: boolean - removedCount?: number + result: string; + truncated: boolean; + removedCount?: number; } export interface TruncationOptions { - targetMaxTokens?: number - preserveHeaderLines?: number - contextWindowLimit?: number + targetMaxTokens?: number; + preserveHeaderLines?: number; + contextWindowLimit?: number; } function estimateTokens(text: string): number { - return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE) + return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE); } export function truncateToTokenLimit( - output: string, - maxTokens: number, - preserveHeaderLines = 3 + output: string, + maxTokens: number, + preserveHeaderLines = 3, ): TruncationResult { - const currentTokens = estimateTokens(output) + const currentTokens = estimateTokens(output); - if (currentTokens <= maxTokens) { - return { result: output, truncated: false } - } + if (currentTokens <= maxTokens) { + return { result: output, truncated: false }; + } - const lines = output.split("\n") + const lines = output.split("\n"); - if (lines.length <= preserveHeaderLines) { - const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE - return { - result: output.slice(0, maxChars) + "\n\n[Output truncated due to context window limit]", - truncated: true, - } - } + if (lines.length <= preserveHeaderLines) { + const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE; + return { + result: + output.slice(0, maxChars) + + "\n\n[Output truncated due to context window limit]", + truncated: true, + }; + } - const headerLines = lines.slice(0, preserveHeaderLines) - const contentLines = lines.slice(preserveHeaderLines) + const headerLines = lines.slice(0, preserveHeaderLines); + const contentLines = lines.slice(preserveHeaderLines); - const headerText = headerLines.join("\n") - const headerTokens = estimateTokens(headerText) - const truncationMessageTokens = 50 - const availableTokens = maxTokens - headerTokens - truncationMessageTokens + const headerText = headerLines.join("\n"); + const headerTokens = estimateTokens(headerText); + const truncationMessageTokens = 50; + const availableTokens = maxTokens - headerTokens - truncationMessageTokens; - if (availableTokens <= 0) { - return { - result: headerText + "\n\n[Content truncated due to context window limit]", - truncated: true, - removedCount: contentLines.length, - } - } + if (availableTokens <= 0) { + return { + result: + headerText + "\n\n[Content truncated due to context window limit]", + truncated: true, + removedCount: contentLines.length, + }; + } - const resultLines: string[] = [] - let currentTokenCount = 0 + const resultLines: string[] = []; + let currentTokenCount = 0; - for (const line of contentLines) { - const lineTokens = estimateTokens(line + "\n") - if (currentTokenCount + lineTokens > availableTokens) { - break - } - resultLines.push(line) - currentTokenCount += lineTokens - } + for (const line of contentLines) { + const lineTokens = estimateTokens(line + "\n"); + if (currentTokenCount + lineTokens > availableTokens) { + break; + } + resultLines.push(line); + currentTokenCount += lineTokens; + } - const truncatedContent = [...headerLines, ...resultLines].join("\n") - const removedCount = contentLines.length - resultLines.length + const truncatedContent = [...headerLines, ...resultLines].join("\n"); + const removedCount = contentLines.length - resultLines.length; - return { - result: truncatedContent + `\n\n[${removedCount} more lines truncated due to context window limit]`, - truncated: true, - removedCount, - } + return { + result: + truncatedContent + + `\n\n[${removedCount} more lines truncated due to context window limit]`, + truncated: true, + removedCount, + }; } export async function getContextWindowUsage( - ctx: PluginInput, - sessionID: string -): Promise<{ usedTokens: number; remainingTokens: number; usagePercentage: number } | null> { - try { - const response = await ctx.client.session.messages({ - path: { id: sessionID }, - }) + ctx: PluginInput, + sessionID: string, +): Promise<{ + usedTokens: number; + remainingTokens: number; + usagePercentage: number; +} | null> { + try { + const response = await ctx.client.session.messages({ + path: { id: sessionID }, + }); - const messages = (response.data ?? response) as MessageWrapper[] + const messages = (response.data ?? response) as MessageWrapper[]; - const assistantMessages = messages - .filter((m) => m.info.role === "assistant") - .map((m) => m.info as AssistantMessageInfo) + const assistantMessages = messages + .filter((m) => m.info.role === "assistant") + .map((m) => m.info as AssistantMessageInfo); - if (assistantMessages.length === 0) return null + if (assistantMessages.length === 0) return null; - const lastAssistant = assistantMessages[assistantMessages.length - 1] - const lastTokens = lastAssistant.tokens - const usedTokens = - (lastTokens?.input ?? 0) + - (lastTokens?.cache?.read ?? 0) + - (lastTokens?.output ?? 0) - const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens + const lastAssistant = assistantMessages[assistantMessages.length - 1]; + const lastTokens = lastAssistant.tokens; + const usedTokens = + (lastTokens?.input ?? 0) + + (lastTokens?.cache?.read ?? 0) + + (lastTokens?.output ?? 0); + const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens; - return { - usedTokens, - remainingTokens, - usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT, - } - } catch { - return null - } + return { + usedTokens, + remainingTokens, + usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT, + }; + } catch { + return null; + } } export async function dynamicTruncate( - ctx: PluginInput, - sessionID: string, - output: string, - options: TruncationOptions = {} + ctx: PluginInput, + sessionID: string, + output: string, + options: TruncationOptions = {}, ): Promise { - const { targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS, preserveHeaderLines = 3 } = options + const { + targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS, + preserveHeaderLines = 3, + } = options; - const usage = await getContextWindowUsage(ctx, sessionID) + const usage = await getContextWindowUsage(ctx, sessionID); - if (!usage) { - return { result: output, truncated: false } - } + if (!usage) { + // Fallback: apply conservative truncation when context usage unavailable + return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines); + } - const maxOutputTokens = Math.min(usage.remainingTokens * 0.5, targetMaxTokens) + const maxOutputTokens = Math.min( + usage.remainingTokens * 0.5, + targetMaxTokens, + ); - if (maxOutputTokens <= 0) { - return { - result: "[Output suppressed - context window exhausted]", - truncated: true, - } - } + if (maxOutputTokens <= 0) { + return { + result: "[Output suppressed - context window exhausted]", + truncated: true, + }; + } - return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines) + return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines); } export function createDynamicTruncator(ctx: PluginInput) { - return { - truncate: (sessionID: string, output: string, options?: TruncationOptions) => - dynamicTruncate(ctx, sessionID, output, options), + return { + truncate: ( + sessionID: string, + output: string, + options?: TruncationOptions, + ) => dynamicTruncate(ctx, sessionID, output, options), - getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID), + getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID), - truncateSync: (output: string, maxTokens: number, preserveHeaderLines?: number) => - truncateToTokenLimit(output, maxTokens, preserveHeaderLines), - } + truncateSync: ( + output: string, + maxTokens: number, + preserveHeaderLines?: number, + ) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines), + }; }