import type { PluginInput } from "@opencode-ai/plugin";

/** Context window size (in tokens) assumed when the caller does not supply one. */
const ANTHROPIC_ACTUAL_LIMIT = 200_000;
/** Rough characters-per-token ratio used for all token estimates in this module. */
const CHARS_PER_TOKEN_ESTIMATE = 4;
/** Default upper bound (in estimated tokens) for a single truncated output. */
const DEFAULT_TARGET_MAX_TOKENS = 50_000;
/** Estimated tokens reserved for the trailing truncation notice. */
const TRUNCATION_MESSAGE_TOKENS = 50;

/** Token accounting fields read from an assistant message. */
interface AssistantMessageInfo {
  role: "assistant";
  tokens: {
    input: number;
    output: number;
    reasoning: number;
    cache: { read: number; write: number };
  };
}

/** Shape of one entry of the session message list, as consumed by this module. */
interface MessageWrapper {
  info: { role: string } & Partial<AssistantMessageInfo>;
}

/** Outcome of a truncation attempt. */
export interface TruncationResult {
  /** The (possibly shortened) text, including any truncation notice. */
  result: string;
  /** Whether anything was cut or suppressed. */
  truncated: boolean;
  /** Number of content lines dropped, when truncation was line-based. */
  removedCount?: number;
}

/** Tuning knobs for {@link dynamicTruncate}. */
export interface TruncationOptions {
  /** Hard cap on the estimated token size of the output. */
  targetMaxTokens?: number;
  /** Number of leading lines that are always kept. */
  preserveHeaderLines?: number;
  /** Context window size in tokens; falls back to the module default when omitted or invalid. */
  contextWindowLimit?: number;
}

/** Estimates the token count of `text` from its character length. */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
}

/**
 * Truncates `output` so that its estimated token count fits within `maxTokens`.
 *
 * The first `preserveHeaderLines` lines are kept, then as many following lines
 * as fit in the remaining budget (after reserving room for the notice), and a
 * bracketed notice is appended describing what was cut.
 *
 * @param output - Text to truncate.
 * @param maxTokens - Budget in estimated tokens. Values at or below zero keep no original text.
 * @param preserveHeaderLines - Leading lines to always keep; negative values are treated as 0.
 * @returns The original text when it already fits, otherwise the shortened text plus a notice.
 */
export function truncateToTokenLimit(
  output: string,
  maxTokens: number,
  preserveHeaderLines = 3,
): TruncationResult {
  if (estimateTokens(output) <= maxTokens) {
    return { result: output, truncated: false };
  }

  // Clamp to zero: a negative end index would make `slice` drop only the tail
  // and keep almost the whole text instead of cutting it.
  const maxChars = Math.max(
    0,
    Math.floor(maxTokens * CHARS_PER_TOKEN_ESTIMATE),
  );
  // A negative count would make `slice(0, -k)` treat nearly every line as header.
  const headerCount = Math.max(0, Math.floor(preserveHeaderLines));

  const lines = output.split("\n");

  if (lines.length <= headerCount) {
    // Too few lines for line-based truncation; fall back to a raw character cut.
    return {
      result:
        output.slice(0, maxChars) +
        "\n\n[Output truncated due to context window limit]",
      truncated: true,
    };
  }

  const headerLines = lines.slice(0, headerCount);
  const contentLines = lines.slice(headerCount);

  const headerText = headerLines.join("\n");
  const availableTokens =
    maxTokens - estimateTokens(headerText) - TRUNCATION_MESSAGE_TOKENS;

  if (availableTokens <= 0) {
    // The header alone uses up the budget. Cap it as well, so that a single
    // huge header line cannot pass through untruncated.
    return {
      result:
        headerText.slice(0, maxChars) +
        "\n\n[Content truncated due to context window limit]",
      truncated: true,
      removedCount: contentLines.length,
    };
  }

  const keptLines: string[] = [];
  let usedTokens = 0;

  for (const line of contentLines) {
    // The +"\n" accounts for the separator re-added by the final join.
    const lineTokens = estimateTokens(line + "\n");
    if (usedTokens + lineTokens > availableTokens) {
      break;
    }
    keptLines.push(line);
    usedTokens += lineTokens;
  }

  const removedCount = contentLines.length - keptLines.length;

  return {
    result:
      [...headerLines, ...keptLines].join("\n") +
      `\n\n[${removedCount} more lines truncated due to context window limit]`,
    truncated: true,
    removedCount,
  };
}

/**
 * Reports context window usage for a session, based on the token counts of the
 * most recent assistant message (input + cache read + output).
 *
 * @param ctx - Plugin input whose client is used to fetch the session messages.
 * @param sessionID - Session to inspect.
 * @param contextWindowLimit - Window size in tokens; non-finite or non-positive
 *   values fall back to the module default.
 * @returns Usage figures, or `null` when there is no assistant message or the
 *   lookup fails (best-effort: errors are deliberately swallowed).
 */
export async function getContextWindowUsage(
  ctx: PluginInput,
  sessionID: string,
  contextWindowLimit: number = ANTHROPIC_ACTUAL_LIMIT,
): Promise<{
  usedTokens: number;
  remainingTokens: number;
  usagePercentage: number;
} | null> {
  const limit =
    Number.isFinite(contextWindowLimit) && contextWindowLimit > 0
      ? contextWindowLimit
      : ANTHROPIC_ACTUAL_LIMIT;

  try {
    const response = await ctx.client.session.messages({
      path: { id: sessionID },
    });

    // The response may be the list itself or an object wrapping it in `data`.
    const messages = (response.data ?? response) as MessageWrapper[];

    // Scan backwards for the most recent assistant message.
    let lastTokens: AssistantMessageInfo["tokens"] | undefined;
    let found = false;
    for (let i = messages.length - 1; i >= 0; i--) {
      const info = messages[i]?.info;
      if (info?.role === "assistant") {
        lastTokens = info.tokens;
        found = true;
        break;
      }
    }

    if (!found) return null;

    const usedTokens =
      (lastTokens?.input ?? 0) +
      (lastTokens?.cache?.read ?? 0) +
      (lastTokens?.output ?? 0);

    return {
      usedTokens,
      remainingTokens: limit - usedTokens,
      usagePercentage: usedTokens / limit,
    };
  } catch {
    return null;
  }
}

/**
 * Truncates `output` to a budget derived from the session's remaining context:
 * the smaller of half the remaining tokens and `options.targetMaxTokens`.
 *
 * When usage cannot be determined, the output is truncated conservatively to
 * `targetMaxTokens`. When no budget remains, the output is suppressed entirely.
 *
 * @param ctx - Plugin input used to query the session.
 * @param sessionID - Session whose context usage bounds the output.
 * @param output - Text to truncate.
 * @param options - Optional overrides; see {@link TruncationOptions}.
 */
export async function dynamicTruncate(
  ctx: PluginInput,
  sessionID: string,
  output: string,
  options: TruncationOptions = {},
): Promise<TruncationResult> {
  const {
    targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
    preserveHeaderLines = 3,
    contextWindowLimit,
  } = options;

  const usage = await getContextWindowUsage(
    ctx,
    sessionID,
    contextWindowLimit,
  );

  if (!usage) {
    // Fallback: apply conservative truncation when context usage unavailable
    return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
  }

  const maxOutputTokens = Math.min(
    usage.remainingTokens * 0.5,
    targetMaxTokens,
  );

  if (maxOutputTokens <= 0) {
    return {
      result: "[Output suppressed - context window exhausted]",
      truncated: true,
    };
  }

  return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
}

/**
 * Binds the truncation helpers to a plugin context.
 *
 * @param ctx - Plugin input captured by the returned helpers.
 * @returns An object exposing `truncate` (context-aware, async), `getUsage`
 *   (session usage lookup) and `truncateSync` (fixed-budget truncation).
 */
export function createDynamicTruncator(ctx: PluginInput) {
  return {
    truncate: (
      sessionID: string,
      output: string,
      options?: TruncationOptions,
    ) => dynamicTruncate(ctx, sessionID, output, options),

    getUsage: (sessionID: string, contextWindowLimit?: number) =>
      getContextWindowUsage(ctx, sessionID, contextWindowLimit),

    truncateSync: (
      output: string,
      maxTokens: number,
      preserveHeaderLines?: number,
    ) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
  };
}