fix(dynamic-truncator): apply fallback truncation when context usage unavailable (#268)

When getContextWindowUsage returns null (no assistant messages yet, API
failure, or first request in session), the truncator was returning
untruncated output. This caused context overflow crashes on early
requests or when usage lookup failed.

Now the truncator applies a conservative fallback limit (50k tokens) in that
case, preventing prompt-too-long errors that would crash the session.
This commit is contained in:
Harsha Vardhan
2025-12-28 02:22:02 +11:00
committed by GitHub
parent 6bc9a31ee4
commit 90d43dc292

View File

@@ -1,167 +1,189 @@
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin";
const ANTHROPIC_ACTUAL_LIMIT = 200_000 const ANTHROPIC_ACTUAL_LIMIT = 200_000;
const CHARS_PER_TOKEN_ESTIMATE = 4 const CHARS_PER_TOKEN_ESTIMATE = 4;
const DEFAULT_TARGET_MAX_TOKENS = 50_000 const DEFAULT_TARGET_MAX_TOKENS = 50_000;
interface AssistantMessageInfo { interface AssistantMessageInfo {
role: "assistant" role: "assistant";
tokens: { tokens: {
input: number input: number;
output: number output: number;
reasoning: number reasoning: number;
cache: { read: number; write: number } cache: { read: number; write: number };
} };
} }
interface MessageWrapper { interface MessageWrapper {
info: { role: string } & Partial<AssistantMessageInfo> info: { role: string } & Partial<AssistantMessageInfo>;
} }
export interface TruncationResult { export interface TruncationResult {
result: string result: string;
truncated: boolean truncated: boolean;
removedCount?: number removedCount?: number;
} }
export interface TruncationOptions { export interface TruncationOptions {
targetMaxTokens?: number targetMaxTokens?: number;
preserveHeaderLines?: number preserveHeaderLines?: number;
contextWindowLimit?: number contextWindowLimit?: number;
} }
function estimateTokens(text: string): number { function estimateTokens(text: string): number {
return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE) return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
} }
export function truncateToTokenLimit( export function truncateToTokenLimit(
output: string, output: string,
maxTokens: number, maxTokens: number,
preserveHeaderLines = 3 preserveHeaderLines = 3,
): TruncationResult { ): TruncationResult {
const currentTokens = estimateTokens(output) const currentTokens = estimateTokens(output);
if (currentTokens <= maxTokens) { if (currentTokens <= maxTokens) {
return { result: output, truncated: false } return { result: output, truncated: false };
} }
const lines = output.split("\n") const lines = output.split("\n");
if (lines.length <= preserveHeaderLines) { if (lines.length <= preserveHeaderLines) {
const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE;
return { return {
result: output.slice(0, maxChars) + "\n\n[Output truncated due to context window limit]", result:
truncated: true, output.slice(0, maxChars) +
} "\n\n[Output truncated due to context window limit]",
} truncated: true,
};
}
const headerLines = lines.slice(0, preserveHeaderLines) const headerLines = lines.slice(0, preserveHeaderLines);
const contentLines = lines.slice(preserveHeaderLines) const contentLines = lines.slice(preserveHeaderLines);
const headerText = headerLines.join("\n") const headerText = headerLines.join("\n");
const headerTokens = estimateTokens(headerText) const headerTokens = estimateTokens(headerText);
const truncationMessageTokens = 50 const truncationMessageTokens = 50;
const availableTokens = maxTokens - headerTokens - truncationMessageTokens const availableTokens = maxTokens - headerTokens - truncationMessageTokens;
if (availableTokens <= 0) { if (availableTokens <= 0) {
return { return {
result: headerText + "\n\n[Content truncated due to context window limit]", result:
truncated: true, headerText + "\n\n[Content truncated due to context window limit]",
removedCount: contentLines.length, truncated: true,
} removedCount: contentLines.length,
} };
}
const resultLines: string[] = [] const resultLines: string[] = [];
let currentTokenCount = 0 let currentTokenCount = 0;
for (const line of contentLines) { for (const line of contentLines) {
const lineTokens = estimateTokens(line + "\n") const lineTokens = estimateTokens(line + "\n");
if (currentTokenCount + lineTokens > availableTokens) { if (currentTokenCount + lineTokens > availableTokens) {
break break;
} }
resultLines.push(line) resultLines.push(line);
currentTokenCount += lineTokens currentTokenCount += lineTokens;
} }
const truncatedContent = [...headerLines, ...resultLines].join("\n") const truncatedContent = [...headerLines, ...resultLines].join("\n");
const removedCount = contentLines.length - resultLines.length const removedCount = contentLines.length - resultLines.length;
return { return {
result: truncatedContent + `\n\n[${removedCount} more lines truncated due to context window limit]`, result:
truncated: true, truncatedContent +
removedCount, `\n\n[${removedCount} more lines truncated due to context window limit]`,
} truncated: true,
removedCount,
};
} }
export async function getContextWindowUsage( export async function getContextWindowUsage(
ctx: PluginInput, ctx: PluginInput,
sessionID: string sessionID: string,
): Promise<{ usedTokens: number; remainingTokens: number; usagePercentage: number } | null> { ): Promise<{
try { usedTokens: number;
const response = await ctx.client.session.messages({ remainingTokens: number;
path: { id: sessionID }, usagePercentage: number;
}) } | null> {
try {
const response = await ctx.client.session.messages({
path: { id: sessionID },
});
const messages = (response.data ?? response) as MessageWrapper[] const messages = (response.data ?? response) as MessageWrapper[];
const assistantMessages = messages const assistantMessages = messages
.filter((m) => m.info.role === "assistant") .filter((m) => m.info.role === "assistant")
.map((m) => m.info as AssistantMessageInfo) .map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null if (assistantMessages.length === 0) return null;
const lastAssistant = assistantMessages[assistantMessages.length - 1] const lastAssistant = assistantMessages[assistantMessages.length - 1];
const lastTokens = lastAssistant.tokens const lastTokens = lastAssistant.tokens;
const usedTokens = const usedTokens =
(lastTokens?.input ?? 0) + (lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) + (lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0) (lastTokens?.output ?? 0);
const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens;
return { return {
usedTokens, usedTokens,
remainingTokens, remainingTokens,
usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT, usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
} };
} catch { } catch {
return null return null;
} }
} }
export async function dynamicTruncate( export async function dynamicTruncate(
ctx: PluginInput, ctx: PluginInput,
sessionID: string, sessionID: string,
output: string, output: string,
options: TruncationOptions = {} options: TruncationOptions = {},
): Promise<TruncationResult> { ): Promise<TruncationResult> {
const { targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS, preserveHeaderLines = 3 } = options const {
targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
preserveHeaderLines = 3,
} = options;
const usage = await getContextWindowUsage(ctx, sessionID) const usage = await getContextWindowUsage(ctx, sessionID);
if (!usage) { if (!usage) {
return { result: output, truncated: false } // Fallback: apply conservative truncation when context usage unavailable
} return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
}
const maxOutputTokens = Math.min(usage.remainingTokens * 0.5, targetMaxTokens) const maxOutputTokens = Math.min(
usage.remainingTokens * 0.5,
targetMaxTokens,
);
if (maxOutputTokens <= 0) { if (maxOutputTokens <= 0) {
return { return {
result: "[Output suppressed - context window exhausted]", result: "[Output suppressed - context window exhausted]",
truncated: true, truncated: true,
} };
} }
return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines) return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
} }
export function createDynamicTruncator(ctx: PluginInput) { export function createDynamicTruncator(ctx: PluginInput) {
return { return {
truncate: (sessionID: string, output: string, options?: TruncationOptions) => truncate: (
dynamicTruncate(ctx, sessionID, output, options), sessionID: string,
output: string,
options?: TruncationOptions,
) => dynamicTruncate(ctx, sessionID, output, options),
getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID), getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID),
truncateSync: (output: string, maxTokens: number, preserveHeaderLines?: number) => truncateSync: (
truncateToTokenLimit(output, maxTokens, preserveHeaderLines), output: string,
} maxTokens: number,
preserveHeaderLines?: number,
) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
};
} }