fix(dynamic-truncator): apply fallback truncation when context usage unavailable (#268)

When getContextWindowUsage returns null (no assistant messages yet, API
failure, or first request in session), the truncator was returning
untruncated output. This caused context overflow crashes on early
requests or when usage lookup failed.

Now applies conservative truncation (50k tokens) as fallback, preventing
prompt-too-long errors that crash sessions.
This commit is contained in:
Harsha Vardhan
2025-12-28 02:22:02 +11:00
committed by GitHub
parent 6bc9a31ee4
commit 90d43dc292

View File

@@ -1,167 +1,189 @@
import type { PluginInput } from "@opencode-ai/plugin";

// Hard context-window ceiling for Anthropic models targeted by this plugin.
const ANTHROPIC_ACTUAL_LIMIT = 200_000;
// Rough chars-per-token heuristic used for all token estimates in this file.
const CHARS_PER_TOKEN_ESTIMATE = 4;
// Conservative per-output token budget used when no better limit is known.
const DEFAULT_TARGET_MAX_TOKENS = 50_000;
/** Token accounting attached to an assistant message by the OpenCode API. */
interface AssistantMessageInfo {
  role: "assistant";
  tokens: {
    input: number;
    output: number;
    reasoning: number;
    cache: { read: number; write: number };
  };
}
/** Envelope shape returned by the session messages endpoint; only `info.role`
 * is guaranteed, assistant fields are present when role === "assistant". */
interface MessageWrapper {
  info: { role: string } & Partial<AssistantMessageInfo>;
}
/** Outcome of a truncation pass. `removedCount` is set only when whole
 * content lines were dropped (not for raw character slicing). */
export interface TruncationResult {
  result: string;
  truncated: boolean;
  removedCount?: number;
}
/** Caller-tunable knobs for dynamicTruncate. All optional; see defaults in
 * dynamicTruncate (`targetMaxTokens` = 50k, `preserveHeaderLines` = 3). */
export interface TruncationOptions {
  targetMaxTokens?: number;
  preserveHeaderLines?: number;
  contextWindowLimit?: number;
}
/** Estimate the token cost of `text` using the 4-chars-per-token heuristic.
 * Always rounds up so budgets err on the conservative side. */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
}
export function truncateToTokenLimit(
output: string,
maxTokens: number,
preserveHeaderLines = 3
output: string,
maxTokens: number,
preserveHeaderLines = 3,
): TruncationResult {
const currentTokens = estimateTokens(output)
const currentTokens = estimateTokens(output);
if (currentTokens <= maxTokens) {
return { result: output, truncated: false }
}
if (currentTokens <= maxTokens) {
return { result: output, truncated: false };
}
const lines = output.split("\n")
const lines = output.split("\n");
if (lines.length <= preserveHeaderLines) {
const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE
return {
result: output.slice(0, maxChars) + "\n\n[Output truncated due to context window limit]",
truncated: true,
}
}
if (lines.length <= preserveHeaderLines) {
const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE;
return {
result:
output.slice(0, maxChars) +
"\n\n[Output truncated due to context window limit]",
truncated: true,
};
}
const headerLines = lines.slice(0, preserveHeaderLines)
const contentLines = lines.slice(preserveHeaderLines)
const headerLines = lines.slice(0, preserveHeaderLines);
const contentLines = lines.slice(preserveHeaderLines);
const headerText = headerLines.join("\n")
const headerTokens = estimateTokens(headerText)
const truncationMessageTokens = 50
const availableTokens = maxTokens - headerTokens - truncationMessageTokens
const headerText = headerLines.join("\n");
const headerTokens = estimateTokens(headerText);
const truncationMessageTokens = 50;
const availableTokens = maxTokens - headerTokens - truncationMessageTokens;
if (availableTokens <= 0) {
return {
result: headerText + "\n\n[Content truncated due to context window limit]",
truncated: true,
removedCount: contentLines.length,
}
}
if (availableTokens <= 0) {
return {
result:
headerText + "\n\n[Content truncated due to context window limit]",
truncated: true,
removedCount: contentLines.length,
};
}
const resultLines: string[] = []
let currentTokenCount = 0
const resultLines: string[] = [];
let currentTokenCount = 0;
for (const line of contentLines) {
const lineTokens = estimateTokens(line + "\n")
if (currentTokenCount + lineTokens > availableTokens) {
break
}
resultLines.push(line)
currentTokenCount += lineTokens
}
for (const line of contentLines) {
const lineTokens = estimateTokens(line + "\n");
if (currentTokenCount + lineTokens > availableTokens) {
break;
}
resultLines.push(line);
currentTokenCount += lineTokens;
}
const truncatedContent = [...headerLines, ...resultLines].join("\n")
const removedCount = contentLines.length - resultLines.length
const truncatedContent = [...headerLines, ...resultLines].join("\n");
const removedCount = contentLines.length - resultLines.length;
return {
result: truncatedContent + `\n\n[${removedCount} more lines truncated due to context window limit]`,
truncated: true,
removedCount,
}
return {
result:
truncatedContent +
`\n\n[${removedCount} more lines truncated due to context window limit]`,
truncated: true,
removedCount,
};
}
export async function getContextWindowUsage(
ctx: PluginInput,
sessionID: string
): Promise<{ usedTokens: number; remainingTokens: number; usagePercentage: number } | null> {
try {
const response = await ctx.client.session.messages({
path: { id: sessionID },
})
ctx: PluginInput,
sessionID: string,
): Promise<{
usedTokens: number;
remainingTokens: number;
usagePercentage: number;
} | null> {
try {
const response = await ctx.client.session.messages({
path: { id: sessionID },
});
const messages = (response.data ?? response) as MessageWrapper[]
const messages = (response.data ?? response) as MessageWrapper[];
const assistantMessages = messages
.filter((m) => m.info.role === "assistant")
.map((m) => m.info as AssistantMessageInfo)
const assistantMessages = messages
.filter((m) => m.info.role === "assistant")
.map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null
if (assistantMessages.length === 0) return null;
const lastAssistant = assistantMessages[assistantMessages.length - 1]
const lastTokens = lastAssistant.tokens
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0)
const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens
const lastAssistant = assistantMessages[assistantMessages.length - 1];
const lastTokens = lastAssistant.tokens;
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0);
const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens;
return {
usedTokens,
remainingTokens,
usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
}
} catch {
return null
}
return {
usedTokens,
remainingTokens,
usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
};
} catch {
return null;
}
}
export async function dynamicTruncate(
ctx: PluginInput,
sessionID: string,
output: string,
options: TruncationOptions = {}
ctx: PluginInput,
sessionID: string,
output: string,
options: TruncationOptions = {},
): Promise<TruncationResult> {
const { targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS, preserveHeaderLines = 3 } = options
const {
targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
preserveHeaderLines = 3,
} = options;
const usage = await getContextWindowUsage(ctx, sessionID)
const usage = await getContextWindowUsage(ctx, sessionID);
if (!usage) {
return { result: output, truncated: false }
}
if (!usage) {
// Fallback: apply conservative truncation when context usage unavailable
return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
}
const maxOutputTokens = Math.min(usage.remainingTokens * 0.5, targetMaxTokens)
const maxOutputTokens = Math.min(
usage.remainingTokens * 0.5,
targetMaxTokens,
);
if (maxOutputTokens <= 0) {
return {
result: "[Output suppressed - context window exhausted]",
truncated: true,
}
}
if (maxOutputTokens <= 0) {
return {
result: "[Output suppressed - context window exhausted]",
truncated: true,
};
}
return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines)
return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
}
export function createDynamicTruncator(ctx: PluginInput) {
return {
truncate: (sessionID: string, output: string, options?: TruncationOptions) =>
dynamicTruncate(ctx, sessionID, output, options),
return {
truncate: (
sessionID: string,
output: string,
options?: TruncationOptions,
) => dynamicTruncate(ctx, sessionID, output, options),
getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID),
getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID),
truncateSync: (output: string, maxTokens: number, preserveHeaderLines?: number) =>
truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
}
truncateSync: (
output: string,
maxTokens: number,
preserveHeaderLines?: number,
) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
};
}