fix(dynamic-truncator): apply fallback truncation when context usage unavailable (#268)
When getContextWindowUsage returns null (no assistant messages yet, an API failure, or the first request in a session), the truncator passed the output through untruncated. This caused context overflow crashes on early requests or whenever the usage lookup failed. The truncator now falls back to a conservative cap (50k tokens), preventing the prompt-too-long errors that crash sessions.
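
To make the new fallback concrete, here is a small sketch of the call that now runs when the usage lookup fails. It is illustration only, not part of the commit, and the import path is an assumption since the file's location is not shown in this excerpt.

// Illustration only: the fallback path, assuming the module resolves as "./dynamic-truncator".
import { truncateToTokenLimit } from "./dynamic-truncator";

// Build an output far above the 50k-token fallback target
// (roughly 4 chars per estimated token, so ~1.1M chars ≈ ~280k tokens).
const lines = Array.from({ length: 20_000 }, (_, i) => `row ${i}: ${"x".repeat(48)}`);
const bigOutput = lines.join("\n");

// dynamicTruncate now falls back to exactly this call when
// getContextWindowUsage(ctx, sessionID) resolves to null:
const { result, truncated, removedCount } = truncateToTokenLimit(bigOutput, 50_000, 3);

console.log(truncated);    // true — previously the output passed through untouched
console.log(removedCount); // number of lines dropped to stay under the cap
console.log(result.endsWith("more lines truncated due to context window limit]")); // true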
@@ -1,167 +1,189 @@
-import type { PluginInput } from "@opencode-ai/plugin"
+import type { PluginInput } from "@opencode-ai/plugin";

-const ANTHROPIC_ACTUAL_LIMIT = 200_000
-const CHARS_PER_TOKEN_ESTIMATE = 4
-const DEFAULT_TARGET_MAX_TOKENS = 50_000
+const ANTHROPIC_ACTUAL_LIMIT = 200_000;
+const CHARS_PER_TOKEN_ESTIMATE = 4;
+const DEFAULT_TARGET_MAX_TOKENS = 50_000;

 interface AssistantMessageInfo {
-  role: "assistant"
+  role: "assistant";
   tokens: {
-    input: number
-    output: number
-    reasoning: number
-    cache: { read: number; write: number }
-  }
+    input: number;
+    output: number;
+    reasoning: number;
+    cache: { read: number; write: number };
+  };
 }

 interface MessageWrapper {
-  info: { role: string } & Partial<AssistantMessageInfo>
+  info: { role: string } & Partial<AssistantMessageInfo>;
 }

 export interface TruncationResult {
-  result: string
-  truncated: boolean
-  removedCount?: number
+  result: string;
+  truncated: boolean;
+  removedCount?: number;
 }

 export interface TruncationOptions {
-  targetMaxTokens?: number
-  preserveHeaderLines?: number
-  contextWindowLimit?: number
+  targetMaxTokens?: number;
+  preserveHeaderLines?: number;
+  contextWindowLimit?: number;
 }

 function estimateTokens(text: string): number {
-  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE)
+  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
 }

 export function truncateToTokenLimit(
   output: string,
   maxTokens: number,
-  preserveHeaderLines = 3
+  preserveHeaderLines = 3,
 ): TruncationResult {
-  const currentTokens = estimateTokens(output)
+  const currentTokens = estimateTokens(output);

   if (currentTokens <= maxTokens) {
-    return { result: output, truncated: false }
+    return { result: output, truncated: false };
   }

-  const lines = output.split("\n")
+  const lines = output.split("\n");

   if (lines.length <= preserveHeaderLines) {
-    const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE
+    const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE;
     return {
-      result: output.slice(0, maxChars) + "\n\n[Output truncated due to context window limit]",
-      truncated: true,
-    }
+      result:
+        output.slice(0, maxChars) +
+        "\n\n[Output truncated due to context window limit]",
+      truncated: true,
+    };
   }

-  const headerLines = lines.slice(0, preserveHeaderLines)
-  const contentLines = lines.slice(preserveHeaderLines)
+  const headerLines = lines.slice(0, preserveHeaderLines);
+  const contentLines = lines.slice(preserveHeaderLines);

-  const headerText = headerLines.join("\n")
-  const headerTokens = estimateTokens(headerText)
-  const truncationMessageTokens = 50
-  const availableTokens = maxTokens - headerTokens - truncationMessageTokens
+  const headerText = headerLines.join("\n");
+  const headerTokens = estimateTokens(headerText);
+  const truncationMessageTokens = 50;
+  const availableTokens = maxTokens - headerTokens - truncationMessageTokens;

   if (availableTokens <= 0) {
     return {
-      result: headerText + "\n\n[Content truncated due to context window limit]",
-      truncated: true,
-      removedCount: contentLines.length,
-    }
+      result:
+        headerText + "\n\n[Content truncated due to context window limit]",
+      truncated: true,
+      removedCount: contentLines.length,
+    };
   }

-  const resultLines: string[] = []
-  let currentTokenCount = 0
+  const resultLines: string[] = [];
+  let currentTokenCount = 0;

   for (const line of contentLines) {
-    const lineTokens = estimateTokens(line + "\n")
+    const lineTokens = estimateTokens(line + "\n");
     if (currentTokenCount + lineTokens > availableTokens) {
-      break
+      break;
     }
-    resultLines.push(line)
-    currentTokenCount += lineTokens
+    resultLines.push(line);
+    currentTokenCount += lineTokens;
   }

-  const truncatedContent = [...headerLines, ...resultLines].join("\n")
-  const removedCount = contentLines.length - resultLines.length
+  const truncatedContent = [...headerLines, ...resultLines].join("\n");
+  const removedCount = contentLines.length - resultLines.length;

   return {
-    result: truncatedContent + `\n\n[${removedCount} more lines truncated due to context window limit]`,
-    truncated: true,
-    removedCount,
-  }
+    result:
+      truncatedContent +
+      `\n\n[${removedCount} more lines truncated due to context window limit]`,
+    truncated: true,
+    removedCount,
+  };
 }

 export async function getContextWindowUsage(
   ctx: PluginInput,
-  sessionID: string
-): Promise<{ usedTokens: number; remainingTokens: number; usagePercentage: number } | null> {
+  sessionID: string,
+): Promise<{
+  usedTokens: number;
+  remainingTokens: number;
+  usagePercentage: number;
+} | null> {
   try {
     const response = await ctx.client.session.messages({
       path: { id: sessionID },
-    })
+    });

-    const messages = (response.data ?? response) as MessageWrapper[]
+    const messages = (response.data ?? response) as MessageWrapper[];

     const assistantMessages = messages
       .filter((m) => m.info.role === "assistant")
-      .map((m) => m.info as AssistantMessageInfo)
+      .map((m) => m.info as AssistantMessageInfo);

-    if (assistantMessages.length === 0) return null
+    if (assistantMessages.length === 0) return null;

-    const lastAssistant = assistantMessages[assistantMessages.length - 1]
-    const lastTokens = lastAssistant.tokens
+    const lastAssistant = assistantMessages[assistantMessages.length - 1];
+    const lastTokens = lastAssistant.tokens;
     const usedTokens =
       (lastTokens?.input ?? 0) +
       (lastTokens?.cache?.read ?? 0) +
-      (lastTokens?.output ?? 0)
-    const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens
+      (lastTokens?.output ?? 0);
+    const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens;

     return {
       usedTokens,
       remainingTokens,
       usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
-    }
+    };
   } catch {
-    return null
+    return null;
   }
 }

 export async function dynamicTruncate(
   ctx: PluginInput,
   sessionID: string,
   output: string,
-  options: TruncationOptions = {}
+  options: TruncationOptions = {},
 ): Promise<TruncationResult> {
-  const { targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS, preserveHeaderLines = 3 } = options
+  const {
+    targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
+    preserveHeaderLines = 3,
+  } = options;

-  const usage = await getContextWindowUsage(ctx, sessionID)
+  const usage = await getContextWindowUsage(ctx, sessionID);

   if (!usage) {
-    return { result: output, truncated: false }
+    // Fallback: apply conservative truncation when context usage unavailable
+    return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
   }

-  const maxOutputTokens = Math.min(usage.remainingTokens * 0.5, targetMaxTokens)
+  const maxOutputTokens = Math.min(
+    usage.remainingTokens * 0.5,
+    targetMaxTokens,
+  );

   if (maxOutputTokens <= 0) {
     return {
       result: "[Output suppressed - context window exhausted]",
       truncated: true,
-    }
+    };
   }

-  return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines)
+  return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
 }

 export function createDynamicTruncator(ctx: PluginInput) {
   return {
-    truncate: (sessionID: string, output: string, options?: TruncationOptions) =>
-      dynamicTruncate(ctx, sessionID, output, options),
+    truncate: (
+      sessionID: string,
+      output: string,
+      options?: TruncationOptions,
+    ) => dynamicTruncate(ctx, sessionID, output, options),

     getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID),

-    truncateSync: (output: string, maxTokens: number, preserveHeaderLines?: number) =>
-      truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
-  }
+    truncateSync: (
+      output: string,
+      maxTokens: number,
+      preserveHeaderLines?: number,
+    ) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
+  };
 }
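
For context, a minimal consumer sketch follows. It is hypothetical and not part of this commit: guardToolOutput is an invented wrapper, ctx is assumed to be the PluginInput the opencode host passes to a plugin, and the import path is assumed.

import type { PluginInput } from "@opencode-ai/plugin";
import { createDynamicTruncator } from "./dynamic-truncator";

// Hypothetical wrapper a plugin might place around large tool output.
export async function guardToolOutput(
  ctx: PluginInput,
  sessionID: string,
  rawOutput: string,
): Promise<string> {
  const truncator = createDynamicTruncator(ctx);

  // Looks up session token usage; if the lookup yields null, the new fallback
  // caps the output at the default 50k-token target instead of passing it through.
  const { result, truncated, removedCount } = await truncator.truncate(sessionID, rawOutput);

  if (truncated) {
    console.warn(`dynamic-truncator: removed ${removedCount ?? 0} lines to fit the context window`);
  }
  return result;
}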