- Update preemptive-compaction hook to use the 1M limit when the env vars are set
- Update dynamic-truncator to use the 1M limit for output truncation
- Update context-window-monitor to use the 1M limit for usage tracking

Previously, hardcoded 200k limits caused compaction at 140k tokens even with 1M context enabled. The hooks now respect the env vars, consistent with the base opencode implementation.

Fixes compaction triggering too early with Claude Sonnet 4.5's 1M context.

Related to anomalyco/opencode#6660
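For scale: 140k is exactly 70% of the hardcoded 200k window, so a compaction threshold expressed as a fraction of the limit fired far too early under 1M context. With the limit read from the env vars, the same fraction would not trigger until roughly 700k tokens (assuming the threshold is percentage-based, which the 140k/200k ratio suggests).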
import type { PluginInput } from "@opencode-ai/plugin";

// Actual context window for the active Anthropic model: 1M when either
// 1M-context env var is set, otherwise the standard 200k.
const ANTHROPIC_ACTUAL_LIMIT =
  process.env.ANTHROPIC_1M_CONTEXT === "true" ||
  process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
    ? 1_000_000
    : 200_000;

// Rough heuristic: ~4 characters per token.
const CHARS_PER_TOKEN_ESTIMATE = 4;
const DEFAULT_TARGET_MAX_TOKENS = 50_000;

interface AssistantMessageInfo {
  role: "assistant";
  tokens: {
    input: number;
    output: number;
    reasoning: number;
    cache: { read: number; write: number };
  };
}

interface MessageWrapper {
  info: { role: string } & Partial<AssistantMessageInfo>;
}

export interface TruncationResult {
  result: string;
  truncated: boolean;
  removedCount?: number;
}

export interface TruncationOptions {
  targetMaxTokens?: number;
  preserveHeaderLines?: number;
  // Accepted from callers, but not read by the functions in this file.
  contextWindowLimit?: number;
}

function estimateTokens(text: string): number {
  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
}
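// Sanity check for the heuristic (illustrative numbers, not from the
// original source): a 2,000-character output estimates to
// Math.ceil(2000 / 4) = 500 tokens.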
export function truncateToTokenLimit(
  output: string,
  maxTokens: number,
  preserveHeaderLines = 3,
): TruncationResult {
  const currentTokens = estimateTokens(output);

  // Already within budget: return unchanged.
  if (currentTokens <= maxTokens) {
    return { result: output, truncated: false };
  }

  const lines = output.split("\n");

  // Too few lines to split header from content: hard-truncate by characters.
  if (lines.length <= preserveHeaderLines) {
    const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE;
    return {
      result:
        output.slice(0, maxChars) +
        "\n\n[Output truncated due to context window limit]",
      truncated: true,
    };
  }

  const headerLines = lines.slice(0, preserveHeaderLines);
  const contentLines = lines.slice(preserveHeaderLines);

  // Budget for content: total limit minus the header and a reserve for
  // the trailing truncation notice.
  const headerText = headerLines.join("\n");
  const headerTokens = estimateTokens(headerText);
  const truncationMessageTokens = 50;
  const availableTokens = maxTokens - headerTokens - truncationMessageTokens;

  if (availableTokens <= 0) {
    return {
      result:
        headerText + "\n\n[Content truncated due to context window limit]",
      truncated: true,
      removedCount: contentLines.length,
    };
  }

  // Keep whole lines until the budget runs out.
  const resultLines: string[] = [];
  let currentTokenCount = 0;

  for (const line of contentLines) {
    const lineTokens = estimateTokens(line + "\n");
    if (currentTokenCount + lineTokens > availableTokens) {
      break;
    }
    resultLines.push(line);
    currentTokenCount += lineTokens;
  }

  const truncatedContent = [...headerLines, ...resultLines].join("\n");
  const removedCount = contentLines.length - resultLines.length;

  return {
    result:
      truncatedContent +
      `\n\n[${removedCount} more lines truncated due to context window limit]`,
    truncated: true,
    removedCount,
  };
}
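// Usage sketch (hypothetical caller; only truncateToTokenLimit and
// TruncationResult come from this file): trim a long tool output to a
// 1k-token budget while keeping its first three lines intact.
//
//   const res = truncateToTokenLimit(longToolOutput, 1_000);
//   if (res.truncated) console.log(`dropped ${res.removedCount} lines`);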
// Token usage for a session, read from its most recent assistant message
// and measured against the resolved context window limit.
export async function getContextWindowUsage(
  ctx: PluginInput,
  sessionID: string,
): Promise<{
  usedTokens: number;
  remainingTokens: number;
  usagePercentage: number;
} | null> {
  try {
    const response = await ctx.client.session.messages({
      path: { id: sessionID },
    });

    const messages = (response.data ?? response) as MessageWrapper[];

    const assistantMessages = messages
      .filter((m) => m.info.role === "assistant")
      .map((m) => m.info as AssistantMessageInfo);

    if (assistantMessages.length === 0) return null;

    // Used context = prompt input + cache reads + generated output on
    // the last assistant turn.
    const lastAssistant = assistantMessages[assistantMessages.length - 1];
    const lastTokens = lastAssistant.tokens;
    const usedTokens =
      (lastTokens?.input ?? 0) +
      (lastTokens?.cache?.read ?? 0) +
      (lastTokens?.output ?? 0);
    const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens;

    return {
      usedTokens,
      remainingTokens,
      // Fraction of the window consumed, in [0, 1].
      usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
    };
  } catch {
    // Any client error means usage is unknown; callers fall back to
    // conservative truncation.
    return null;
  }
}
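// Shape sketch (illustrative numbers, assuming a 1M-context flag is set):
// a session with 300k tokens in play would yield
// { usedTokens: 300_000, remainingTokens: 700_000, usagePercentage: 0.3 }.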
export async function dynamicTruncate(
  ctx: PluginInput,
  sessionID: string,
  output: string,
  options: TruncationOptions = {},
): Promise<TruncationResult> {
  const {
    targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
    preserveHeaderLines = 3,
  } = options;

  const usage = await getContextWindowUsage(ctx, sessionID);

  if (!usage) {
    // Fallback: apply conservative truncation when context usage unavailable
    return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
  }

  const maxOutputTokens = Math.min(
    usage.remainingTokens * 0.5,
    targetMaxTokens,
  );

  if (maxOutputTokens <= 0) {
    return {
      result: "[Output suppressed - context window exhausted]",
      truncated: true,
    };
  }

  return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
}
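// Budget arithmetic (illustrative): with 60_000 tokens remaining, the cap
// is min(60_000 * 0.5, 50_000) = 30_000; with 200_000 remaining it is
// min(100_000, 50_000) = 50_000, i.e. the default target wins.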
export function createDynamicTruncator(ctx: PluginInput) {
  return {
    truncate: (
      sessionID: string,
      output: string,
      options?: TruncationOptions,
    ) => dynamicTruncate(ctx, sessionID, output, options),

    getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID),

    truncateSync: (
      output: string,
      maxTokens: number,
      preserveHeaderLines?: number,
    ) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines),
  };
}
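// Usage sketch inside a plugin (hypothetical wiring; only the factory and
// its methods come from this file):
//
//   const truncator = createDynamicTruncator(ctx);
//   const { result, truncated } = await truncator.truncate(sessionID, raw);
//   const usage = await truncator.getUsage(sessionID);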