Files
oh-my-opencode-free-fork/src/shared/dynamic-truncator.ts
Steven Vo 99c7df5640 fix: respect ANTHROPIC_1M_CONTEXT and VERTEX_ANTHROPIC_1M_CONTEXT env vars (#450)
- Update preemptive-compaction hook to use 1M limit when env vars set
- Update dynamic-truncator to use 1M limit for output truncation
- Update context-window-monitor to use 1M limit for usage tracking

Previously hardcoded 200k limits caused compaction at 140k tokens even
with 1M context enabled. Now respects env vars consistently with base
opencode implementation.

Fixes compaction triggering too early with Claude Sonnet 4.5 1M context.

Related to anomalyco/opencode#6660
2026-01-03 21:06:06 +09:00

194 lines
4.6 KiB
TypeScript

import type { PluginInput } from "@opencode-ai/plugin";
/**
 * Context window size, in tokens, used for usage accounting in this module.
 * Evaluated once at module load: 1,000,000 when either 1M-context environment
 * flag is exactly the string "true", otherwise 200,000.
 */
const ANTHROPIC_ACTUAL_LIMIT = [
  process.env.ANTHROPIC_1M_CONTEXT,
  process.env.VERTEX_ANTHROPIC_1M_CONTEXT,
].some((flag) => flag === "true")
  ? 1_000_000
  : 200_000;

/** Characters assumed per token when estimating token counts from text length. */
const CHARS_PER_TOKEN_ESTIMATE = 4;

/** Default cap, in estimated tokens, applied to a single truncated output. */
const DEFAULT_TARGET_MAX_TOKENS = 50_000;
/** Fields read from an assistant message: its role tag and token accounting. */
interface AssistantMessageInfo {
  role: "assistant";
  tokens: {
    input: number;
    output: number;
    reasoning: number;
    cache: { read: number; write: number };
  };
}

/**
 * Minimal shape of a session message as consumed by this module: any role,
 * optionally carrying the assistant-only fields.
 */
interface MessageWrapper {
  // `role` is left out of the partial on purpose. Intersecting `string` with
  // the optional literal `"assistant"` collapses the property type to
  // `"assistant"`, which would make non-assistant messages unrepresentable.
  info: { role: string } & Partial<Omit<AssistantMessageInfo, "role">>;
}
/** Outcome of a truncation attempt. */
export interface TruncationResult {
/** The text to use: the original output, or a shortened version followed by a bracketed notice. */
result: string;
/** `false` when the output was returned unchanged, `true` otherwise. */
truncated: boolean;
/** Number of content lines dropped; only set when truncation removed whole lines. */
removedCount?: number;
}
/** Tuning knobs accepted by `dynamicTruncate`. */
export interface TruncationOptions {
/** Upper bound, in estimated tokens, for the returned output (defaults to 50,000). */
targetMaxTokens?: number;
/** Number of leading lines kept ahead of the truncated content (defaults to 3). */
preserveHeaderLines?: number;
/**
 * Context window size in tokens.
 * NOTE(review): confirm that the consumer of these options reads this field
 * before relying on it.
 */
contextWindowLimit?: number;
}
/**
 * Approximates the token count of `text` from its character length.
 *
 * @param text - Text to measure.
 * @returns `text.length` divided by the chars-per-token estimate, rounded up.
 */
function estimateTokens(text: string): number {
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN_ESTIMATE);
}
/**
 * Shortens `output` so that its estimated token count stays near `maxTokens`.
 *
 * Token counts are estimated from character length. Output that already fits
 * is returned unchanged. Otherwise the first `preserveHeaderLines` lines are
 * kept, following lines are appended in order while they fit in the remaining
 * budget, and a bracketed notice is added at the end.
 *
 * @param output - Text to truncate.
 * @param maxTokens - Budget in estimated tokens. Zero or negative budgets keep
 *   no body text at all.
 * @param preserveHeaderLines - Number of leading lines to keep ahead of the
 *   content budget (default 3).
 * @returns The resulting text, whether it was truncated and, when whole lines
 *   were dropped, how many.
 */
export function truncateToTokenLimit(
  output: string,
  maxTokens: number,
  preserveHeaderLines = 3,
): TruncationResult {
  if (estimateTokens(output) <= maxTokens) {
    return { result: output, truncated: false };
  }

  // Character budget equivalent to `maxTokens`. Clamped at zero because a
  // negative `end` passed to `String.prototype.slice` counts back from the end
  // of the string, which would keep almost all of the output.
  const maxChars = Math.max(
    0,
    Math.floor(maxTokens * CHARS_PER_TOKEN_ESTIMATE),
  );

  const lines = output.split("\n");
  if (lines.length <= preserveHeaderLines) {
    // Everything counts as header, so fall back to a plain character cut.
    return {
      result:
        output.slice(0, maxChars) +
        "\n\n[Output truncated due to context window limit]",
      truncated: true,
    };
  }

  const headerLines = lines.slice(0, preserveHeaderLines);
  const contentLines = lines.slice(preserveHeaderLines);
  const headerText = headerLines.join("\n");
  const headerTokens = estimateTokens(headerText);
  // Tokens reserved for the trailing truncation notice.
  const truncationMessageTokens = 50;
  const availableTokens = maxTokens - headerTokens - truncationMessageTokens;

  if (availableTokens <= 0) {
    // No room for content. The header alone may exceed the budget (for
    // example one very long first line), so cap it as well; a header that
    // fits within the budget is returned in full.
    return {
      result:
        headerText.slice(0, maxChars) +
        "\n\n[Content truncated due to context window limit]",
      truncated: true,
      removedCount: contentLines.length,
    };
  }

  const resultLines: string[] = [];
  let currentTokenCount = 0;
  for (const line of contentLines) {
    // Each kept line also costs its newline separator.
    const lineTokens = estimateTokens(line + "\n");
    if (currentTokenCount + lineTokens > availableTokens) {
      // Stop at the first line that does not fit so the kept content stays a
      // contiguous prefix of the original.
      break;
    }
    resultLines.push(line);
    currentTokenCount += lineTokens;
  }

  const truncatedContent = [...headerLines, ...resultLines].join("\n");
  const removedCount = contentLines.length - resultLines.length;
  return {
    result:
      truncatedContent +
      `\n\n[${removedCount} more lines truncated due to context window limit]`,
    truncated: true,
    removedCount,
  };
}
/**
 * Reports context window usage for a session, based on the token counts of
 * its most recent assistant message.
 *
 * @param ctx - Plugin input whose client is used to fetch the session messages.
 * @param sessionID - Session to inspect.
 * @returns Used tokens (input + cache read + output), tokens remaining against
 *   the module's context limit, and usage as a fraction of that limit; `null`
 *   when the session has no assistant message or the lookup throws.
 */
export async function getContextWindowUsage(
  ctx: PluginInput,
  sessionID: string,
): Promise<{
  usedTokens: number;
  remainingTokens: number;
  usagePercentage: number;
} | null> {
  try {
    const response = await ctx.client.session.messages({
      path: { id: sessionID },
    });
    // The payload may be wrapped in `data` or returned directly.
    const messages = (response.data ?? response) as MessageWrapper[];

    // Walk every message and remember the last one authored by the assistant.
    const latestAssistant = messages.reduce<AssistantMessageInfo | undefined>(
      (latest, message) =>
        message.info.role === "assistant"
          ? (message.info as AssistantMessageInfo)
          : latest,
      undefined,
    );
    if (latestAssistant === undefined) {
      return null;
    }

    const tokens = latestAssistant.tokens;
    const inputTokens = tokens?.input ?? 0;
    const cacheReadTokens = tokens?.cache?.read ?? 0;
    const outputTokens = tokens?.output ?? 0;
    const usedTokens = inputTokens + cacheReadTokens + outputTokens;

    return {
      usedTokens,
      remainingTokens: ANTHROPIC_ACTUAL_LIMIT - usedTokens,
      usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT,
    };
  } catch {
    // Best effort: any failure is reported as "usage unknown".
    return null;
  }
}
/**
 * Truncates `output` to a budget derived from the session's remaining context.
 *
 * The budget is the smaller of `targetMaxTokens` and half of the tokens still
 * free in the context window. When usage cannot be determined, only
 * `targetMaxTokens` is applied.
 *
 * @param ctx - Plugin input used to look up session usage.
 * @param sessionID - Session whose context usage bounds the output.
 * @param output - Text to truncate.
 * @param options - Optional overrides: `targetMaxTokens` (default 50,000),
 *   `preserveHeaderLines` (default 3) and `contextWindowLimit`. When
 *   `contextWindowLimit` is a positive finite number, it replaces the module's
 *   default window size when computing the remaining tokens.
 * @returns The truncation result, or a fixed suppression notice when no
 *   context budget is left.
 */
export async function dynamicTruncate(
  ctx: PluginInput,
  sessionID: string,
  output: string,
  options: TruncationOptions = {},
): Promise<TruncationResult> {
  const {
    targetMaxTokens = DEFAULT_TARGET_MAX_TOKENS,
    preserveHeaderLines = 3,
    contextWindowLimit,
  } = options;

  const usage = await getContextWindowUsage(ctx, sessionID);
  if (!usage) {
    // Fallback: apply conservative truncation when context usage unavailable
    return truncateToTokenLimit(output, targetMaxTokens, preserveHeaderLines);
  }

  // Honor a caller-supplied window size; otherwise use the remaining-token
  // figure already computed against the module default.
  const remainingTokens =
    typeof contextWindowLimit === "number" &&
    Number.isFinite(contextWindowLimit) &&
    contextWindowLimit > 0
      ? contextWindowLimit - usage.usedTokens
      : usage.remainingTokens;

  // Never let a single output take more than half of what is left.
  const maxOutputTokens = Math.min(remainingTokens * 0.5, targetMaxTokens);
  if (maxOutputTokens <= 0) {
    return {
      result: "[Output suppressed - context window exhausted]",
      truncated: true,
    };
  }

  return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines);
}
/**
 * Builds a truncator bound to `ctx`, so callers need not pass it each time.
 *
 * @param ctx - Plugin input captured by the returned functions.
 * @returns An object with `truncate` (usage-aware, async), `getUsage`
 *   (context usage lookup) and `truncateSync` (fixed-budget truncation).
 */
export function createDynamicTruncator(ctx: PluginInput) {
  const truncate = (
    sessionID: string,
    output: string,
    options?: TruncationOptions,
  ) => dynamicTruncate(ctx, sessionID, output, options);

  const getUsage = (sessionID: string) => getContextWindowUsage(ctx, sessionID);

  const truncateSync = (
    output: string,
    maxTokens: number,
    preserveHeaderLines?: number,
  ) => truncateToTokenLimit(output, maxTokens, preserveHeaderLines);

  return { truncate, getUsage, truncateSync };
}