From 48dc8298ddd4cedc7e16331a587dbf739cbdb0a6 Mon Sep 17 00:00:00 2001
From: Sisyphus
Date: Sat, 3 Jan 2026 12:09:34 +0900
Subject: [PATCH] fix(webfetch): apply aggressive truncation for webfetch outputs (#434)

Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens ~200k chars) was too high
for webfetch outputs. Web pages can be large but most content doesn't exceed
this limit, so truncation rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use aggressive 10k token limit
- Other tools continue using default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai
---
Review notes (free-form section; not part of the commit message or the diff):

 * The hook now passes { targetMaxTokens } as a third argument to
   truncator.truncate(): WEBFETCH_MAX_TOKENS (10_000) when input.tool is
   "webfetch" or "WebFetch", DEFAULT_MAX_TOKENS (50_000) for any other tool.
 * TOOL_SPECIFIC_MAX_TOKENS is a plain object literal indexed by input.tool.
   When truncate_all_tool_outputs is enabled the TRUNCATABLE_TOOLS check is
   bypassed, so a tool whose name matches an inherited Object.prototype member
   (e.g. "constructor") would produce a non-nullish, non-number value and the
   `??` fallback would not fire. Presumably no such tool names exist; a Map or
   an Object.hasOwn guard would close the gap in a follow-up.
 * NOTE(review): line breaks, leading indentation and the type arguments
   ReturnType<typeof ...> / Record<string, number> were reconstructed from a
   whitespace-collapsed copy of this patch. Confirm them against the original
   commit, or apply with whitespace-insensitive options, before relying on it.
 * NOTE(review): the From: and Co-authored-by: lines carry no e-mail address
   in this copy; verify against the original commit.

 src/hooks/tool-output-truncator.test.ts | 168 ++++++++++++++++++++++++
 src/hooks/tool-output-truncator.ts      |  15 ++-
 2 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 src/hooks/tool-output-truncator.test.ts

diff --git a/src/hooks/tool-output-truncator.test.ts b/src/hooks/tool-output-truncator.test.ts
new file mode 100644
index 0000000..e38a1c7
--- /dev/null
+++ b/src/hooks/tool-output-truncator.test.ts
@@ -0,0 +1,168 @@
+import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
+import { createToolOutputTruncatorHook } from "./tool-output-truncator"
+import * as dynamicTruncator from "../shared/dynamic-truncator"
+
+describe("createToolOutputTruncatorHook", () => {
+  let hook: ReturnType<typeof createToolOutputTruncatorHook>
+  let truncateSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue({
+      truncate: mock(async (_sessionID: string, output: string, options?: { targetMaxTokens?: number }) => ({
+        result: output,
+        truncated: false,
+        targetMaxTokens: options?.targetMaxTokens,
+      })),
+      getUsage: mock(async () => null),
+      truncateSync: mock(() => ({ result: "", truncated: false })),
+    })
+    hook = createToolOutputTruncatorHook({} as never)
+  })
+
+  describe("tool.execute.after", () => {
+    const createInput = (tool: string) => ({
+      tool,
+      sessionID: "test-session",
+      callID: "test-call-id",
+    })
+
+    const createOutput = (outputText: string) => ({
+      title: "Result",
+      output: outputText,
+      metadata: {},
+    })
+
+    describe("#given webfetch tool", () => {
+      describe("#when output is processed", () => {
+        it("#then should use aggressive truncation limit (10k tokens)", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
+            result: "truncated",
+            truncated: true,
+            targetMaxTokens: options?.targetMaxTokens,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("webfetch")
+          const output = createOutput("large content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "large content",
+            { targetMaxTokens: 10_000 }
+          )
+        })
+      })
+
+      describe("#when using WebFetch variant", () => {
+        it("#then should also use aggressive truncation limit", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("WebFetch")
+          const output = createOutput("large content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "large content",
+            { targetMaxTokens: 10_000 }
+          )
+        })
+      })
+    })
+
+    describe("#given grep tool", () => {
+      describe("#when output is processed", () => {
+        it("#then should use default truncation limit (50k tokens)", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("grep")
+          const output = createOutput("grep output")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "grep output",
+            { targetMaxTokens: 50_000 }
+          )
+        })
+      })
+    })
+
+    describe("#given non-truncatable tool", () => {
+      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
+        it("#then should not call truncator", async () => {
+          const truncateMock = mock(async () => ({
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("Read")
+          const output = createOutput("file content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).not.toHaveBeenCalled()
+        })
+      })
+    })
+
+    describe("#given truncate_all_tool_outputs enabled", () => {
+      describe("#when any tool output is processed", () => {
+        it("#then should truncate non-listed tools too", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never, {
+            experimental: { truncate_all_tool_outputs: true },
+          })
+
+          const input = createInput("Read")
+          const output = createOutput("file content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalled()
+        })
+      })
+    })
+  })
+})
diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts
index 7fb2eb8..09713d6 100644
--- a/src/hooks/tool-output-truncator.ts
+++ b/src/hooks/tool-output-truncator.ts
@@ -2,6 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import type { ExperimentalConfig } from "../config/schema"
 import { createDynamicTruncator } from "../shared/dynamic-truncator"
 
+const DEFAULT_MAX_TOKENS = 50_000 // ~200k chars
+const WEBFETCH_MAX_TOKENS = 10_000 // ~40k chars - web pages need aggressive truncation
+
 const TRUNCATABLE_TOOLS = [
   "grep",
   "Grep",
@@ -21,6 +24,11 @@ const TRUNCATABLE_TOOLS = [
   "WebFetch",
 ]
 
+const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
+  webfetch: WEBFETCH_MAX_TOKENS,
+  WebFetch: WEBFETCH_MAX_TOKENS,
+}
+
 interface ToolOutputTruncatorOptions {
   experimental?: ExperimentalConfig
 }
@@ -36,7 +44,12 @@ export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOu
     if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return
 
     try {
-      const { result, truncated } = await truncator.truncate(input.sessionID, output.output)
+      const targetMaxTokens = TOOL_SPECIFIC_MAX_TOKENS[input.tool] ?? DEFAULT_MAX_TOKENS
+      const { result, truncated } = await truncator.truncate(
+        input.sessionID,
+        output.output,
+        { targetMaxTokens }
+      )
       if (truncated) {
         output.output = result
       }