fix(webfetch): apply aggressive truncation for webfetch outputs (#434)

Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens, ~200k chars) was too high for webfetch outputs. Web pages can be large, but most of them stay under that limit, so truncation rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use the aggressive 10k token limit
- Other tools continue using the default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
2026-01-03 12:09:34 +09:00
parent 8bc9d6a540
commit 48dc8298dd
2 changed files with 182 additions and 1 deletions
--- a/src/hooks/tool-output-truncator.test.ts
+++ b/src/hooks/tool-output-truncator.test.ts
@@ -0,0 +1,168 @@
+import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
+import { createToolOutputTruncatorHook } from "./tool-output-truncator"
+import * as dynamicTruncator from "../shared/dynamic-truncator"
+
+describe("createToolOutputTruncatorHook", () => { // Checks which targetMaxTokens the hook's "tool.execute.after" handler hands to truncator.truncate for each tool name.
+  let hook: ReturnType<typeof createToolOutputTruncatorHook>
+  let truncateSpy: ReturnType<typeof spyOn> // spy installed on the createDynamicTruncator export
+
+  beforeEach(() => { // NOTE(review): the spy is installed before every test but no afterEach/mockRestore appears in this file — confirm it cannot leak into other suites
+    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue({
+      truncate: mock(async (_sessionID: string, output: string, options?: { targetMaxTokens?: number }) => ({ // default stub: echoes the output back and reports truncated: false
+        result: output,
+        truncated: false,
+        targetMaxTokens: options?.targetMaxTokens,
+      })),
+      getUsage: mock(async () => null),
+      truncateSync: mock(() => ({ result: "", truncated: false })),
+    })
+    hook = createToolOutputTruncatorHook({} as never) // an empty object stands in for the plugin context argument
+  })
+
+  describe("tool.execute.after", () => {
+    const createInput = (tool: string) => ({ // first handler argument: the tool name plus fixed session and call IDs
+      tool,
+      sessionID: "test-session",
+      callID: "test-call-id",
+    })
+
+    const createOutput = (outputText: string) => ({ // second handler argument: the tool result carrying outputText in its output field
+      title: "Result",
+      output: outputText,
+      metadata: {},
+    })
+
+    describe("#given webfetch tool", () => {
+      describe("#when output is processed", () => {
+        it("#then should use aggressive truncation limit (10k tokens)", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({ // per-test stub so the call arguments can be asserted on this mock
+            result: "truncated",
+            truncated: true,
+            targetMaxTokens: options?.targetMaxTokens,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never) // rebuilt after re-stubbing; presumably the hook obtains its truncator from the factory at creation time — confirm against the hook body
+
+          const input = createInput("webfetch")
+          const output = createOutput("large content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "large content",
+            { targetMaxTokens: 10_000 } // the webfetch-specific limit
+          )
+        })
+      })
+
+      describe("#when using WebFetch variant", () => {
+        it("#then should also use aggressive truncation limit", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({ // NOTE(review): options is declared but never read in this stub
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("WebFetch") // capitalised spelling of the same tool
+          const output = createOutput("large content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "large content",
+            { targetMaxTokens: 10_000 }
+          )
+        })
+      })
+    })
+
+    describe("#given grep tool", () => {
+      describe("#when output is processed", () => {
+        it("#then should use default truncation limit (50k tokens)", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({ // NOTE(review): options is declared but never read in this stub
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never)
+
+          const input = createInput("grep")
+          const output = createOutput("grep output")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalledWith(
+            "test-session",
+            "grep output",
+            { targetMaxTokens: 50_000 } // the default limit, used when no tool-specific entry exists
+          )
+        })
+      })
+    })
+
+    describe("#given non-truncatable tool", () => {
+      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
+        it("#then should not call truncator", async () => {
+          const truncateMock = mock(async () => ({
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never) // no options passed, so truncate_all_tool_outputs is not enabled
+
+          const input = createInput("Read") // expected to be absent from TRUNCATABLE_TOOLS (the list is only partially visible here — confirm)
+          const output = createOutput("file content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).not.toHaveBeenCalled()
+        })
+      })
+    })
+
+    describe("#given truncate_all_tool_outputs enabled", () => {
+      describe("#when any tool output is processed", () => {
+        it("#then should truncate non-listed tools too", async () => {
+          const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({ // NOTE(review): options is declared but never read in this stub
+            result: "truncated",
+            truncated: true,
+          }))
+          truncateSpy.mockReturnValue({
+            truncate: truncateMock,
+            getUsage: mock(async () => null),
+            truncateSync: mock(() => ({ result: "", truncated: false })),
+          })
+          hook = createToolOutputTruncatorHook({} as never, { // second argument carries the experimental config
+            experimental: { truncate_all_tool_outputs: true },
+          })
+
+          const input = createInput("Read")
+          const output = createOutput("file content")
+
+          await hook["tool.execute.after"](input, output)
+
+          expect(truncateMock).toHaveBeenCalled() // NOTE(review): only checks that truncate ran; the targetMaxTokens passed for a non-listed tool is not asserted
+        })
+      })
+    })
+  })
+})
--- a/src/hooks/tool-output-truncator.ts
+++ b/src/hooks/tool-output-truncator.ts
@@ -2,6 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import type { ExperimentalConfig } from "../config/schema"
 import { createDynamicTruncator } from "../shared/dynamic-truncator"

+const DEFAULT_MAX_TOKENS = 50_000 // fallback targetMaxTokens for tools without a tool-specific limit (~200k chars)
+const WEBFETCH_MAX_TOKENS = 10_000 // tighter targetMaxTokens for webfetch/WebFetch output (~40k chars) - web pages need aggressive truncation
+
 const TRUNCATABLE_TOOLS = [
  "grep",
  "Grep",
@@ -21,6 +24,11 @@ const TRUNCATABLE_TOOLS = [
  "WebFetch",
 ]

+const TOOL_SPECIFIC_MAX_TOKENS = Object.assign(Object.create(null) as Record<string, number>, { // per-tool targetMaxTokens overrides, keyed by exact tool name
+  webfetch: WEBFETCH_MAX_TOKENS, // both spellings of the tool name are listed
+  WebFetch: WEBFETCH_MAX_TOKENS,
+}) // null prototype: inherited keys ("constructor", "toString", ...) read as undefined, so lookups for such tool names fall through to the default
+
 interface ToolOutputTruncatorOptions {
  experimental?: ExperimentalConfig
 }
@@ -36,7 +44,12 @@ export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOu
    if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return

    try {
-      const { result, truncated } = await truncator.truncate(input.sessionID, output.output)
+      const targetMaxTokens = TOOL_SPECIFIC_MAX_TOKENS[input.tool] ?? DEFAULT_MAX_TOKENS
+      const { result, truncated } = await truncator.truncate(
+        input.sessionID,
+        output.output,
+        { targetMaxTokens }
+      )
      if (truncated) {
        output.output = result
      }