fix(webfetch): apply aggressive truncation for webfetch outputs (#434)

Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens, ~200k chars) was too high
for webfetch outputs. Web pages can be large, but most of them still fit under
that limit, so truncation was rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use aggressive 10k token limit
- Other tools continue using default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
This commit is contained in:
Sisyphus
2026-01-03 12:09:34 +09:00
committed by GitHub
parent 8bc9d6a540
commit 48dc8298dd
2 changed files with 182 additions and 1 deletions

View File

@@ -0,0 +1,168 @@
import { describe, it, expect, beforeEach, afterEach, mock, spyOn } from "bun:test"
import { createToolOutputTruncatorHook } from "./tool-output-truncator"
import * as dynamicTruncator from "../shared/dynamic-truncator"
/**
 * Tests for createToolOutputTruncatorHook's "tool.execute.after" handler.
 *
 * The dynamic truncator factory is replaced with a spy so each test can
 * inspect the arguments the hook forwards to `truncate` (in particular the
 * per-tool `targetMaxTokens` budget) without running real truncation.
 */
describe("createToolOutputTruncatorHook", () => {
  let hook: ReturnType<typeof createToolOutputTruncatorHook>
  let truncateSpy: ReturnType<typeof spyOn>

  beforeEach(() => {
    // Default stand-in: a pass-through truncator that never truncates.
    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue({
      truncate: mock(async (_sessionID: string, output: string, options?: { targetMaxTokens?: number }) => ({
        result: output,
        truncated: false,
        targetMaxTokens: options?.targetMaxTokens,
      })),
      getUsage: mock(async () => null),
      truncateSync: mock(() => ({ result: "", truncated: false })),
    })
    hook = createToolOutputTruncatorHook({} as never)
  })

  afterEach(() => {
    // Put the real createDynamicTruncator back so the stub cannot leak into
    // other tests that import the same module.
    truncateSpy.mockRestore()
  })

  /**
   * Installs a truncator whose `truncate` always reports a truncation,
   * rebuilds `hook` on top of it, and returns the `truncate` mock so the
   * caller can assert on how it was invoked.
   */
  const createHookWithTruncatingMock = (options?: Parameters<typeof createToolOutputTruncatorHook>[1]) => {
    const truncateMock = mock(
      async (_sessionID: string, _output: string, _options?: { targetMaxTokens?: number }) => ({
        result: "truncated",
        truncated: true,
      })
    )
    truncateSpy.mockReturnValue({
      truncate: truncateMock,
      getUsage: mock(async () => null),
      truncateSync: mock(() => ({ result: "", truncated: false })),
    })
    hook = createToolOutputTruncatorHook({} as never, options)
    return truncateMock
  }

  describe("tool.execute.after", () => {
    const createInput = (tool: string) => ({
      tool,
      sessionID: "test-session",
      callID: "test-call-id",
    })

    const createOutput = (outputText: string) => ({
      title: "Result",
      output: outputText,
      metadata: {},
    })

    describe("#given webfetch tool", () => {
      describe("#when output is processed", () => {
        it("#then should use aggressive truncation limit (10k tokens)", async () => {
          const truncateMock = createHookWithTruncatingMock()

          await hook["tool.execute.after"](createInput("webfetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })

      describe("#when using WebFetch variant", () => {
        it("#then should also use aggressive truncation limit", async () => {
          const truncateMock = createHookWithTruncatingMock()

          await hook["tool.execute.after"](createInput("WebFetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })
    })

    describe("#given grep tool", () => {
      describe("#when output is processed", () => {
        it("#then should use default truncation limit (50k tokens)", async () => {
          const truncateMock = createHookWithTruncatingMock()

          await hook["tool.execute.after"](createInput("grep"), createOutput("grep output"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "grep output",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })

    describe("#given non-truncatable tool", () => {
      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
        it("#then should not call truncator", async () => {
          const truncateMock = createHookWithTruncatingMock()

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          expect(truncateMock).not.toHaveBeenCalled()
        })
      })
    })

    describe("#given truncate_all_tool_outputs enabled", () => {
      describe("#when any tool output is processed", () => {
        it("#then should truncate non-listed tools too", async () => {
          const truncateMock = createHookWithTruncatingMock({
            experimental: { truncate_all_tool_outputs: true },
          })

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          expect(truncateMock).toHaveBeenCalled()
          // "Read" has no tool-specific limit, so the default budget applies.
          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "file content",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })
  })
})

View File

@@ -2,6 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin"
import type { ExperimentalConfig } from "../config/schema"
import { createDynamicTruncator } from "../shared/dynamic-truncator"
/** Token budget used when a tool has no entry in TOOL_SPECIFIC_MAX_TOKENS (~200k chars). */
const DEFAULT_MAX_TOKENS = 50_000 // ~200k chars
/** Tighter token budget for webfetch output (~40k chars): web pages need aggressive truncation. */
const WEBFETCH_MAX_TOKENS = 10_000 // ~40k chars - web pages need aggressive truncation
const TRUNCATABLE_TOOLS = [
"grep",
"Grep",
@@ -21,6 +24,11 @@ const TRUNCATABLE_TOOLS = [
"WebFetch",
]
/**
 * Per-tool token budgets that override DEFAULT_MAX_TOKENS.
 *
 * The table is indexed with the raw tool name, so it is built on a
 * null-prototype object: a tool called e.g. "constructor" or "toString" must
 * resolve to `undefined` (and fall back to the default) rather than to a
 * member inherited from Object.prototype.
 */
const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = Object.assign(
  Object.create(null) as Record<string, number>,
  {
    webfetch: WEBFETCH_MAX_TOKENS,
    WebFetch: WEBFETCH_MAX_TOKENS,
  }
)
/** Optional second argument of createToolOutputTruncatorHook. */
interface ToolOutputTruncatorOptions {
// Experimental configuration; the tests pass `truncate_all_tool_outputs: true` here.
// NOTE(review): presumably this flag is what drives `truncateAll` in the hook — confirm.
experimental?: ExperimentalConfig
}
@@ -36,7 +44,12 @@ export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOu
if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return
try {
const { result, truncated } = await truncator.truncate(input.sessionID, output.output)
const targetMaxTokens = TOOL_SPECIFIC_MAX_TOKENS[input.tool] ?? DEFAULT_MAX_TOKENS
const { result, truncated } = await truncator.truncate(
input.sessionID,
output.output,
{ targetMaxTokens }
)
if (truncated) {
output.output = result
}