fix(webfetch): apply aggressive truncation for webfetch outputs (#434)
Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens, ~200k chars) was too high for webfetch outputs. Web pages can be large, but most of them stay below that limit, so truncation rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce a TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use the aggressive 10k token limit
- Other tools continue using the default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
This commit is contained in:
168
src/hooks/tool-output-truncator.test.ts
Normal file
168
src/hooks/tool-output-truncator.test.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
|
||||||
|
import { createToolOutputTruncatorHook } from "./tool-output-truncator"
|
||||||
|
import * as dynamicTruncator from "../shared/dynamic-truncator"
|
||||||
|
|
||||||
|
describe("createToolOutputTruncatorHook", () => {
  type TruncateOptions = { targetMaxTokens?: number }
  type HookOptions = Parameters<typeof createToolOutputTruncatorHook>[1]

  let hook: ReturnType<typeof createToolOutputTruncatorHook>
  let truncateSpy: ReturnType<typeof spyOn>

  /**
   * Makes the spied `createDynamicTruncator` factory return a stub truncator.
   *
   * @param truncated - when true the stub reports a truncation and returns the
   *   fixed text "truncated"; when false it echoes the output back untouched.
   * @returns the `truncate` mock so tests can assert on the arguments it received.
   */
  const stubTruncator = (truncated: boolean) => {
    const truncate = mock(async (_sessionID: string, output: string, _options?: TruncateOptions) => ({
      result: truncated ? "truncated" : output,
      truncated,
    }))
    truncateSpy.mockReturnValue({
      truncate,
      getUsage: mock(async () => null),
      truncateSync: mock(() => ({ result: "", truncated: false })),
    })
    return truncate
  }

  /**
   * Installs a truncating stub and then rebuilds the hook under test.
   *
   * The hook is recreated after stubbing so that it is built against the new
   * stub rather than the one installed in `beforeEach`.
   *
   * @param options - forwarded as the second argument of `createToolOutputTruncatorHook`.
   * @returns the `truncate` mock wired into the freshly created hook.
   */
  const createHookWithTruncatingStub = (options?: HookOptions) => {
    const truncate = stubTruncator(true)
    hook = createToolOutputTruncatorHook({} as never, options)
    return truncate
  }

  beforeEach(() => {
    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator")
    // Default: a pass-through truncator, so a test that forgets to stub never hits the real one.
    stubTruncator(false)
    hook = createToolOutputTruncatorHook({} as never)
  })

  describe("tool.execute.after", () => {
    const createInput = (tool: string) => ({
      tool,
      sessionID: "test-session",
      callID: "test-call-id",
    })

    const createOutput = (outputText: string) => ({
      title: "Result",
      output: outputText,
      metadata: {},
    })

    describe("#given webfetch tool", () => {
      describe("#when output is processed", () => {
        it("#then should use aggressive truncation limit (10k tokens)", async () => {
          const truncateMock = createHookWithTruncatingStub()
          const input = createInput("webfetch")
          const output = createOutput("large content")

          await hook["tool.execute.after"](input, output)

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
          // A reported truncation must replace the tool output with the truncated text.
          expect(output.output).toBe("truncated")
        })
      })

      describe("#when using WebFetch variant", () => {
        it("#then should also use aggressive truncation limit", async () => {
          const truncateMock = createHookWithTruncatingStub()
          const input = createInput("WebFetch")
          const output = createOutput("large content")

          await hook["tool.execute.after"](input, output)

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })
    })

    describe("#given grep tool", () => {
      describe("#when output is processed", () => {
        it("#then should use default truncation limit (50k tokens)", async () => {
          const truncateMock = createHookWithTruncatingStub()
          const input = createInput("grep")
          const output = createOutput("grep output")

          await hook["tool.execute.after"](input, output)

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "grep output",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })

    describe("#given non-truncatable tool", () => {
      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
        it("#then should not call truncator", async () => {
          const truncateMock = createHookWithTruncatingStub()
          const input = createInput("Read")
          const output = createOutput("file content")

          await hook["tool.execute.after"](input, output)

          expect(truncateMock).not.toHaveBeenCalled()
          // The hook returns early for unlisted tools, so the output stays as it was.
          expect(output.output).toBe("file content")
        })
      })
    })

    describe("#given truncate_all_tool_outputs enabled", () => {
      describe("#when any tool output is processed", () => {
        it("#then should truncate non-listed tools too", async () => {
          const truncateMock = createHookWithTruncatingStub({
            experimental: { truncate_all_tool_outputs: true },
          })
          const input = createInput("Read")
          const output = createOutput("file content")

          await hook["tool.execute.after"](input, output)

          // "Read" has no tool-specific limit, so the default 50k limit applies.
          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "file content",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })
  })
})
|
||||||
@@ -2,6 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
|||||||
import type { ExperimentalConfig } from "../config/schema"
|
import type { ExperimentalConfig } from "../config/schema"
|
||||||
import { createDynamicTruncator } from "../shared/dynamic-truncator"
|
import { createDynamicTruncator } from "../shared/dynamic-truncator"
|
||||||
|
|
||||||
|
// Token budgets passed to the truncator as `targetMaxTokens`.
// The "~chars" figures below work out to roughly 4 characters per token.

// Fallback budget for any tool without an entry in TOOL_SPECIFIC_MAX_TOKENS.
const DEFAULT_MAX_TOKENS = 50_000 // ~200k chars
|
||||||
|
// Tighter budget applied to the webfetch/WebFetch tools.
const WEBFETCH_MAX_TOKENS = 10_000 // ~40k chars - web pages need aggressive truncation
|
||||||
|
|
||||||
const TRUNCATABLE_TOOLS = [
|
const TRUNCATABLE_TOOLS = [
|
||||||
"grep",
|
"grep",
|
||||||
"Grep",
|
"Grep",
|
||||||
@@ -21,6 +24,11 @@ const TRUNCATABLE_TOOLS = [
|
|||||||
"WebFetch",
|
"WebFetch",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
// Per-tool overrides for the truncation budget, keyed by the exact tool name.
// The hook indexes this map with `input.tool` and falls back to
// DEFAULT_MAX_TOKENS when there is no entry. The lookup is case-sensitive,
// which is why both spellings of the web fetch tool are listed.
// NOTE(review): this is a plain object literal, so a tool named after an
// Object.prototype member (e.g. "toString") would resolve to the inherited
// value instead of falling back - confirm such tool names cannot occur.
const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
|
||||||
|
webfetch: WEBFETCH_MAX_TOKENS,
|
||||||
|
WebFetch: WEBFETCH_MAX_TOKENS,
|
||||||
|
}
|
||||||
|
|
||||||
interface ToolOutputTruncatorOptions {
|
interface ToolOutputTruncatorOptions {
|
||||||
experimental?: ExperimentalConfig
|
experimental?: ExperimentalConfig
|
||||||
}
|
}
|
||||||
@@ -36,7 +44,12 @@ export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOu
|
|||||||
if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return
|
if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { result, truncated } = await truncator.truncate(input.sessionID, output.output)
|
const targetMaxTokens = TOOL_SPECIFIC_MAX_TOKENS[input.tool] ?? DEFAULT_MAX_TOKENS
|
||||||
|
const { result, truncated } = await truncator.truncate(
|
||||||
|
input.sessionID,
|
||||||
|
output.output,
|
||||||
|
{ targetMaxTokens }
|
||||||
|
)
|
||||||
if (truncated) {
|
if (truncated) {
|
||||||
output.output = result
|
output.output = result
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user