fix(webfetch): apply aggressive truncation for webfetch outputs (#434)
Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens, ~200k chars) was too high for webfetch outputs. Web pages can be large, but most of them stay under this limit, so truncation rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use the aggressive 10k token limit
- Other tools continue using the default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
This commit is contained in:
168
src/hooks/tool-output-truncator.test.ts
Normal file
168
src/hooks/tool-output-truncator.test.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
|
||||
import { createToolOutputTruncatorHook } from "./tool-output-truncator"
|
||||
import * as dynamicTruncator from "../shared/dynamic-truncator"
|
||||
|
||||
describe("createToolOutputTruncatorHook", () => {
  type TruncateOptions = { targetMaxTokens?: number }
  type HookOptions = Parameters<typeof createToolOutputTruncatorHook>[1]

  let hook: ReturnType<typeof createToolOutputTruncatorHook>
  let truncateSpy: ReturnType<typeof spyOn>

  /**
   * Makes the spied `createDynamicTruncator` factory hand out a fake truncator
   * whose `truncate` always reports that it truncated, then rebuilds `hook`
   * so it picks up that fake.
   *
   * @param hookOptions Optional second argument forwarded to the hook factory.
   * @returns The `truncate` mock, so a test can inspect how it was called.
   */
  const setupTruncatingHook = (hookOptions?: HookOptions) => {
    const truncateMock = mock(
      async (_sessionID: string, _output: string, _options?: TruncateOptions) => ({
        result: "truncated",
        truncated: true,
      })
    )
    truncateSpy.mockReturnValue({
      truncate: truncateMock,
      getUsage: mock(async () => null),
      truncateSync: mock(() => ({ result: "", truncated: false })),
    })
    // The hook must be created after the factory is stubbed; otherwise it
    // would keep using the truncator installed by `beforeEach`.
    hook = createToolOutputTruncatorHook({} as never, hookOptions)
    return truncateMock
  }

  beforeEach(() => {
    // Default fake: a pass-through truncator that never truncates.
    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue({
      truncate: mock(async (_sessionID: string, output: string, options?: TruncateOptions) => ({
        result: output,
        truncated: false,
        targetMaxTokens: options?.targetMaxTokens,
      })),
      getUsage: mock(async () => null),
      truncateSync: mock(() => ({ result: "", truncated: false })),
    })
    hook = createToolOutputTruncatorHook({} as never)
  })

  describe("tool.execute.after", () => {
    const createInput = (tool: string) => ({
      tool,
      sessionID: "test-session",
      callID: "test-call-id",
    })

    const createOutput = (outputText: string) => ({
      title: "Result",
      output: outputText,
      metadata: {},
    })

    describe("#given webfetch tool", () => {
      describe("#when output is processed", () => {
        it("#then should use aggressive truncation limit (10k tokens)", async () => {
          const truncateMock = setupTruncatingHook()

          await hook["tool.execute.after"](createInput("webfetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })

      describe("#when using WebFetch variant", () => {
        it("#then should also use aggressive truncation limit", async () => {
          const truncateMock = setupTruncatingHook()

          await hook["tool.execute.after"](createInput("WebFetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })
    })

    describe("#given grep tool", () => {
      describe("#when output is processed", () => {
        it("#then should use default truncation limit (50k tokens)", async () => {
          const truncateMock = setupTruncatingHook()

          await hook["tool.execute.after"](createInput("grep"), createOutput("grep output"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "grep output",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })

    describe("#given non-truncatable tool", () => {
      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
        it("#then should not call truncator", async () => {
          const truncateMock = setupTruncatingHook()

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          expect(truncateMock).not.toHaveBeenCalled()
        })
      })
    })

    describe("#given truncate_all_tool_outputs enabled", () => {
      describe("#when any tool output is processed", () => {
        it("#then should truncate non-listed tools too", async () => {
          const truncateMock = setupTruncatingHook({
            experimental: { truncate_all_tool_outputs: true },
          })

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          // "Read" has no tool-specific limit, so the default budget applies.
          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "file content",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })
  })
})
|
||||
@@ -2,6 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { ExperimentalConfig } from "../config/schema"
|
||||
import { createDynamicTruncator } from "../shared/dynamic-truncator"
|
||||
|
||||
// Fallback token budget: used for any tool that has no entry in TOOL_SPECIFIC_MAX_TOKENS.
const DEFAULT_MAX_TOKENS = 50_000 // ~200k chars
|
||||
// Tighter token budget that TOOL_SPECIFIC_MAX_TOKENS assigns to the webfetch/WebFetch tools.
const WEBFETCH_MAX_TOKENS = 10_000 // ~40k chars - web pages need aggressive truncation
|
||||
|
||||
const TRUNCATABLE_TOOLS = [
|
||||
"grep",
|
||||
"Grep",
|
||||
@@ -21,6 +24,11 @@ const TRUNCATABLE_TOOLS = [
|
||||
"WebFetch",
|
||||
]
|
||||
|
||||
/**
 * Per-tool overrides of the truncation token budget, keyed by tool name.
 * Tools without an entry fall back to DEFAULT_MAX_TOKENS at the lookup site.
 *
 * The table is built on a null-prototype object because it is indexed with
 * arbitrary tool names: on a plain object literal, names such as
 * "constructor" or "toString" would resolve to inherited Object.prototype
 * members (non-nullish values), so the `??` fallback would never apply.
 */
const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = Object.assign(
  Object.create(null) as Record<string, number>,
  {
    webfetch: WEBFETCH_MAX_TOKENS,
    WebFetch: WEBFETCH_MAX_TOKENS,
  }
)
|
||||
|
||||
/**
 * Optional settings for the tool-output truncator hook.
 * NOTE(review): presumably the type of the second argument of
 * createToolOutputTruncatorHook — the signature is cut off in this view; confirm.
 */
interface ToolOutputTruncatorOptions {
|
||||
// Experimental configuration. The accompanying tests pass
// `{ truncate_all_tool_outputs: true }` here to have tools outside
// TRUNCATABLE_TOOLS truncated as well.
experimental?: ExperimentalConfig
|
||||
}
|
||||
@@ -36,7 +44,12 @@ export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOu
|
||||
if (!truncateAll && !TRUNCATABLE_TOOLS.includes(input.tool)) return
|
||||
|
||||
try {
|
||||
const { result, truncated } = await truncator.truncate(input.sessionID, output.output)
|
||||
const targetMaxTokens = TOOL_SPECIFIC_MAX_TOKENS[input.tool] ?? DEFAULT_MAX_TOKENS
|
||||
const { result, truncated } = await truncator.truncate(
|
||||
input.sessionID,
|
||||
output.output,
|
||||
{ targetMaxTokens }
|
||||
)
|
||||
if (truncated) {
|
||||
output.output = result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user