From 2c778d9352349902ede5272172f1c8b2e1838805 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 30 Dec 2025 10:05:54 +0900 Subject: [PATCH] fix: extend look_at MIME type support for Gemini API media formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add HEIC/HEIF image format support - Add video formats (mp4, mpeg, mov, avi, flv, webm, wmv, 3gpp) - Add audio formats (wav, mp3, aiff, aac, ogg, flac) - Add CSV and Python document formats - Remove unsupported formats (gif, svg, bmp, ico, css, ts) - Update tool description to clarify purpose 🤖 Generated with assistance of OhMyOpenCode --- src/tools/look-at/constants.ts | 2 +- src/tools/look-at/tools.ts | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/tools/look-at/constants.ts b/src/tools/look-at/constants.ts index fb0a75f..b28df1c 100644 --- a/src/tools/look-at/constants.ts +++ b/src/tools/look-at/constants.ts @@ -1,3 +1,3 @@ export const MULTIMODAL_LOOKER_AGENT = "multimodal-looker" as const -export const LOOK_AT_DESCRIPTION = `Analyze media files (PDFs, images, diagrams) via Gemini 2.5 Flash in separate context. Saves main context tokens.` +export const LOOK_AT_DESCRIPTION = `Analyze media files (PDFs, images, diagrams) that require interpretation beyond raw text. Extracts specific information or summaries from documents, describes visual content. Use when you need analyzed/extracted data rather than literal file contents.` diff --git a/src/tools/look-at/tools.ts b/src/tools/look-at/tools.ts index 711a56c..606e545 100644 --- a/src/tools/look-at/tools.ts +++ b/src/tools/look-at/tools.ts @@ -11,20 +11,34 @@ function inferMimeType(filePath: string): string { ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png", - ".gif": "image/gif", ".webp": "image/webp", - ".svg": "image/svg+xml", - ".bmp": "image/bmp", - ".ico": "image/x-icon", + ".heic": "image/heic", + ".heif": "image/heif", + ".mp4": "video/mp4", + ".mpeg": "video/mpeg", + ".mpg": "video/mpeg", + ".mov": "video/mov", + ".avi": "video/avi", + ".flv": "video/x-flv", + ".webm": "video/webm", + ".wmv": "video/wmv", + ".3gpp": "video/3gpp", + ".3gp": "video/3gpp", + ".wav": "audio/wav", + ".mp3": "audio/mp3", + ".aiff": "audio/aiff", + ".aac": "audio/aac", + ".ogg": "audio/ogg", + ".flac": "audio/flac", ".pdf": "application/pdf", ".txt": "text/plain", - ".md": "text/markdown", + ".csv": "text/csv", + ".md": "text/md", + ".html": "text/html", ".json": "application/json", ".xml": "application/xml", - ".html": "text/html", - ".css": "text/css", ".js": "text/javascript", - ".ts": "text/typescript", + ".py": "text/x-python", } return mimeTypes[ext] || "application/octet-stream" }