fix: clarify tool descriptions to prevent LLM selecting vision for YouTube
Browse files
Problem: The LLM was selecting the vision tool for YouTube URLs because the
vision description mentioned "YouTube links", even though the vision tool
cannot process YouTube URLs directly.
Changes:
- Vision: Removed "videos, YouTube links" from description
- Vision: Added NOTE about using youtube_transcript for YouTube
- YouTube: Enhanced description to be more explicit about being the FIRST choice
- YouTube: Added "ONLY tool that can process YouTube URLs directly"
This should guide the LLM to select youtube_transcript for YouTube URLs.
Co-Authored-By: Claude <noreply@anthropic.com>
- src/tools/__init__.py +3 -3
src/tools/__init__.py
CHANGED
|
@@ -66,7 +66,7 @@ TOOLS = {
|
|
| 66 |
},
|
| 67 |
"vision": {
|
| 68 |
"function": analyze_image,
|
| 69 |
-
"description": "Analyze images
|
| 70 |
"parameters": {
|
| 71 |
"image_path": {
|
| 72 |
"description": "Path to the image file to analyze",
|
|
@@ -82,10 +82,10 @@ TOOLS = {
|
|
| 82 |
},
|
| 83 |
"youtube_transcript": {
|
| 84 |
"function": youtube_transcript,
|
| 85 |
-
"description": "Extract transcript from YouTube video
|
| 86 |
"parameters": {
|
| 87 |
"url": {
|
| 88 |
-
"description": "YouTube video URL (youtube.com, youtu.be, or shorts)",
|
| 89 |
"type": "string"
|
| 90 |
}
|
| 91 |
},
|
|
|
|
| 66 |
},
|
| 67 |
"vision": {
|
| 68 |
"function": analyze_image,
|
| 69 |
+
"description": "Analyze images using multimodal AI vision models. Describe visual content, identify objects, read text from images, answer questions about photos or screenshots. Use when question mentions images, photos, pictures, screenshots, or visual content. NOTE: For YouTube videos, use youtube_transcript tool instead. For video files (MP4, AVI, etc.), the file must be downloaded first.",
|
| 70 |
"parameters": {
|
| 71 |
"image_path": {
|
| 72 |
"description": "Path to the image file to analyze",
|
|
|
|
| 82 |
},
|
| 83 |
"youtube_transcript": {
|
| 84 |
"function": youtube_transcript,
|
| 85 |
+
"description": "Extract transcript from YouTube video URLs (youtube.com, youtu.be, shorts). Use this tool FIRST when question mentions YouTube, video, or contains a YouTube URL. This tool handles video content by extracting the transcript (what is said/discussed in the video). Falls back to Whisper audio transcription if captions are unavailable. This is the ONLY tool that can process YouTube URLs directly.",
|
| 86 |
"parameters": {
|
| 87 |
"url": {
|
| 88 |
+
"description": "YouTube video URL (youtube.com/watch?v=ID, youtu.be/ID, or shorts/ID format)",
|
| 89 |
"type": "string"
|
| 90 |
}
|
| 91 |
},
|