File size: 5,840 Bytes
bd73133 1041734 e7b4937 bd73133 1041734 38cc8e4 bd73133 1041734 38cc8e4 bd73133 1041734 38cc8e4 1041734 4eed151 1041734 5890f66 4eed151 1041734 5890f66 4eed151 1041734 5890f66 4eed151 5890f66 4eed151 1041734 3ee1e2c 4eed151 3ee1e2c 4eed151 1041734 38cc8e4 f1b095a 38cc8e4 61ecfdb 38cc8e4 1041734 38cc8e4 1041734 38cc8e4 1041734 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
"""
Tool implementations package
Author: @mangubee
This package contains all agent tools:
- web_search: Web search using Tavily/Exa
- file_parser: Multi-format file parsing (PDF/Excel/Word/Text)
- calculator: Safe mathematical expression evaluation
- vision: Multimodal image analysis using LLMs
- youtube: YouTube transcript extraction with Whisper fallback
- audio: Audio transcription using Whisper (ZeroGPU)
Stage 2: All tools implemented with retry logic and error handling
Phase 1: YouTube + Audio transcription added
"""
from src.tools.web_search import search, tavily_search, exa_search
from src.tools.file_parser import parse_file, parse_pdf, parse_excel, parse_word, parse_text
from src.tools.calculator import safe_eval
from src.tools.vision import analyze_image, analyze_image_gemini, analyze_image_claude
from src.tools.youtube import youtube_transcript
from src.tools.audio import transcribe_audio, cleanup
# Tool registry with metadata.
#
# TOOLS maps each tool name to a dict holding:
#   "function"        - the callable imported above that implements the tool
#   "description"     - free-text guidance on what the tool does / when to use it
#   "parameters"      - dict keyed by parameter name (a dict, not a list, to
#                       match LLM function-calling schema requirements)
#   "required_params" - names of the parameters that must be supplied
#   "category"        - coarse grouping label for the tool
def _param(description, type_name):
    """Return the schema entry for a single tool parameter."""
    return {"description": description, "type": type_name}


def _tool(function, description, parameters, required_params, category):
    """Return one registry entry carrying the five keys every tool provides."""
    return {
        "function": function,
        "description": description,
        "parameters": parameters,
        "required_params": required_params,
        "category": category,
    }


TOOLS = {
    "web_search": _tool(
        function=search,
        description="Search the web for factual information, current events, Wikipedia articles, statistics, people, companies, and research. Use when question requires external knowledge not in context or files.",
        parameters={
            "query": _param("Search query string", "string"),
            "max_results": _param("Maximum number of search results to return (default: 5)", "integer"),
        },
        required_params=["query"],
        category="information_retrieval",
    ),
    "parse_file": _tool(
        function=parse_file,
        description="Extract and parse content from uploaded files (PDF, Excel, Word, Text, CSV). Use when question references 'the file', 'uploaded document', 'attachment', or specific file formats. Reads file structure and text content.",
        parameters={
            "file_path": _param("Absolute or relative path to the file to parse", "string"),
        },
        required_params=["file_path"],
        category="file_processing",
    ),
    "calculator": _tool(
        function=safe_eval,
        description="Evaluate mathematical expressions and perform calculations (arithmetic, algebra, trigonometry, logarithms). Supports operators (+, -, *, /, **) and functions (sqrt, sin, cos, log, abs, etc). Use for any numerical computation or formula evaluation.",
        parameters={
            "expression": _param("Mathematical expression to evaluate (e.g., '2 + 2', 'sqrt(16)', '25 * 37 + 100')", "string"),
        },
        required_params=["expression"],
        category="computation",
    ),
    "vision": _tool(
        function=analyze_image,
        description="Analyze image files using multimodal AI vision models. Describe visual content, identify objects, read text from images, answer questions about photos or screenshots. Use when question mentions images, photos, pictures, screenshots, or visual content. Supports JPEG, PNG, GIF, BMP, WebP formats. NOTE: Cannot process videos. For YouTube videos or video files, use youtube_transcript or transcribe_audio tools instead.",
        parameters={
            "image_path": _param("Path to the image file to analyze (JPEG, PNG, GIF, BMP, WebP)", "string"),
            "question": _param("Question to ask about the image (optional, defaults to 'Describe this image')", "string"),
        },
        required_params=["image_path"],
        category="multimodal",
    ),
    "youtube_transcript": _tool(
        function=youtube_transcript,
        description="Extract transcript from YouTube video URLs (youtube.com, youtu.be, shorts) OR analyze video frames visually. Use this tool FIRST when question mentions YouTube, video, or contains a YouTube URL. This tool handles video content in two modes: (1) Transcript mode extracts what is said/discussed via captions or Whisper fallback, (2) Frame mode extracts and analyzes video frames with vision models. Mode is controlled by YOUTUBE_MODE env variable. This is the ONLY tool that can process YouTube URLs directly.",
        parameters={
            "url": _param("YouTube video URL (youtube.com/watch?v=ID, youtu.be/ID, or shorts/ID format)", "string"),
        },
        required_params=["url"],
        category="video_processing",
    ),
    "transcribe_audio": _tool(
        function=transcribe_audio,
        description="Transcribe audio file using Whisper speech-to-text. Supports MP3, WAV, M4A, OGG, FLAC, AAC formats. Use when question references audio files, podcasts, voice recordings, or when YouTube video lacks transcript. Returns transcribed text.",
        parameters={
            "file_path": _param("Path to the audio file to transcribe", "string"),
        },
        required_params=["file_path"],
        category="audio_processing",
    ),
}
# Public API of the package, declared in three groups.

# Unified entry points: one callable per tool.
__all__ = [
    "search",
    "parse_file",
    "safe_eval",
    "analyze_image",
    "youtube_transcript",
    "transcribe_audio",
]

# Specific implementations and helpers (for advanced use).
__all__ += [
    "tavily_search",
    "exa_search",
    "parse_pdf",
    "parse_excel",
    "parse_word",
    "parse_text",
    "analyze_image_gemini",
    "analyze_image_claude",
    "cleanup",
]

# The tool registry itself.
__all__ += ["TOOLS"]
|