Spaces:
Runtime error
Runtime error
Upload 22 files
Browse files- .gitignore +31 -0
- ARCHITECTURE.md +436 -0
- OtherDS.py +11 -0
- README.md +14 -14
- agent_cache.json +21 -0
- app.py +149 -46
- cache/agent/__init__.py +6 -0
- cache/agent/__pycache__/__init__.cpython-311.pyc +0 -0
- cache/agent/__pycache__/__init__.cpython-312.pyc +0 -0
- cache/agent/__pycache__/basic_agent.cpython-311.pyc +0 -0
- cache/agent/__pycache__/basic_agent.cpython-312.pyc +0 -0
- cache/agent/__pycache__/constants.cpython-311.pyc +0 -0
- cache/agent/__pycache__/graph.cpython-311.pyc +0 -0
- cache/agent/__pycache__/graph.cpython-312.pyc +0 -0
- cache/agent/__pycache__/tools.cpython-311.pyc +0 -0
- cache/agent/basic_agent.py +211 -0
- cache/agent/constants.py +6 -0
- cache/agent/graph.py +356 -0
- cache/agent/tools.py +476 -0
- cache/agent_graph.png +0 -0
- requirements.txt +21 -1
- rows.json +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
|
| 3 |
+
# Exclude arterm folder (reference code)
|
| 4 |
+
arterm/
|
| 5 |
+
|
| 6 |
+
# Exclude downloads directory
|
| 7 |
+
downloads/
|
| 8 |
+
|
| 9 |
+
# Exclude log files
|
| 10 |
+
*.log
|
| 11 |
+
agent_run_*.log
|
| 12 |
+
|
| 13 |
+
# Exclude cache file
|
| 14 |
+
agent_cache.json
|
| 15 |
+
|
| 16 |
+
# Exclude Python cache
|
| 17 |
+
__pycache__/
|
| 18 |
+
*.pyc
|
| 19 |
+
*.pyo
|
| 20 |
+
|
| 21 |
+
# Exclude downloaded files from evaluation
|
| 22 |
+
*.xlsx
|
| 23 |
+
*.mp3
|
| 24 |
+
*.png
|
| 25 |
+
*.jpg
|
| 26 |
+
*.pdf
|
| 27 |
+
downloaded_file.*
|
| 28 |
+
|
| 29 |
+
# Exclude other temporary files
|
| 30 |
+
rows.json
|
| 31 |
+
OtherDS.py
|
ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Agent Architecture Documentation
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This is a LangGraph-based AI agent designed for the GAIA (General AI Assistants) benchmark evaluation. The agent uses GPT-4o/GPT-4o-mini with tool-calling capabilities to answer complex multi-step questions involving web search, file analysis, multimedia processing, and reasoning.
|
| 6 |
+
|
| 7 |
+
## System Architecture
|
| 8 |
+
|
| 9 |
+
```
|
| 10 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 11 |
+
│ User Request │
|
| 12 |
+
│ (20 GAIA Questions) │
|
| 13 |
+
└────────────────────────────┬────────────────────────────────────┘
|
| 14 |
+
↓
|
| 15 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 16 |
+
│ app.py │
|
| 17 |
+
│ • Fetches questions from API │
|
| 18 |
+
│ • Downloads attached files (Excel, MP3, images, Python) │
|
| 19 |
+
│ • Saves files to downloads/ directory │
|
| 20 |
+
│ • Calls BasicAgent for each question │
|
| 21 |
+
│ • Submits answers to evaluation API │
|
| 22 |
+
└────────────────────────────┬────────────────────────────────────┘
|
| 23 |
+
↓
|
| 24 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 25 |
+
│ BasicAgent │
|
| 26 |
+
│ (agent/basic_agent.py) │
|
| 27 |
+
│ │
|
| 28 |
+
│ 1. Check Cache (agent_cache.json) │
|
| 29 |
+
│ └─ If cached: Return answer instantly ✅ │
|
| 30 |
+
│ │
|
| 31 |
+
│ 2. If not cached: │
|
| 32 |
+
│ └─ Invoke LangGraph workflow │
|
| 33 |
+
│ │
|
| 34 |
+
│ 3. Clean & validate answer │
|
| 35 |
+
│ └─ Remove JSON, code blocks, explanations │
|
| 36 |
+
│ │
|
| 37 |
+
│ 4. Cache answer to disk │
|
| 38 |
+
│ └─ Save to agent_cache.json for future use │
|
| 39 |
+
└────────────────────────────┬────────────────────────────────────┘
|
| 40 |
+
↓
|
| 41 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 42 |
+
│ LangGraph Workflow │
|
| 43 |
+
│ (agent/graph.py) │
|
| 44 |
+
│ │
|
| 45 |
+
│ ┌──────────────┐ │
|
| 46 |
+
│ │ Agent Node │ ← Decides next action │
|
| 47 |
+
│ │ (GPT-4o) │ • Analyze question │
|
| 48 |
+
│ └──────┬───────┘ • Choose tool(s) │
|
| 49 |
+
│ │ • Generate response │
|
| 50 |
+
│ ↓ │
|
| 51 |
+
│ ┌──────────────┐ │
|
| 52 |
+
│ │ Tools Node │ ← Executes tools │
|
| 53 |
+
│ │ │ • Search, calculate, read files │
|
| 54 |
+
│ └──────┬───────┘ • Returns results │
|
| 55 |
+
│ │ │
|
| 56 |
+
│ ↓ │
|
| 57 |
+
│ ┌──────────────┐ │
|
| 58 |
+
│ │ Agent Node │ ← Processes results │
|
| 59 |
+
│ │ (GPT-4o) │ • Analyzes tool output │
|
| 60 |
+
│ └──────┬───────┘ • Decides: more tools or final answer? │
|
| 61 |
+
│ │ │
|
| 62 |
+
│ └─────────→ Loop (max 50 iterations) │
|
| 63 |
+
│ │
|
| 64 |
+
│ Final Answer → Return to BasicAgent │
|
| 65 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
## Core Components
|
| 69 |
+
|
| 70 |
+
### 1. **app.py** - Main Application
|
| 71 |
+
**Responsibilities:**
|
| 72 |
+
- Fetch questions from evaluation API
|
| 73 |
+
- Download attached files from `/files/{task_id}` endpoint
|
| 74 |
+
- Orchestrate agent execution for all questions
|
| 75 |
+
- Submit answers to evaluation API
|
| 76 |
+
- Display results
|
| 77 |
+
|
| 78 |
+
**Key Functions:**
|
| 79 |
+
- `run_and_submit_all()` - Main evaluation loop
|
| 80 |
+
- File download with error handling
|
| 81 |
+
- Results aggregation and submission
|
| 82 |
+
|
| 83 |
+
### 2. **agent/basic_agent.py** - Agent Wrapper
|
| 84 |
+
**Responsibilities:**
|
| 85 |
+
- Manage agent lifecycle
|
| 86 |
+
- Implement caching system (persistent to disk)
|
| 87 |
+
- Clean and validate answers
|
| 88 |
+
- Logging to file
|
| 89 |
+
|
| 90 |
+
**Key Features:**
|
| 91 |
+
- **Persistent Caching:** Saves answers to `agent_cache.json`
|
| 92 |
+
- **Answer Cleaning:** Removes JSON, code blocks, explanations
|
| 93 |
+
- **Validation:** Ensures no empty answers submitted
|
| 94 |
+
- **Logging:** All output saved to timestamped log files
|
| 95 |
+
|
| 96 |
+
**Cache System:**
|
| 97 |
+
```python
|
| 98 |
+
{
|
| 99 |
+
"question_text": "answer",
|
| 100 |
+
"How many albums...": "4",
|
| 101 |
+
"What is 2+2?": "4"
|
| 102 |
+
}
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
### 3. **agent/graph.py** - LangGraph Workflow
|
| 106 |
+
**Responsibilities:**
|
| 107 |
+
- Define agent workflow (nodes and edges)
|
| 108 |
+
- Initialize LLM chains (primary + fallback)
|
| 109 |
+
- Initialize and manage tools
|
| 110 |
+
- Route between agent and tools nodes
|
| 111 |
+
|
| 112 |
+
**Workflow Structure:**
|
| 113 |
+
```
|
| 114 |
+
START → Agent Node → [Tools Node] → Agent Node → END
|
| 115 |
+
↑_______________|
|
| 116 |
+
(loop until answer found or max iterations)
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
**Key Components:**
|
| 120 |
+
- `agent_node()` - LLM decision making
|
| 121 |
+
- `tool_node()` - Tool execution
|
| 122 |
+
- `should_continue()` - Routing logic
|
| 123 |
+
- System prompt with detailed instructions
|
| 124 |
+
|
| 125 |
+
**LLM Configuration:**
|
| 126 |
+
- **Primary:** GPT-4o (with tools)
|
| 127 |
+
- **Fallback:** GPT-4o-mini (with tools)
|
| 128 |
+
- **Recursion Limit:** 50 iterations
|
| 129 |
+
- **Rate Limiting:** Exponential backoff (5 retries)
|
| 130 |
+
|
| 131 |
+
### 4. **agent/tools.py** - Tool Implementations
|
| 132 |
+
**Responsibilities:**
|
| 133 |
+
- Implement all tools available to the agent
|
| 134 |
+
- Handle file path resolution (current dir + downloads/)
|
| 135 |
+
- Integrate with external APIs (Gemini, search engines)
|
| 136 |
+
|
| 137 |
+
**Available Tools:**
|
| 138 |
+
|
| 139 |
+
#### Search & Research (5 tools)
|
| 140 |
+
- `duckduckgo_search` - Web search
|
| 141 |
+
- `tavily_search` - Advanced web search
|
| 142 |
+
- `wikipedia` - Wikipedia lookup
|
| 143 |
+
- `youtube_transcript` - Get YouTube transcripts
|
| 144 |
+
- `arxiv_search` - Academic paper search
|
| 145 |
+
|
| 146 |
+
#### File Operations (5 tools)
|
| 147 |
+
- `list_files` - List files in current/downloads directory
|
| 148 |
+
- `read_file` - Read text files
|
| 149 |
+
- `read_excel` - Read and analyze Excel files
|
| 150 |
+
- `download_file` - Download files from URLs
|
| 151 |
+
- `execute_python_file` - Run Python scripts
|
| 152 |
+
|
| 153 |
+
#### Multimedia Analysis (3 tools - Gemini-powered)
|
| 154 |
+
- `understand_video` - Analyze YouTube videos
|
| 155 |
+
- `understand_audio` - Transcribe and analyze MP3/audio
|
| 156 |
+
- `analyze_image` - Analyze images (chess, diagrams, text)
|
| 157 |
+
|
| 158 |
+
#### Computation (2 tools)
|
| 159 |
+
- `calculator` - Safe math evaluation
|
| 160 |
+
- `python_repl` - Execute Python code
|
| 161 |
+
|
| 162 |
+
**File Path Resolution:**
|
| 163 |
+
All file tools use `find_file()` helper that checks:
|
| 164 |
+
1. Current directory
|
| 165 |
+
2. `downloads/` directory
|
| 166 |
+
3. Returns best match or downloads path
|
| 167 |
+
|
| 168 |
+
## Data Flow
|
| 169 |
+
|
| 170 |
+
### Question Processing Flow
|
| 171 |
+
|
| 172 |
+
```
|
| 173 |
+
1. API Request
|
| 174 |
+
└─ GET /questions
|
| 175 |
+
└─ Returns: [{task_id, question, Level, file_name}, ...]
|
| 176 |
+
|
| 177 |
+
2. File Download (if file_name exists)
|
| 178 |
+
└─ GET /files/{task_id}
|
| 179 |
+
└─ Save to: downloads/{file_name}
|
| 180 |
+
|
| 181 |
+
3. Agent Invocation
|
| 182 |
+
├─ Check cache
|
| 183 |
+
│ └─ If hit: Return cached answer (0 LLM calls)
|
| 184 |
+
│
|
| 185 |
+
└─ If miss:
|
| 186 |
+
├─ Create initial state with question
|
| 187 |
+
├─ Invoke LangGraph workflow
|
| 188 |
+
│ ├─ Agent decides action
|
| 189 |
+
│ ├─ Execute tools
|
| 190 |
+
│ ├─ Agent processes results
|
| 191 |
+
│ └─ Loop until answer or max iterations
|
| 192 |
+
│
|
| 193 |
+
├─ Extract answer from final message
|
| 194 |
+
├─ Clean answer (remove JSON, explanations)
|
| 195 |
+
├─ Validate answer (ensure not empty)
|
| 196 |
+
└─ Cache to disk
|
| 197 |
+
|
| 198 |
+
4. Answer Submission
|
| 199 |
+
└─ POST /submit
|
| 200 |
+
└─ Body: {username, answers: [{task_id, submitted_answer}]}
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
## Tool Execution Flow
|
| 204 |
+
|
| 205 |
+
```
|
| 206 |
+
Agent Node (GPT-4o)
|
| 207 |
+
↓
|
| 208 |
+
Decides: "I need to use list_files tool"
|
| 209 |
+
↓
|
| 210 |
+
Tool Node
|
| 211 |
+
├─ Finds tool by name
|
| 212 |
+
├─ Validates parameters
|
| 213 |
+
├─ Executes tool._run()
|
| 214 |
+
│ └─ Example: list_files()
|
| 215 |
+
│ ├─ Check current directory
|
| 216 |
+
│ ├─ Check downloads/ directory
|
| 217 |
+
│ └─ Return: "Files found:\n./app.py\ndownloads/data.xlsx"
|
| 218 |
+
└─ Returns ToolMessage with result
|
| 219 |
+
↓
|
| 220 |
+
Agent Node (GPT-4o)
|
| 221 |
+
├─ Receives tool output
|
| 222 |
+
├─ Analyzes results
|
| 223 |
+
└─ Decides: Use another tool OR provide final answer
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
## Key Design Decisions
|
| 227 |
+
|
| 228 |
+
### 1. **Persistent Caching**
|
| 229 |
+
**Why:** Reduce costs and enable fast re-runs
|
| 230 |
+
**How:** JSON file on disk, loaded at startup, saved after each answer
|
| 231 |
+
**Benefit:** 100% cost savings on repeated questions
|
| 232 |
+
|
| 233 |
+
### 2. **File Path Resolution**
|
| 234 |
+
**Why:** Files can be in current directory or downloads/
|
| 235 |
+
**How:** `find_file()` helper checks both locations
|
| 236 |
+
**Benefit:** Agent doesn't need to know exact file location
|
| 237 |
+
|
| 238 |
+
### 3. **Gemini for Multimedia**
|
| 239 |
+
**Why:** GPT-4o doesn't support direct video/audio analysis
|
| 240 |
+
**How:** Upload files to Gemini API, get analysis
|
| 241 |
+
**Benefit:** Can handle YouTube videos, MP3 files, images
|
| 242 |
+
|
| 243 |
+
### 4. **Answer Cleaning Pipeline**
|
| 244 |
+
**Why:** LLMs often return verbose explanations or JSON
|
| 245 |
+
**How:** Multi-stage cleaning (JSON removal, pattern matching, validation)
|
| 246 |
+
**Benefit:** Clean, concise answers that match expected format
|
| 247 |
+
|
| 248 |
+
### 5. **Dual LLM Strategy**
|
| 249 |
+
**Why:** Reliability and cost optimization
|
| 250 |
+
**How:** Primary (GPT-4o) with fallback (GPT-4o-mini)
|
| 251 |
+
**Benefit:** Continues working if primary fails
|
| 252 |
+
|
| 253 |
+
### 6. **Tool-First Architecture**
|
| 254 |
+
**Why:** Many questions require external data
|
| 255 |
+
**How:** Rich tool suite with 15+ specialized tools
|
| 256 |
+
**Benefit:** Can handle diverse question types
|
| 257 |
+
|
| 258 |
+
## Configuration
|
| 259 |
+
|
| 260 |
+
### Environment Variables (.env)
|
| 261 |
+
```bash
|
| 262 |
+
OPENAI_API_KEY=sk-... # Required for GPT-4o
|
| 263 |
+
GEMINI_API_KEY=... # Required for video/audio/image analysis
|
| 264 |
+
TAVILY_API_KEY=... # Optional for advanced search
|
| 265 |
+
HF_TOKEN=... # For HuggingFace API access
|
| 266 |
+
```
|
| 267 |
+
|
| 268 |
+
### Configurable Parameters
|
| 269 |
+
|
| 270 |
+
**In BasicAgent:**
|
| 271 |
+
- `log_to_file` - Enable/disable logging (default: True)
|
| 272 |
+
- `use_cache` - Enable/disable caching (default: True)
|
| 273 |
+
- `cache_file` - Cache file path (default: "agent_cache.json")
|
| 274 |
+
|
| 275 |
+
**In LangGraph:**
|
| 276 |
+
- `recursion_limit` - Max iterations (default: 50)
|
| 277 |
+
- `temperature` - LLM temperature (default: 0.0)
|
| 278 |
+
- `max_retries` - Rate limit retries (default: 5)
|
| 279 |
+
|
| 280 |
+
## File Structure
|
| 281 |
+
|
| 282 |
+
```
|
| 283 |
+
Final_Assignment_Template/
|
| 284 |
+
├── app.py # Main application
|
| 285 |
+
├── agent/
|
| 286 |
+
│ ├── __init__.py
|
| 287 |
+
│ ├── basic_agent.py # Agent wrapper with caching
|
| 288 |
+
│ ├── graph.py # LangGraph workflow
|
| 289 |
+
│ └── tools.py # Tool implementations
|
| 290 |
+
├── downloads/ # Downloaded files (gitignored)
|
| 291 |
+
│ ├── file1.xlsx
|
| 292 |
+
│ ├── audio.mp3
|
| 293 |
+
│ └── image.png
|
| 294 |
+
├── agent_cache.json # Persistent cache (gitignored)
|
| 295 |
+
├── agent_run_*.log # Log files (gitignored)
|
| 296 |
+
├── requirements.txt # Python dependencies
|
| 297 |
+
├── .env # Environment variables (gitignored)
|
| 298 |
+
├── .gitignore
|
| 299 |
+
├── ARCHITECTURE.md # This file
|
| 300 |
+
└── README.md # User documentation
|
| 301 |
+
```
|
| 302 |
+
|
| 303 |
+
## Performance Characteristics
|
| 304 |
+
|
| 305 |
+
### Typical Question Processing Time
|
| 306 |
+
- **Simple (cached):** < 0.1 seconds
|
| 307 |
+
- **Simple (web search):** 2-5 seconds
|
| 308 |
+
- **Medium (file analysis):** 5-15 seconds
|
| 309 |
+
- **Complex (multi-step):** 15-60 seconds
|
| 310 |
+
- **Multimedia (video/audio):** 30-120 seconds
|
| 311 |
+
|
| 312 |
+
### LLM Token Usage (per question)
|
| 313 |
+
- **Simple:** 500-2,000 tokens
|
| 314 |
+
- **Medium:** 2,000-8,000 tokens
|
| 315 |
+
- **Complex:** 8,000-20,000 tokens
|
| 316 |
+
|
| 317 |
+
### Cost Estimates (GPT-4o)
|
| 318 |
+
- **Per question (avg):** $0.01-0.05
|
| 319 |
+
- **20 questions:** $0.20-1.00
|
| 320 |
+
- **With caching (re-runs):** $0.00
|
| 321 |
+
|
| 322 |
+
## Error Handling
|
| 323 |
+
|
| 324 |
+
### Graceful Degradation
|
| 325 |
+
1. **Cache file corrupted:** Start with empty cache
|
| 326 |
+
2. **File download fails:** Continue without file, agent handles gracefully
|
| 327 |
+
3. **Tool execution fails:** Return error message, agent tries alternative
|
| 328 |
+
4. **LLM rate limit:** Exponential backoff, retry up to 5 times
|
| 329 |
+
5. **Primary LLM fails:** Fallback to GPT-4o-mini
|
| 330 |
+
6. **Recursion limit hit:** Return best answer so far
|
| 331 |
+
|
| 332 |
+
### Validation
|
| 333 |
+
- All answers validated (never empty)
|
| 334 |
+
- File paths validated before access
|
| 335 |
+
- API responses validated before processing
|
| 336 |
+
- Tool parameters validated before execution
|
| 337 |
+
|
| 338 |
+
## Testing & Development
|
| 339 |
+
|
| 340 |
+
### Local Testing
|
| 341 |
+
```bash
|
| 342 |
+
# Run full evaluation
|
| 343 |
+
python app.py
|
| 344 |
+
|
| 345 |
+
# Check logs
|
| 346 |
+
tail -f agent_run_*.log
|
| 347 |
+
|
| 348 |
+
# View cache
|
| 349 |
+
cat agent_cache.json
|
| 350 |
+
|
| 351 |
+
# Clear cache for fresh run
|
| 352 |
+
rm agent_cache.json
|
| 353 |
+
```
|
| 354 |
+
|
| 355 |
+
### Debugging
|
| 356 |
+
- All tool calls logged with arguments and results
|
| 357 |
+
- Agent reasoning logged at each step
|
| 358 |
+
- Errors logged with full stack traces
|
| 359 |
+
- Cache hits/misses logged
|
| 360 |
+
|
| 361 |
+
## Future Enhancements
|
| 362 |
+
|
| 363 |
+
### Potential Improvements
|
| 364 |
+
1. **Pattern-based answering** - Skip LLM for simple questions
|
| 365 |
+
2. **Parallel tool execution** - Run independent tools simultaneously
|
| 366 |
+
3. **Smarter caching** - Fuzzy matching for similar questions
|
| 367 |
+
4. **Cost tracking** - Log token usage and costs
|
| 368 |
+
5. **A/B testing** - Compare different prompts/strategies
|
| 369 |
+
6. **Streaming responses** - Show progress in real-time
|
| 370 |
+
|
| 371 |
+
### Scalability Considerations
|
| 372 |
+
- Cache can grow large (consider size limits or TTL)
|
| 373 |
+
- Multiple concurrent runs need separate cache files
|
| 374 |
+
- Rate limiting may need adjustment for production
|
| 375 |
+
- Consider database instead of JSON for large-scale caching
|
| 376 |
+
|
| 377 |
+
## Dependencies
|
| 378 |
+
|
| 379 |
+
### Core
|
| 380 |
+
- `langchain` - LLM framework
|
| 381 |
+
- `langgraph` - Workflow orchestration
|
| 382 |
+
- `langchain-openai` - OpenAI integration
|
| 383 |
+
- `langchain-community` - Community tools
|
| 384 |
+
|
| 385 |
+
### Tools
|
| 386 |
+
- `google-generativeai` - Gemini API
|
| 387 |
+
- `tavily-python` - Advanced search
|
| 388 |
+
- `duckduckgo-search` - Web search
|
| 389 |
+
- `youtube-transcript-api` - YouTube transcripts
|
| 390 |
+
- `pandas` - Data analysis
|
| 391 |
+
- `openpyxl` - Excel files
|
| 392 |
+
|
| 393 |
+
### Utilities
|
| 394 |
+
- `requests` - HTTP requests
|
| 395 |
+
- `python-dotenv` - Environment variables
|
| 396 |
+
- `gradio` - Web UI (optional)
|
| 397 |
+
|
| 398 |
+
## Security Considerations
|
| 399 |
+
|
| 400 |
+
### API Keys
|
| 401 |
+
- Stored in `.env` file (gitignored)
|
| 402 |
+
- Never hardcoded in source
|
| 403 |
+
- Loaded via `python-dotenv`
|
| 404 |
+
|
| 405 |
+
### Code Execution
|
| 406 |
+
- `python_repl` uses AST-based REPL (safer than eval)
|
| 407 |
+
- `execute_python_file` runs in subprocess with timeout
|
| 408 |
+
- No shell injection vulnerabilities
|
| 409 |
+
|
| 410 |
+
### File Access
|
| 411 |
+
- All file operations use Path validation
|
| 412 |
+
- No arbitrary file system access
|
| 413 |
+
- Downloads isolated to `downloads/` directory
|
| 414 |
+
|
| 415 |
+
## Monitoring & Observability
|
| 416 |
+
|
| 417 |
+
### Logs
|
| 418 |
+
- Timestamped log files for each run
|
| 419 |
+
- Structured logging with emojis for easy parsing
|
| 420 |
+
- Tool calls logged with full context
|
| 421 |
+
- Errors logged with stack traces
|
| 422 |
+
|
| 423 |
+
### Metrics (available in logs)
|
| 424 |
+
- Questions processed
|
| 425 |
+
- Cache hit rate
|
| 426 |
+
- Tool usage frequency
|
| 427 |
+
- LLM calls per question
|
| 428 |
+
- Execution time per question
|
| 429 |
+
- Error rate
|
| 430 |
+
|
| 431 |
+
---
|
| 432 |
+
|
| 433 |
+
**Version:** 1.0
|
| 434 |
+
**Last Updated:** 2025-09-30
|
| 435 |
+
**Author:** Leon Woo
|
| 436 |
+
**License:** MIT
|
OtherDS.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datasets import load_dataset
|
| 2 |
+
|
| 3 |
+
ds = load_dataset("arterm-sedov/agent-course-final-assignment", "init") # default config
|
| 4 |
+
rows = ds["train"][:200] # grab 200 rows (dict of lists)
|
| 5 |
+
|
| 6 |
+
# Convert to a list of dicts (row-wise) for JSON export:
|
| 7 |
+
rowwise = [dict(zip(rows.keys(), values)) for values in zip(*rows.values())]
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
with open("rows.json", "w", encoding="utf-8") as f:
|
| 11 |
+
json.dump(rowwise, f, ensure_ascii=False, indent=2)
|
README.md
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Template Final Assignment
|
| 3 |
-
emoji: 🕵🏻♂️
|
| 4 |
-
colorFrom: indigo
|
| 5 |
-
colorTo: indigo
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.25.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
hf_oauth: true
|
| 11 |
-
# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
|
| 12 |
-
hf_oauth_expiration_minutes: 480
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Template Final Assignment
|
| 3 |
+
emoji: 🕵🏻♂️
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.25.2
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
hf_oauth: true
|
| 11 |
+
# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
|
| 12 |
+
hf_oauth_expiration_minutes: 480
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
agent_cache.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.": "4",
|
| 3 |
+
"In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?": "4",
|
| 4 |
+
".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI": "right",
|
| 5 |
+
"Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.": "Qb1#",
|
| 6 |
+
"Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?": "FunkMonk",
|
| 7 |
+
"Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.": "a,b,c,e",
|
| 8 |
+
"Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\"": "Indeed",
|
| 9 |
+
"What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?": "Louvrier",
|
| 10 |
+
"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.": "broccoli,celery,green beans,lettuce,sweet potatoes,zucchini",
|
| 11 |
+
"Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.": "cornstarch,granulated sugar,lemon juice,ripe strawberries,vanilla extract",
|
| 12 |
+
"Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.": "Wojciech",
|
| 13 |
+
"What is the final numeric output from the attached Python code?": "0",
|
| 14 |
+
"How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?": "519",
|
| 15 |
+
"Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(\n\nCould you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.": "132133134197245",
|
| 16 |
+
"On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?": "80GSFC21M0002",
|
| 17 |
+
"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.": "Saint Petersburg",
|
| 18 |
+
"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.": "CUB",
|
| 19 |
+
"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.": "Yamasaki,Uehara",
|
| 20 |
+
"The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.": "89706.00"
|
| 21 |
+
}
|
app.py
CHANGED
|
@@ -1,38 +1,63 @@
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
-
import requests
|
| 4 |
-
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
-
class BasicAgent:
|
| 14 |
-
def __init__(self):
|
| 15 |
-
print("BasicAgent initialized.")
|
| 16 |
-
def __call__(self, question: str) -> str:
|
| 17 |
-
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
| 18 |
-
fixed_answer = "This is a default answer."
|
| 19 |
-
print(f"Agent returning fixed answer: {fixed_answer}")
|
| 20 |
-
return fixed_answer
|
| 21 |
-
|
| 22 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 23 |
"""
|
| 24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 25 |
and displays the results.
|
| 26 |
"""
|
| 27 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 28 |
-
space_id = os.getenv("SPACE_ID")
|
| 29 |
|
| 30 |
-
if profile:
|
| 31 |
-
username= f"{profile.username}"
|
| 32 |
-
print(f"User logged in: {username}")
|
|
|
|
|
|
|
|
|
|
| 33 |
else:
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
api_url = DEFAULT_API_URL
|
| 38 |
questions_url = f"{api_url}/questions"
|
|
@@ -55,16 +80,23 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 55 |
response.raise_for_status()
|
| 56 |
questions_data = response.json()
|
| 57 |
if not questions_data:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
print(f"Fetched {len(questions_data)} questions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
except requests.exceptions.RequestException as e:
|
| 62 |
print(f"Error fetching questions: {e}")
|
| 63 |
return f"Error fetching questions: {e}", None
|
| 64 |
except requests.exceptions.JSONDecodeError as e:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
except Exception as e:
|
| 69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
@@ -79,19 +111,51 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 79 |
if not task_id or question_text is None:
|
| 80 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 81 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
try:
|
| 83 |
submitted_answer = agent(question_text)
|
| 84 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 85 |
-
results_log.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
except Exception as e:
|
| 87 |
-
|
| 88 |
-
|
| 89 |
|
| 90 |
if not answers_payload:
|
| 91 |
print("Agent did not produce any answers to submit.")
|
| 92 |
-
|
|
|
|
| 93 |
|
| 94 |
-
# 4. Prepare Submission
|
| 95 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 96 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 97 |
print(status_update)
|
|
@@ -110,7 +174,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 110 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 111 |
)
|
| 112 |
print("Submission successful.")
|
| 113 |
-
results_df = pd.DataFrame(results_log)
|
| 114 |
return final_status, results_df
|
| 115 |
except requests.exceptions.HTTPError as e:
|
| 116 |
error_detail = f"Server responded with status {e.response.status_code}."
|
|
@@ -121,26 +185,32 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 121 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 122 |
status_message = f"Submission Failed: {error_detail}"
|
| 123 |
print(status_message)
|
| 124 |
-
results_df = pd.DataFrame(results_log)
|
| 125 |
return status_message, results_df
|
| 126 |
except requests.exceptions.Timeout:
|
| 127 |
status_message = "Submission Failed: The request timed out."
|
| 128 |
print(status_message)
|
| 129 |
-
results_df = pd.DataFrame(results_log)
|
| 130 |
return status_message, results_df
|
| 131 |
except requests.exceptions.RequestException as e:
|
| 132 |
status_message = f"Submission Failed: Network error - {e}"
|
| 133 |
print(status_message)
|
| 134 |
-
results_df = pd.DataFrame(results_log)
|
| 135 |
return status_message, results_df
|
| 136 |
except Exception as e:
|
| 137 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 138 |
print(status_message)
|
| 139 |
-
results_df = pd.DataFrame(results_log)
|
| 140 |
return status_message, results_df
|
| 141 |
|
| 142 |
|
| 143 |
# --- Build Gradio Interface using Blocks ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
with gr.Blocks() as demo:
|
| 145 |
gr.Markdown("# Basic Agent Evaluation Runner")
|
| 146 |
gr.Markdown(
|
|
@@ -158,24 +228,57 @@ with gr.Blocks() as demo:
|
|
| 158 |
"""
|
| 159 |
)
|
| 160 |
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 164 |
|
| 165 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
outputs=[status_output, results_table]
|
| 172 |
)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
if __name__ == "__main__":
|
| 175 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 176 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 177 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 178 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 179 |
|
| 180 |
if space_host_startup:
|
| 181 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
@@ -183,7 +286,7 @@ if __name__ == "__main__":
|
|
| 183 |
else:
|
| 184 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 185 |
|
| 186 |
-
if space_id_startup:
|
| 187 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 188 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 189 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
|
|
| 1 |
import os
|
| 2 |
+
|
| 3 |
+
# Disable Hugging Face login for local execution
|
| 4 |
+
os.environ["DISABLE_HF_LOGIN"] = "1"
|
| 5 |
+
|
| 6 |
import gradio as gr
|
|
|
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
+
import requests
|
| 9 |
+
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
from agent import BasicAgent, agent_graph_mermaid
|
| 13 |
+
from agent.graph import agent_graph_png_base64
|
| 14 |
+
|
| 15 |
+
load_dotenv()
|
| 16 |
|
| 17 |
+
# Generate the agent graph visualizations
|
| 18 |
+
try:
|
| 19 |
+
GRAPH_MERMAID = agent_graph_mermaid()
|
| 20 |
+
except Exception as exc:
|
| 21 |
+
GRAPH_MERMAID = f"Error generating graph diagram: {exc}"
|
| 22 |
+
|
| 23 |
+
GRAPH_PNG_BASE64 = agent_graph_png_base64()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _env_flag(name: str, default: bool = False) -> bool:
    """Read *name* from the environment and interpret it as a boolean switch.

    Unset -> *default*; otherwise true iff the value (case-insensitively,
    ignoring surrounding whitespace) is one of: 1, true, yes, on.
    """
    truthy = {"1", "true", "yes", "on"}
    raw = os.getenv(name)
    return default if raw is None else raw.strip().lower() in truthy
|
| 31 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 32 |
+
_SPACE_ENV_CONFIGURED = bool(os.getenv("SPACE_ID") or os.getenv("SPACE_HOST"))
|
| 33 |
+
FORCE_LOCAL_MODE = _env_flag("FORCE_LOCAL_MODE") or _env_flag("DISABLE_HF_LOGIN") or _env_flag("GRADIO_FORCE_LOCAL")
|
| 34 |
+
RUNNING_IN_SPACE = _SPACE_ENV_CONFIGURED and not FORCE_LOCAL_MODE
|
| 35 |
|
| 36 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None = None, username: str | None = None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 39 |
and displays the results.
|
| 40 |
"""
|
| 41 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 42 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 43 |
|
| 44 |
+
if profile and getattr(profile, "username", None):
|
| 45 |
+
username = f"{profile.username}".strip()
|
| 46 |
+
print(f"User logged in via OAuth: {username}")
|
| 47 |
+
elif username:
|
| 48 |
+
username = username.strip()
|
| 49 |
+
print(f"Using provided username: {username}")
|
| 50 |
else:
|
| 51 |
+
env_username = (os.getenv("HF_USERNAME") or os.getenv("LOCAL_HF_USERNAME") or "").strip()
|
| 52 |
+
if env_username:
|
| 53 |
+
username = env_username
|
| 54 |
+
print(f"Using username from environment: {username}")
|
| 55 |
+
else:
|
| 56 |
+
print("User not logged in and no username supplied.")
|
| 57 |
+
return (
|
| 58 |
+
"Please login to Hugging Face, provide a username locally, or set the `HF_USERNAME`/`LOCAL_HF_USERNAME` environment variable.",
|
| 59 |
+
None,
|
| 60 |
+
)
|
| 61 |
|
| 62 |
api_url = DEFAULT_API_URL
|
| 63 |
questions_url = f"{api_url}/questions"
|
|
|
|
| 80 |
response.raise_for_status()
|
| 81 |
questions_data = response.json()
|
| 82 |
if not questions_data:
|
| 83 |
+
print("Fetched questions list is empty.")
|
| 84 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 85 |
print(f"Fetched {len(questions_data)} questions.")
|
| 86 |
+
|
| 87 |
+
# Debug: Print first question structure to understand format
|
| 88 |
+
if questions_data and len(questions_data) > 0:
|
| 89 |
+
print(f"\n🔍 DEBUG - First question structure:")
|
| 90 |
+
print(f"Keys: {list(questions_data[0].keys())}")
|
| 91 |
+
if len(questions_data[0].keys()) > 2:
|
| 92 |
+
print(f"Sample: {str(questions_data[0])[:300]}...\n")
|
| 93 |
except requests.exceptions.RequestException as e:
|
| 94 |
print(f"Error fetching questions: {e}")
|
| 95 |
return f"Error fetching questions: {e}", None
|
| 96 |
except requests.exceptions.JSONDecodeError as e:
|
| 97 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 98 |
+
print(f"Response text: {response.text[:500]}")
|
| 99 |
+
return f"Error decoding server response for questions: {e}", None
|
| 100 |
except Exception as e:
|
| 101 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 102 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
|
|
| 111 |
if not task_id or question_text is None:
|
| 112 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 113 |
continue
|
| 114 |
+
|
| 115 |
+
# Check for attached files and download them
|
| 116 |
+
file_name = item.get("file_name", "")
|
| 117 |
+
if file_name and file_name.strip():
|
| 118 |
+
try:
|
| 119 |
+
from pathlib import Path
|
| 120 |
+
|
| 121 |
+
# Create downloads directory if it doesn't exist
|
| 122 |
+
download_dir = Path("downloads")
|
| 123 |
+
download_dir.mkdir(exist_ok=True)
|
| 124 |
+
|
| 125 |
+
# Construct file download URL
|
| 126 |
+
file_url = f"{api_url}/files/{task_id}" # Note: /files/ (plural)
|
| 127 |
+
print(f"📥 Downloading file: {file_name} from {file_url}")
|
| 128 |
+
|
| 129 |
+
# Download the file
|
| 130 |
+
file_response = requests.get(file_url, timeout=30)
|
| 131 |
+
file_response.raise_for_status()
|
| 132 |
+
|
| 133 |
+
# Save file to downloads directory
|
| 134 |
+
filepath = download_dir / file_name
|
| 135 |
+
with open(filepath, 'wb') as f:
|
| 136 |
+
f.write(file_response.content)
|
| 137 |
+
print(f"✅ Saved file: {filepath} ({len(file_response.content)} bytes)")
|
| 138 |
+
except Exception as e:
|
| 139 |
+
print(f"⚠️ Error downloading file {file_name}: {e}")
|
| 140 |
+
|
| 141 |
try:
|
| 142 |
submitted_answer = agent(question_text)
|
| 143 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 144 |
+
results_log.append({
|
| 145 |
+
"Task ID": task_id,
|
| 146 |
+
"Question": question_text,
|
| 147 |
+
"Submitted Answer": submitted_answer,
|
| 148 |
+
})
|
| 149 |
except Exception as e:
|
| 150 |
+
print(f"Error running agent on task {task_id}: {e}")
|
| 151 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 152 |
|
| 153 |
if not answers_payload:
|
| 154 |
print("Agent did not produce any answers to submit.")
|
| 155 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 156 |
+
return "Agent did not produce any answers to submit.", results_df
|
| 157 |
|
| 158 |
+
# 4. Prepare Submission
|
| 159 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 160 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 161 |
print(status_update)
|
|
|
|
| 174 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 175 |
)
|
| 176 |
print("Submission successful.")
|
| 177 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 178 |
return final_status, results_df
|
| 179 |
except requests.exceptions.HTTPError as e:
|
| 180 |
error_detail = f"Server responded with status {e.response.status_code}."
|
|
|
|
| 185 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 186 |
status_message = f"Submission Failed: {error_detail}"
|
| 187 |
print(status_message)
|
| 188 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 189 |
return status_message, results_df
|
| 190 |
except requests.exceptions.Timeout:
|
| 191 |
status_message = "Submission Failed: The request timed out."
|
| 192 |
print(status_message)
|
| 193 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 194 |
return status_message, results_df
|
| 195 |
except requests.exceptions.RequestException as e:
|
| 196 |
status_message = f"Submission Failed: Network error - {e}"
|
| 197 |
print(status_message)
|
| 198 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 199 |
return status_message, results_df
|
| 200 |
except Exception as e:
|
| 201 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 202 |
print(status_message)
|
| 203 |
+
results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"]).fillna("")
|
| 204 |
return status_message, results_df
|
| 205 |
|
| 206 |
|
| 207 |
# --- Build Gradio Interface using Blocks ---
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def run_and_submit_all_local(username_input: str | None):
    """Local-mode click handler: bypass OAuth and forward the typed username
    to run_and_submit_all (no gr.OAuthProfile available outside a Space)."""
    return run_and_submit_all(profile=None, username=username_input)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
with gr.Blocks() as demo:
|
| 215 |
gr.Markdown("# Basic Agent Evaluation Runner")
|
| 216 |
gr.Markdown(
|
|
|
|
| 228 |
"""
|
| 229 |
)
|
| 230 |
|
| 231 |
+
# Display the agent graph
|
| 232 |
+
gr.Markdown("## Agent Flow Graph")
|
| 233 |
+
gr.Markdown("This diagram shows how your agent processes questions using LangGraph:")
|
| 234 |
+
|
| 235 |
+
if GRAPH_PNG_BASE64:
|
| 236 |
+
gr.HTML(
|
| 237 |
+
f'<img src="data:image/png;base64,{GRAPH_PNG_BASE64}" alt="Agent Flow Graph" style="max-width:100%;height:auto;border:1px solid #ddd;border-radius:4px;padding:10px;"/>'
|
| 238 |
+
)
|
| 239 |
+
else:
|
| 240 |
+
gr.Markdown("*Graph visualization not available. The agent is still functional.*")
|
| 241 |
+
|
| 242 |
+
login_button = None
|
| 243 |
+
username_box = None
|
| 244 |
+
if RUNNING_IN_SPACE:
|
| 245 |
+
login_button = gr.LoginButton()
|
| 246 |
+
else:
|
| 247 |
+
username_box = gr.Textbox(
|
| 248 |
+
label="Hugging Face Username",
|
| 249 |
+
placeholder="Enter the username to associate with your submission",
|
| 250 |
+
value=(os.getenv("HF_USERNAME") or os.getenv("LOCAL_HF_USERNAME") or ""),
|
| 251 |
+
)
|
| 252 |
|
| 253 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 254 |
|
| 255 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 256 |
+
results_table = gr.DataFrame(
|
| 257 |
+
label="Questions and Agent Answers",
|
| 258 |
+
value=pd.DataFrame(columns=["Task ID", "Question", "Submitted Answer"]),
|
| 259 |
+
interactive=False,
|
| 260 |
+
wrap=True,
|
|
|
|
| 261 |
)
|
| 262 |
|
| 263 |
+
if RUNNING_IN_SPACE:
|
| 264 |
+
run_button.click(
|
| 265 |
+
fn=run_and_submit_all,
|
| 266 |
+
inputs=[login_button],
|
| 267 |
+
outputs=[status_output, results_table],
|
| 268 |
+
)
|
| 269 |
+
else:
|
| 270 |
+
run_button.click(
|
| 271 |
+
fn=run_and_submit_all_local,
|
| 272 |
+
inputs=[username_box],
|
| 273 |
+
outputs=[status_output, results_table],
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
if __name__ == "__main__":
|
| 278 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 279 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 280 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 281 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 282 |
|
| 283 |
if space_host_startup:
|
| 284 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
|
| 286 |
else:
|
| 287 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 288 |
|
| 289 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 290 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 291 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 292 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
cache/agent/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Agent package exposing the BasicAgent and graph utilities."""
|
| 2 |
+
|
| 3 |
+
from .basic_agent import BasicAgent
|
| 4 |
+
from .graph import agent_graph_mermaid, build_agent_graph
|
| 5 |
+
|
| 6 |
+
__all__ = ["BasicAgent", "agent_graph_mermaid", "build_agent_graph"]
|
cache/agent/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (471 Bytes). View file
|
|
|
cache/agent/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (420 Bytes). View file
|
|
|
cache/agent/__pycache__/basic_agent.cpython-311.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
cache/agent/__pycache__/basic_agent.cpython-312.pyc
ADDED
|
Binary file (3.31 kB). View file
|
|
|
cache/agent/__pycache__/constants.cpython-311.pyc
ADDED
|
Binary file (542 Bytes). View file
|
|
|
cache/agent/__pycache__/graph.cpython-311.pyc
ADDED
|
Binary file (18.3 kB). View file
|
|
|
cache/agent/__pycache__/graph.cpython-312.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
cache/agent/__pycache__/tools.cpython-311.pyc
ADDED
|
Binary file (25.7 kB). View file
|
|
|
cache/agent/basic_agent.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A LangGraph-based agent implementation."""
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
| 9 |
+
|
| 10 |
+
from .graph import AgentState, build_agent_graph
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def ensure_valid_answer(answer: str) -> str:
    """Return a stripped, non-empty answer string.

    Any None, non-string, empty, or whitespace-only value is replaced by the
    fixed placeholder "Unable to determine answer" so callers never submit an
    empty answer.
    """
    if isinstance(answer, str):
        trimmed = answer.strip()
        if trimmed:
            return trimmed
    return "Unable to determine answer"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class TeeOutput:
    """Redirect stdout/stderr to both console and file."""
    def __init__(self, file_path, mode='a'):
        # Log file receives a copy of everything written; flushed on each write.
        self.file = open(file_path, mode, encoding='utf-8')
        # NOTE(review): the mirrored stream is chosen from the *open mode*
        # ('a' -> stdout, anything else -> stderr). That conflates two
        # unrelated settings — confirm intent before reusing this class.
        self.terminal = sys.stdout if mode == 'a' else sys.stderr

    def write(self, message):
        # Write-through to both sinks; flush the file so logs survive crashes.
        self.terminal.write(message)
        self.file.write(message)
        self.file.flush()

    def flush(self):
        # Satisfy the file-like protocol expected by print()/sys.stdout.
        self.terminal.flush()
        self.file.flush()

    def close(self):
        # Closes only the log file; the original terminal stream stays open.
        self.file.close()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class BasicAgent:
    """A LangGraph-powered agent that uses tools to answer questions.

    Answers are post-processed to match GAIA's exact-match scoring rules and
    cached on disk (question -> answer) so repeated runs skip the LLM call.
    """

    def __init__(self, log_to_file=True, use_cache=True, cache_file="agent_cache.json") -> None:
        """Initialize the agent with the compiled graph.

        Args:
            log_to_file: When True, mirror stdout into a timestamped log file.
            use_cache: When True, reuse previously computed answers from disk.
            cache_file: Path of the JSON file backing the answer cache.
        """
        self.graph = build_agent_graph()
        self.log_file = None
        self.use_cache = use_cache
        self.cache_file = Path(cache_file)
        self.answer_cache = {}  # Cache for question -> answer mapping

        # Load cache from disk if it exists
        if self.use_cache:
            self._load_cache()

        # Set up logging to file (note: rebinds sys.stdout for the process)
        if log_to_file:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            log_filename = f"agent_run_{timestamp}.log"
            self.log_file = TeeOutput(log_filename, 'w')
            sys.stdout = self.log_file
            print(f"📝 Logging to: {log_filename}\n")
            if self.use_cache and self.answer_cache:
                print(f"💾 Loaded {len(self.answer_cache)} cached answers from {self.cache_file}\n")

    def _load_cache(self):
        """Load answer cache from disk; fall back to an empty cache on error."""
        try:
            if self.cache_file.exists():
                with open(self.cache_file, 'r', encoding='utf-8') as f:
                    self.answer_cache = json.load(f)
        except Exception as e:
            print(f"⚠️ Warning: Could not load cache from {self.cache_file}: {e}")
            self.answer_cache = {}

    def _save_cache(self):
        """Persist the answer cache to disk (best-effort; never raises)."""
        try:
            with open(self.cache_file, 'w', encoding='utf-8') as f:
                json.dump(self.answer_cache, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"⚠️ Warning: Could not save cache to {self.cache_file}: {e}")

    def _clean_answer(self, answer: str, question: str) -> str:
        """
        Clean the answer based on GAIA scoring rules.
        Aggressively removes explanatory text to provide only the literal answer.
        """
        answer = answer.strip()

        # Remove JSON formatting and code blocks
        if answer.startswith('```'):
            # Extract content from code blocks
            lines = answer.split('\n')
            answer = '\n'.join([l for l in lines if not l.startswith('```')])
            answer = answer.strip()

        # Remove JSON structures like {"name":"FINISH","answer":"value"}
        if answer.startswith('{') and ('"name"' in answer or '"FINISH"' in answer):
            try:
                # Try to parse as JSON (json is imported at module level)
                parsed = json.loads(answer)
                # Extract the actual answer value from various possible keys
                for key in ['answer', 'arguments', 'vegetables', 'surname', 'value', 'result', 'submitted_answer']:
                    if key in parsed and parsed[key] and parsed[key] != "FINISH":
                        answer = str(parsed[key])
                        break
                # If still has "name" field, it's probably still JSON - extract any non-name value
                if isinstance(parsed, dict) and 'name' in parsed:
                    for key, value in parsed.items():
                        if key != 'name' and key != 'FINISH' and value and value != "FINISH":
                            answer = str(value)
                            break
            except (ValueError, TypeError):
                # Not valid JSON (or not a mapping) -- keep the raw text.
                pass

        # Remove common prefixes and explanatory phrases
        patterns_to_remove = [
            r'^(the answer is|answer:|final answer:|thus,|therefore,|so,|hence,)\s*',
            r'^(the\s+)?(correct\s+)?(number|city|country|name|value|total|result)\s+(is|are|was|were)\s*',
            # Strip list numbering like "1. Paris". Requires whitespace after
            # the dot so a bare numeric answer such as "42." is preserved
            # (the previous r'^\d+\.\s*' deleted it entirely).
            r'^\d+\.\s+',
            r'^[-•]\s*',  # Remove bullet points
        ]

        for pattern in patterns_to_remove:
            answer = re.sub(pattern, '', answer, flags=re.IGNORECASE)
            answer = answer.strip()

        # If answer contains multiple sentences, try to extract just the key info
        sentences = answer.split('.')
        if len(sentences) > 1:
            # Look for the shortest sentence that contains key info
            for sent in sentences:
                sent = sent.strip()
                # If it's short and contains a number or key word, use it
                if len(sent) < 50 and (any(char.isdigit() for char in sent) or len(sent.split()) <= 5):
                    answer = sent
                    break

        # Remove trailing explanations in parentheses
        answer = re.sub(r'\s*\([^)]*\)\s*$', '', answer)

        # If the question asks for a comma-separated list, ensure no spaces after commas
        if 'comma' in question.lower() and ('list' in question.lower() or 'separated' in question.lower()):
            answer = re.sub(r',\s+', ',', answer)

        # Clean numbers: remove currency symbols and commas
        if len(answer.split()) <= 5:  # Short answer, likely a number
            if any(char.isdigit() for char in answer):
                cleaned = answer
                for symbol in ['$', '€', '£', '¥', '%', ',']:
                    cleaned = cleaned.replace(symbol, '')

                # If after cleaning it's still a valid number, use the cleaned version
                try:
                    float(cleaned.strip())
                    answer = cleaned.strip()
                except ValueError:
                    pass  # Not a pure number, keep original

        # Final cleanup: remove quotes if they wrap the entire answer
        answer = answer.strip('"\'')

        return answer

    def __call__(self, question: str) -> str:
        """Invoke the agent with a question and return the validated answer."""
        try:
            print("\n" + "="*80)
            print(f"📋 QUESTION: {question[:150]}...")
            print("="*80)

            # Check cache first
            if self.use_cache and question in self.answer_cache:
                cached_answer = self.answer_cache[question]
                print("\n💾 Using cached answer (no LLM call!)")
                print(f"\n🎯 FINAL ANSWER: {cached_answer}")
                print("="*80 + "\n")
                return cached_answer

            # Create the initial state with the user's question
            state: AgentState = {"messages": [HumanMessage(content=question)]}

            # Run the graph with increased recursion limit
            print("\n🚀 Starting agent execution...")
            result = self.graph.invoke(state, config={"recursion_limit": 50})

            # Extract the final answer from the last AI message
            for message in reversed(result["messages"]):
                if isinstance(message, AIMessage):
                    raw_answer = message.content
                    # Clean the answer based on GAIA scoring rules
                    cleaned_answer = self._clean_answer(raw_answer, question)
                    # Ensure answer is never empty
                    validated_answer = ensure_valid_answer(cleaned_answer)

                    # Cache the answer and save to disk
                    if self.use_cache:
                        self.answer_cache[question] = validated_answer
                        self._save_cache()  # Persist to disk immediately

                    print(f"\n🎯 FINAL ANSWER: {validated_answer}")
                    print("="*80 + "\n")
                    return validated_answer

            print("\n⚠️ No answer found")
            print("="*80 + "\n")
            return ensure_valid_answer("")
        except Exception as e:
            # Top-level boundary: never let an exception escape to the runner.
            print(f"\n❌ ERROR: {e}")
            print("="*80 + "\n")
            return ensure_valid_answer(f"Agent failed with error: {e}")
|
cache/agent/constants.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared constants for the agent."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
# Shared on-disk cache directory, resolved relative to this package.
CACHE_DIR = Path(__file__).resolve().parent.parent / "cache"
# parents=True makes creation robust when intermediate directories are
# missing (bare exist_ok=True would raise FileNotFoundError in that case).
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
cache/agent/graph.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LangGraph state graph construction for the LLM-powered agent."""
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
from typing import Annotated, Literal, TypedDict
|
| 9 |
+
|
| 10 |
+
from langchain_core.messages import AIMessage, BaseMessage, ToolMessage
|
| 11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 12 |
+
from langchain_core.tools import render_text_description
|
| 13 |
+
from langchain_openai import ChatOpenAI
|
| 14 |
+
from langgraph.graph import StateGraph
|
| 15 |
+
from langgraph.graph.message import add_messages
|
| 16 |
+
from openai import RateLimitError
|
| 17 |
+
|
| 18 |
+
from .constants import CACHE_DIR
|
| 19 |
+
from .tools import tool_classes
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class AgentState(TypedDict):
    """State threaded through the LangGraph run.

    `messages` accumulates the conversation; the `add_messages` annotation is
    the reducer LangGraph uses to merge each node's returned messages into
    the existing list instead of replacing it.
    """
    messages: Annotated[list[BaseMessage], add_messages]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class SimpleRateLimiter:
    """Sliding-window limiter that throttles calls to a per-minute budget."""

    def __init__(self, calls_per_minute=50):
        self.calls_per_minute = calls_per_minute
        self.call_times = []

    def wait_if_needed(self):
        """Block until another call fits inside the one-minute window."""
        now = time.time()

        # Keep only timestamps from the last 60 seconds.
        recent = [stamp for stamp in self.call_times if now - stamp < 60]
        self.call_times = recent

        # Budget exhausted: sleep until the oldest call ages out, then reset.
        if len(recent) >= self.calls_per_minute:
            sleep_time = 60 - (now - recent[0]) + 1
            if sleep_time > 0:
                print(f"⏳ Rate limiter: waiting {sleep_time:.1f}s to avoid hitting limits...")
                time.sleep(sleep_time)
            self.call_times = []

        # Account for the call we are about to make.
        self.call_times.append(time.time())
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Module-level singletons initialized lazily in build_agent_graph()
# via _initialize_chains_and_tools(); None means "not yet initialized".
_tools = None
_agent_chain = None
_generation_chain = None
_primary_llm = None
_fallback_llm = None
# Shared limiter guarding every LLM call made through _call_llm_with_retry().
_rate_limiter = SimpleRateLimiter(calls_per_minute=40)  # Conservative limit
|
| 59 |
+
|
| 60 |
+
def _call_llm_with_retry(chain, state, max_retries=5):
    """
    Call LLM with exponential backoff retry logic.
    Falls back to cheaper model if primary keeps failing.

    Args:
        chain: a runnable (prompt | llm) invoked with the agent state.
        state: the AgentState dict passed to chain.invoke().
        max_retries: attempts made against the primary chain before
            switching to the fallback model.

    Returns:
        The chain's response message.

    Raises:
        Any non-rate-limit exception from the chain (not retried), or
        RuntimeError if every retry and the fallback both fail.
    """
    for attempt in range(max_retries):
        try:
            # Wait if we're approaching rate limits
            _rate_limiter.wait_if_needed()
            return chain.invoke(state)
        except RateLimitError as e:
            # Exponential backoff capped at 60s, with jitter to avoid
            # synchronized retries.
            wait_time = min(60, (2 ** attempt) + random.random())
            print(f"⚠️ Rate limit hit (attempt {attempt + 1}/{max_retries})")
            print(f" Waiting {wait_time:.1f}s before retry...")
            time.sleep(wait_time)
        except Exception as e:
            # For other errors, don't retry
            print(f"❌ LLM error: {e}")
            raise

    # If all retries failed, try fallback model
    print("🔄 All retries exhausted, switching to fallback model (gpt-4o-mini)...")
    try:
        # Rebuild chain with fallback LLM: reuse the prompt (chain.first)
        # in front of the fallback model.
        # NOTE(review): the fallback LLM is never passed through bind_tools,
        # so tool calling is unavailable on this path — confirm intended.
        if _fallback_llm is not None:
            fallback_chain = chain.first | _fallback_llm
            return fallback_chain.invoke(state)
    except Exception as e:
        print(f"❌ Fallback model also failed: {e}")
        raise

    # Reached only when _fallback_llm was never initialized.
    raise RuntimeError("All retry attempts and fallback failed")
|
| 93 |
+
|
| 94 |
+
def _initialize_chains_and_tools():
    """Initialize the tools and LLM chains. Called once when building the graph.

    Populates the module globals `_tools`, `_agent_chain`,
    `_generation_chain`, `_primary_llm` and `_fallback_llm`. Idempotent:
    returns immediately if `_tools` is already set.
    """
    global _tools, _agent_chain, _generation_chain, _primary_llm, _fallback_llm

    if _tools is not None:
        return  # Already initialized

    # Initialize PRIMARY LLM
    # NOTE(review): the log line says gpt-4o but the model actually
    # configured below is gpt-4.1 — update the message or the model.
    print("🔧 Initializing primary LLM: gpt-4o")
    _primary_llm = ChatOpenAI(
        #model="gpt-4o",
        model="gpt-4.1",
        temperature=0,
        verbose=True,
        request_timeout=60  # 60 second timeout
    )

    # Initialize FALLBACK LLM (gpt-4o-mini - cheaper, faster)
    print("🔧 Initializing fallback LLM: gpt-4o-mini")
    _fallback_llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        verbose=True,
        request_timeout=60
    )

    llm = _primary_llm

    # Instantiate the tools
    _tools = []
    for tool in tool_classes:
        # NOTE(review): both branches are identical (`tool()`), so this
        # check is currently redundant — kept for documentation value.
        if callable(tool) and not isinstance(tool, type):
            # It's a function that returns a tool instance (like create_wikipedia_tool)
            _tools.append(tool())
        else:
            # It's a class, instantiate it
            _tools.append(tool())

    # CRITICAL: Bind tools to the LLM using OpenAI's native function calling
    llm = llm.bind_tools(_tools)

    # Render the tools to a text description for the prompt
    rendered_tools = render_text_description(_tools)

    # Create the system prompt
    system_prompt = f"""You are a highly capable AI assistant designed to solve complex, real-world questions.

REASONING STRATEGY (CRITICAL):
1. **Decompose**: Break complex questions into smaller sub-questions
2. **Plan**: Before using tools, outline your complete strategy
3. **Execute**: Use tools systematically, one step at a time
4. **Verify**: Check each result before proceeding to the next step
5. **Self-correct**: If a tool fails or gives unexpected results, try alternative approaches
6. **Synthesize**: Combine information from multiple sources to form your final answer

FILE HANDLING - CRITICAL:
⚠️ **Files mentioned as "attached" are ALREADY in the current directory!**
- When question says "attached Excel file", "attached image", "attached .mp3" - use `list_files` to find them
- Files are pre-downloaded before you start, so they WILL be in current directory
- **NEVER** ask for URLs for "attached" files - they're already there!
- Workflow:
  1. Use `list_files` to see what's available
  2. Find the relevant file (Excel, image, mp3, etc.)
  3. Process it with appropriate tool:
     - Excel (.xlsx, .xls): use `read_excel` tool to get summary and data
     - CSV: use `python_repl` with pandas: `pd.read_csv('filename.csv')`
     - Python files (.py): use `execute_python_file` tool to run and get output
     - Text files: use `read_file` tool
     - Images (.png, .jpg): use `analyze_image` tool (Gemini vision) - great for chess, diagrams, text in images
     - MP3/Audio files: use `understand_audio` tool (Gemini audio) - transcribes and understands audio

MULTIMEDIA HANDLING:
- For YouTube videos: use `understand_video` tool with format: 'URL: <youtube_url> | QUESTION: <specific_question>'
  Example: understand_video('URL: https://www.youtube.com/watch?v=abc | QUESTION: How many bird species are visible?')
  This ensures Gemini knows exactly what to look for in the video
- For audio files (.mp3): use `understand_audio` tool - Gemini will transcribe and answer questions
- For images: use `analyze_image` tool - Gemini can read text, analyze chess positions, describe images
- For web URLs: use `download_file` if you need to download something from the internet

TOOL USAGE BEST PRACTICES:
- Use `calculator` for precise mathematical operations (faster than python_repl)
- Use `wikipedia` for factual knowledge about people, places, events
- Use `tavily_search` for recent information or specific facts
- Use `youtube_transcript` for YouTube video content analysis
- Use `read_excel` for quick Excel file inspection
- Use `python_repl` for complex data analysis and calculations
- Chain multiple tools when needed (e.g., search → extract info → calculate)

AVAILABLE TOOLS:
{rendered_tools}

RESPONSE FORMAT:
- For tool calls: return JSON with 'name' and 'arguments' keys
- When finished: return JSON with 'name' of 'FINISH'

⚠️ CRITICAL - PROVIDE ONLY THE FINAL ANSWER ⚠️
DO NOT include explanations, reasoning, or extra text in your final answer.
Examples:
- Question: "How many albums?" → Answer: "2" (NOT "Mercedes Sosa published 2 albums...")
- Question: "What city?" → Answer: "Paris" (NOT "The city is Paris")
- Question: "Total sales?" → Answer: "1234.56" (NOT "The total sales are $1,234.56")

BE EXTREMELY CONCISE. The scoring system only wants the literal answer.

CRITICAL - ANSWER FORMATTING RULES:
The scoring system is very strict about format. Follow these rules EXACTLY:

1. **For NUMBER answers**:
   - Remove currency symbols ($, €, £)
   - Remove percentage signs (%)
   - Remove commas from large numbers
   - Provide just the number: "1234.56" not "$1,234.56"

2. **For LIST answers** (comma-separated):
   - Use ONLY commas to separate items (or semicolons if specified)
   - NO extra spaces around commas
   - Count must match exactly
   - Order matters!
   - Example: "apple,banana,cherry" NOT "apple, banana, cherry"

3. **For STRING answers**:
   - Be concise - extra words will cause mismatch
   - Capitalization doesn't matter
   - Punctuation doesn't matter
   - Spaces don't matter
   - But be precise with the core answer

4. **For NAMES**:
   - Use full names if asked
   - Use last names only if specified
   - Check the question carefully for format requirements

5. **For CODES** (IOC, airport, etc.):
   - Use exact format requested (uppercase/lowercase)
   - No extra characters

DOUBLE-CHECK YOUR FINAL ANSWER FORMAT BEFORE RETURNING!
"""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("placeholder", "{messages}"),
    ])

    # Create the LLM chains
    _agent_chain = prompt | llm
    generation_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant. Answer the user's question based on the conversation history."),
        ("placeholder", "{messages}"),
    ])
    _generation_chain = generation_prompt | llm
|
| 245 |
+
|
| 246 |
+
def agent_node(state: AgentState) -> dict:
    """Invokes the LLM to decide on the next action with retry logic."""
    print("\n🤖 [AGENT NODE] Deciding next action...")

    # Rate-limited, retrying invocation of the tool-bound agent chain.
    response = _call_llm_with_retry(_agent_chain, state)

    # Log either the requested tool calls or a preview of the direct answer.
    pending_calls = getattr(response, 'tool_calls', None)
    if pending_calls:
        print(f"📝 [AGENT NODE] Requesting {len(response.tool_calls)} tool call(s)")
        for tc in response.tool_calls:
            print(f" - {tc['name']}")
    else:
        print(f"📝 [AGENT NODE] Response: {response.content[:200]}...")

    return {"messages": [response]}
|
| 262 |
+
|
| 263 |
+
def generation_node(state: AgentState) -> dict:
    """Invokes the LLM to generate a final answer."""
    print("\n✨ [GENERATION NODE] Creating final answer...")

    response = _generation_chain.invoke(state)
    print(f"✅ [GENERATION NODE] Final answer: {response.content[:200]}...")

    # Re-wrap as a plain AIMessage so no stray tool-call metadata leaks
    # into the returned state.
    answer = AIMessage(content=response.content)
    return {"messages": [answer]}
|
| 269 |
+
|
| 270 |
+
def tool_node(state: AgentState) -> dict:
    """Runs the tools using OpenAI's native tool calling.

    Answers EVERY tool_call in the last AI message with a ToolMessage —
    including unknown tool names. The OpenAI chat API rejects the next
    request if any tool_call_id is left without a tool-role response, so
    skipping unknown tools (as before) would wedge the whole conversation.

    Returns:
        dict with a "messages" list of ToolMessage results (possibly empty).
    """
    print("\n🔧 [TOOL NODE] Executing tools...")
    last_message = state["messages"][-1]

    # Check if the message has tool_calls (OpenAI's native format)
    if not hasattr(last_message, 'tool_calls') or not last_message.tool_calls:
        print("⚠️ [TOOL NODE] No tool calls found")
        return {"messages": []}

    tool_messages = []
    for tool_call in last_message.tool_calls:
        tool_name = tool_call['name']
        tool_args = tool_call['args']
        tool_call_id = tool_call['id']

        print(f" 🛠️ Calling tool: {tool_name}")
        print(f" Args: {str(tool_args)[:100]}...")

        tool_to_call = next((t for t in _tools if t.name == tool_name), None)
        if tool_to_call:
            try:
                observation = tool_to_call.invoke(tool_args)
                result_preview = str(observation)[:150]
                print(f" ✅ Result: {result_preview}...")
                tool_messages.append(ToolMessage(
                    content=str(observation),
                    tool_call_id=tool_call_id
                ))
            except Exception as e:
                # Report tool failure back to the model instead of crashing.
                print(f" ❌ Error: {e}")
                tool_messages.append(ToolMessage(
                    content=f"Error: {e}",
                    tool_call_id=tool_call_id
                ))
        else:
            print(f" ⚠️ Tool '{tool_name}' not found")
            # BUGFIX: previously no ToolMessage was emitted here, leaving the
            # tool_call_id unanswered and invalidating the next API request.
            tool_messages.append(ToolMessage(
                content=f"Error: tool '{tool_name}' is not available.",
                tool_call_id=tool_call_id
            ))

    print(f"🔧 [TOOL NODE] Executed {len(tool_messages)} tool(s)")
    return {"messages": tool_messages}
|
| 310 |
+
|
| 311 |
+
def should_continue(state: AgentState) -> Literal["tools", "__end__"]:
    """Determines the next node to execute based on OpenAI's tool calls."""
    last_message = state["messages"][-1]

    # A message carrying tool calls routes to the tool executor;
    # anything else terminates the graph.
    if getattr(last_message, 'tool_calls', None):
        print("➡️ Routing to: TOOLS")
        return "tools"

    print("➡️ Routing to: END")
    return "__end__"
|
| 323 |
+
|
| 324 |
+
def build_agent_graph() -> StateGraph:
    """Builds the state graph for the agent.

    Layout: agent -> (tools -> agent loop) until should_continue routes
    to "__end__".

    Returns:
        The compiled graph. NOTE(review): the annotation says StateGraph,
        but `workflow.compile()` returns the compiled runnable — confirm
        and adjust the annotation if desired.
    """
    # Initialize tools and chains (only happens once)
    _initialize_chains_and_tools()

    workflow = StateGraph(AgentState)
    workflow.add_node("agent", agent_node)
    workflow.add_node("tools", tool_node)
    workflow.set_entry_point("agent")
    # should_continue returns "tools" or "__end__" to pick the next edge.
    workflow.add_conditional_edges("agent", should_continue)
    # After executing tools, always hand control back to the agent.
    workflow.add_edge("tools", "agent")
    return workflow.compile()
|
| 336 |
+
|
| 337 |
+
def agent_graph_mermaid() -> str:
    """Returns the LangGraph structure in Mermaid format."""
    compiled = build_agent_graph()
    return compiled.get_graph().draw_mermaid()
|
| 341 |
+
|
| 342 |
+
def agent_graph_png_base64(filename: str = "agent_graph.png") -> str | None:
    """Generates a PNG of the agent graph and returns it as a base64 string.

    Returns None (with a printed warning) if rendering or reading fails.
    """
    output_path = CACHE_DIR / filename
    graph = build_agent_graph()

    # Rendering depends on optional graphviz support; degrade gracefully.
    try:
        graph.get_graph().draw_png(str(output_path))
    except Exception as exc:
        print(f"Warning: Failed to render agent graph PNG: {exc}")
        return None

    try:
        png_bytes = output_path.read_bytes()
    except Exception as exc:
        print(f"Warning: Unable to read rendered graph PNG: {exc}")
        return None

    return base64.b64encode(png_bytes).decode("ascii")
|
cache/agent/tools.py
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LangGraph tools for the agent."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Optional
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
from langchain.tools import BaseTool
|
| 9 |
+
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
|
| 10 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
| 11 |
+
from langchain_experimental.tools import PythonAstREPLTool
|
| 12 |
+
from langchain_tavily import TavilySearch
|
| 13 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 14 |
+
import re as regex
|
| 15 |
+
import requests
|
| 16 |
+
from urllib.parse import urlparse
|
| 17 |
+
import google.generativeai as genai
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def find_file(filename: str) -> str:
    """Locate *filename*, checking the current directory then downloads/.

    Args:
        filename: a bare name or relative path.

    Returns:
        The first existing candidate path as a string. If the file exists
        nowhere, returns the downloads/ path as the default, so callers'
        error messages point at the expected drop location.
    """
    # FIX: the original also checked Path(".") / filename, but pathlib
    # normalizes that to Path(filename) — an exact duplicate of the first
    # candidate — so only two distinct locations exist.
    candidates = (
        Path(filename),                 # Current directory
        Path("downloads") / filename,   # Downloads directory
    )

    for candidate in candidates:
        if candidate.exists():
            return str(candidate)

    # File not found, return the downloads path as default
    return str(Path("downloads") / filename)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class ListFilesTool(BaseTool):
    """Tool to list files in the current directory or a specified directory."""

    name: str = "list_files"
    description: str = "Lists all files in the current directory or a specified directory. Input should be a directory path (optional, defaults to current directory)."

    def _run(self, directory: str = ".") -> str:
        """List files in the specified directory."""
        try:
            # For the default directory, also surface pre-downloaded
            # attachments living under downloads/.
            search_roots = [Path(directory)]
            if directory == ".":
                search_roots.append(Path("downloads"))

            all_files = []
            for root in search_roots:
                if not root.exists():
                    continue
                location = "downloads/" if root.name == "downloads" else "./"
                all_files.extend(
                    f"{location}{item.name} ({item.stat().st_size} bytes)"
                    for item in root.iterdir()
                    if item.is_file()
                )

            if not all_files:
                return f"No files found in '{directory}'."
            return "Files found:\n" + "\n".join(all_files)
        except Exception as e:
            return f"Error listing files: {str(e)}"

    async def _arun(self, directory: str = ".") -> str:
        """Async version."""
        return self._run(directory)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class ReadFileTool(BaseTool):
    """Tool to read the contents of a text file."""

    name: str = "read_file"
    description: str = "Reads the contents of a text file. Input should be the file path."

    def _run(self, file_path: str) -> str:
        """Read the file contents."""
        try:
            # Resolve against current dir / downloads via the shared helper.
            resolved = Path(find_file(file_path))
            if not resolved.exists():
                return f"File '{file_path}' does not exist in current directory or downloads/."

            return resolved.read_text(encoding='utf-8')
        except Exception as e:
            return f"Error reading file: {str(e)}"

    async def _arun(self, file_path: str) -> str:
        """Async version."""
        return self._run(file_path)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class ExcelReaderTool(BaseTool):
    """Tool for reading and analyzing Excel files."""

    name: str = "read_excel"
    description: str = "Reads an Excel file and returns its contents as a pandas DataFrame. Input should be the file path to the Excel file (.xlsx or .xls)."

    def _run(self, file_path: str) -> str:
        """Read Excel file and return summary."""
        try:
            import pandas as pd

            # Resolve against current dir / downloads via the shared helper.
            path = Path(find_file(file_path))
            if not path.exists():
                return f"File '{file_path}' does not exist in current directory or downloads/."

            df = pd.read_excel(path)

            # Assemble a single summary string: shape, columns, head,
            # dtypes and describe().
            sections = [
                "Excel file loaded successfully.\n",
                f"Shape: {df.shape[0]} rows, {df.shape[1]} columns\n",
                f"Columns: {', '.join(df.columns.tolist())}\n\n",
                f"First few rows:\n{df.head().to_string()}\n\n",
                f"Data types:\n{df.dtypes.to_string()}\n\n",
                f"Summary statistics:\n{df.describe().to_string()}",
            ]
            return "".join(sections)
        except Exception as e:
            return f"Error reading Excel file: {str(e)}"

    async def _arun(self, file_path: str) -> str:
        """Async version."""
        return self._run(file_path)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class DownloadFileTool(BaseTool):
    """Tool for downloading files from URLs."""

    name: str = "download_file"
    description: str = "Downloads a file from a URL and saves it to the current directory. Input should be the URL of the file to download. Returns the local file path."

    def _run(self, url: str) -> str:
        """Download file from URL.

        Derives a filename from the URL path, then the Content-Disposition
        header, then the Content-Type, in that order. Saves the file into
        the current directory and reports the saved filename.
        """
        try:
            # Parse URL to get filename
            parsed = urlparse(url)
            filename = os.path.basename(parsed.path)

            # If no filename in URL, generate one
            if not filename or '.' not in filename:
                # Try to get from Content-Disposition header
                head = requests.head(url, allow_redirects=True, timeout=10)
                if 'Content-Disposition' in head.headers:
                    content_disp = head.headers['Content-Disposition']
                    if 'filename=' in content_disp:
                        filename = content_disp.split('filename=')[1].strip('"\'')

                # Still no filename? Generate one based on content type
                if not filename or '.' not in filename:
                    content_type = head.headers.get('Content-Type', '')
                    ext = '.bin'
                    if 'image' in content_type:
                        ext = '.png' if 'png' in content_type else '.jpg'
                    elif 'excel' in content_type or 'spreadsheet' in content_type:
                        ext = '.xlsx'
                    elif 'pdf' in content_type:
                        ext = '.pdf'
                    filename = f"downloaded_file{ext}"

            # Download the file
            print(f"📥 Downloading: {url}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            # Save to current directory
            filepath = Path(filename)
            with open(filepath, 'wb') as f:
                f.write(response.content)

            file_size = len(response.content)
            # BUGFIX: report the actual saved path (the message previously
            # contained a "(unknown)" placeholder), so the agent — and the
            # tool description's "Returns the local file path" promise —
            # can actually use the result.
            print(f"✅ Downloaded: {filepath} ({file_size} bytes)")

            return f"File downloaded successfully: {filepath} ({file_size} bytes)"
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Async version."""
        return self._run(url)
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
class YouTubeTranscriptTool(BaseTool):
    """Tool for getting transcripts from YouTube videos."""

    name: str = "youtube_transcript"
    description: str = "Gets the transcript/captions from a YouTube video. Input should be either a YouTube URL or video ID."

    def _run(self, video_input: str) -> str:
        """Get YouTube transcript.

        Accepts a raw 11-character video ID or any common YouTube URL.
        Tries an English transcript first, then falls back to the first
        transcript available in any language.
        """
        try:
            # Extract video ID from URL if needed
            video_id = video_input
            if 'youtube.com' in video_input or 'youtu.be' in video_input:
                # Extract video ID from various YouTube URL formats
                patterns = [
                    r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
                    r'(?:embed\/)([0-9A-Za-z_-]{11})',
                    r'(?:watch\?v=)([0-9A-Za-z_-]{11})'
                ]
                for pattern in patterns:
                    match = regex.search(pattern, video_input)
                    if match:
                        video_id = match.group(1)
                        break

            # Try to get transcript (auto-generated or manual)
            # BUGFIX: both handlers below were bare `except:` clauses, which
            # also swallow KeyboardInterrupt/SystemExit; narrowed to Exception.
            try:
                transcript_data = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            except Exception:
                # Try any available language
                try:
                    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
                    transcript = next(iter(transcript_list))
                    transcript_data = transcript.fetch()
                except Exception:
                    return f"Error: No transcript available for video {video_id}"

            # Format transcript with per-line start timestamps.
            full_transcript = "\n".join([f"[{item['start']:.1f}s] {item['text']}" for item in transcript_data])

            return f"YouTube Transcript for video {video_id}:\n\n{full_transcript}"
        except Exception as e:
            return f"Error getting YouTube transcript: {str(e)}"

    async def _arun(self, video_input: str) -> str:
        """Async version."""
        return self._run(video_input)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
class CalculatorTool(BaseTool):
    """Tool for performing mathematical calculations safely."""

    name: str = "calculator"
    description: str = "Useful for mathematical calculations. Input should be a mathematical expression as a string (e.g., '2 + 2', '(5 * 3) / 2')."

    def _run(self, expression: str) -> str:
        """Evaluate a mathematical expression safely."""
        try:
            # SECURITY NOTE: eval on model-provided text. The substring
            # blocklist plus the empty __builtins__ namespace limits — but
            # does not eliminate — the attack surface.
            lowered = expression.lower()
            for forbidden in ('import', 'exec', 'eval', '__'):
                if forbidden in lowered:
                    return "Error: Expression contains forbidden operations."

            # Evaluate using Python's eval with restricted namespace
            result = eval(expression, {"__builtins__": {}}, {})
            return str(result)
        except Exception as e:
            return f"Error calculating: {str(e)}"

    async def _arun(self, expression: str) -> str:
        """Async version."""
        return self._run(expression)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
class GeminiVideoTool(BaseTool):
    """Tool for understanding YouTube videos using Google Gemini."""

    name: str = "understand_video"
    description: str = """Analyzes YouTube videos using Google Gemini's native video understanding.
Input format: 'URL: <youtube_url> | QUESTION: <specific_question>'
Example: 'URL: https://www.youtube.com/watch?v=abc123 | QUESTION: How many birds are visible?'
Can answer questions about video content without transcripts."""

    def _run(self, youtube_url: str) -> str:
        """Analyze YouTube video using Gemini."""
        try:
            # Check if GEMINI_API_KEY is available
            api_key = os.getenv("GEMINI_API_KEY")
            if not api_key:
                return "Error: GEMINI_API_KEY not set. Cannot analyze video."

            # Parse the 'URL: ... | QUESTION: ...' input format; a bare URL
            # (no '|') is also accepted.
            url, question = youtube_url, None
            if '|' in youtube_url:
                for segment in youtube_url.split('|'):
                    segment = segment.strip()
                    if segment.startswith('URL:'):
                        url = segment.replace('URL:', '').strip()
                    elif segment.startswith('QUESTION:'):
                        question = segment.replace('QUESTION:', '').strip()

            # Configure Gemini
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel('gemini-2.0-flash-exp')

            # Targeted prompt when a question was supplied, generic otherwise.
            if question:
                prompt = f"Watch this YouTube video carefully and answer the following specific question: {question}\n\nProvide a direct, concise answer based only on what you observe in the video."
            else:
                prompt = "Analyze this YouTube video and describe what you see in detail. Pay attention to all visual details, objects, people, actions, and events."

            response = model.generate_content([prompt, url])
            return f"Video Analysis:\n{response.text}"
        except Exception as e:
            return f"Error analyzing video: {str(e)}"

    async def _arun(self, youtube_url: str) -> str:
        """Async version."""
        return self._run(youtube_url)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
class GeminiAudioTool(BaseTool):
    """Tool for understanding audio files (MP3) using Google Gemini."""

    name: str = "understand_audio"
    description: str = "Analyzes audio files (MP3) using Google Gemini's audio understanding. Input should be the file path to the audio file. Can transcribe and answer questions about audio content."

    def _run(self, file_path: str) -> str:
        """Analyze audio file using Gemini.

        Uploads the file, polls until Gemini finishes processing it, then
        asks for a complete transcription.
        """
        try:
            # Check if GEMINI_API_KEY is available
            api_key = os.getenv("GEMINI_API_KEY")
            if not api_key:
                return "Error: GEMINI_API_KEY not set. Cannot analyze audio."

            # Find file in multiple locations
            actual_path = find_file(file_path)
            if not os.path.exists(actual_path):
                return f"Error: Audio file '{file_path}' not found in current directory or downloads/."

            file_path = actual_path  # Use the found path

            # Configure Gemini
            genai.configure(api_key=api_key)

            # Upload audio file to Gemini
            print(f"Uploading audio file to Gemini: {file_path}")
            audio_file = genai.upload_file(path=file_path)

            # Poll until Gemini finishes processing the upload.
            # FIX: removed the redundant function-local `import time` —
            # `time` is already imported at module level.
            # NOTE(review): this loop has no upper bound; a permanently
            # stuck upload would spin forever — consider a max-wait.
            while audio_file.state.name == "PROCESSING":
                time.sleep(2)
                audio_file = genai.get_file(audio_file.name)

            if audio_file.state.name == "FAILED":
                return f"Error: Gemini failed to process audio file"

            # Analyze audio
            model = genai.GenerativeModel('gemini-2.0-flash-exp')
            prompt = "Please transcribe this audio file and provide the complete content. Pay attention to all details, numbers, names, and instructions mentioned."

            response = model.generate_content([audio_file, prompt])

            return f"Audio Transcription:\n{response.text}"
        except Exception as e:
            return f"Error analyzing audio: {str(e)}"

    async def _arun(self, file_path: str) -> str:
        """Async version."""
        return self._run(file_path)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
class ImageAnalysisTool(BaseTool):
    """Tool for analyzing images using Google Gemini."""

    name: str = "analyze_image"
    description: str = "Analyzes images using Google Gemini's vision capabilities. Input should be the file path to the image. Can describe images, read text, analyze chess positions, etc."

    def _run(self, file_path: str) -> str:
        """Run a detailed Gemini vision pass over the image at ``file_path``.

        Returns the model's analysis text, or an "Error: ..." string on
        any failure. Never raises.
        """
        try:
            # Bail out early when no Gemini credentials are configured.
            key = os.getenv("GEMINI_API_KEY")
            if not key:
                return "Error: GEMINI_API_KEY not set. Cannot analyze image."

            # Locate the image (checks cwd and downloads/ via find_file).
            resolved = find_file(file_path)
            if not os.path.exists(resolved):
                return f"Error: Image file '{file_path}' not found in current directory or downloads/."
            file_path = resolved

            genai.configure(api_key=key)

            print(f"Uploading image to Gemini: {file_path}")
            uploaded = genai.upload_file(path=file_path)

            # Ask the vision model for an exhaustive description.
            vision_model = genai.GenerativeModel('gemini-2.0-flash-exp')
            instructions = "Please analyze this image in detail. Describe everything you see including: objects, text, positions, colors, patterns, and any relevant information. If this is a chess board, provide the position in detail. If there's text, transcribe it."

            reply = vision_model.generate_content([uploaded, instructions])
            return f"Image Analysis:\n{reply.text}"
        except Exception as exc:
            # Report failures as strings so the calling agent keeps running.
            return f"Error analyzing image: {str(exc)}"

    async def _arun(self, file_path: str) -> str:
        """Async wrapper delegating to the synchronous implementation."""
        return self._run(file_path)
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
class ExecutePythonFileTool(BaseTool):
    """Tool for executing Python files and capturing output."""

    name: str = "execute_python_file"
    description: str = "Executes a Python file and returns its output. Input should be the file path to the .py file. Captures stdout and returns the final output."

    def _run(self, file_path: str) -> str:
        """Execute the given .py file in a subprocess and report its output.

        Collects stdout, stderr, and (when nonzero) the exit code into one
        string. Returns an "Error: ..." string on timeout or failure.
        """
        try:
            import subprocess
            import sys

            # Resolve the script's location (cwd or downloads/).
            script = find_file(file_path)
            if not os.path.exists(script):
                return f"Error: Python file '{file_path}' not found in current directory or downloads/."

            # Run with the current interpreter; hard 30s cap so runaway
            # scripts cannot block the agent.
            completed = subprocess.run(
                [sys.executable, script],
                capture_output=True,
                text=True,
                timeout=30,
            )

            sections = []
            if completed.stdout:
                sections.append(f"Output:\n{completed.stdout}\n")
            if completed.stderr:
                sections.append(f"Errors:\n{completed.stderr}\n")
            if completed.returncode != 0:
                sections.append(f"Exit code: {completed.returncode}\n")

            if sections:
                return "".join(sections)
            return "Script executed successfully with no output."
        except subprocess.TimeoutExpired:
            return "Error: Script execution timed out (30 seconds limit)."
        except Exception as e:
            return f"Error executing Python file: {str(e)}"

    async def _arun(self, file_path: str) -> str:
        """Async variant; simply calls the blocking implementation."""
        return self._run(file_path)
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
# Create Wikipedia tool wrapper
|
| 455 |
+
def create_wikipedia_tool():
    """Factory for a Wikipedia search tool (top 2 results, 4000 chars each)."""
    return WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000)
    )
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
# Registry of tool classes (plus one factory) that make up the agent's toolset.
# Entries are classes to be instantiated by the consumer, except for
# create_wikipedia_tool, which is a factory function returning an instance
# (per the inline note) — presumably the consumer distinguishes the two.
# TODO confirm how these are instantiated at the call site.
tool_classes = [
    DuckDuckGoSearchRun,
    TavilySearch,
    PythonAstREPLTool,
    ListFilesTool,
    ReadFileTool,
    ExcelReaderTool,
    DownloadFileTool,
    ExecutePythonFileTool,  # Execute Python files
    YouTubeTranscriptTool,
    GeminiVideoTool,  # Gemini video understanding
    GeminiAudioTool,  # Gemini audio transcription (MP3)
    ImageAnalysisTool,  # Gemini image analysis (chess, diagrams, etc.)
    CalculatorTool,
    create_wikipedia_tool  # This returns an instance, not a class
]
|
cache/agent_graph.png
ADDED
|
requirements.txt
CHANGED
|
@@ -1,2 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gradio
|
| 2 |
-
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langgraph
|
| 3 |
+
langchain-huggingface
|
| 4 |
+
langchain-community
|
| 5 |
+
langchain-core
|
| 6 |
+
huggingface_hub
|
| 7 |
+
python-dotenv
|
| 8 |
gradio
|
| 9 |
+
requests
|
| 10 |
+
lxml
|
| 11 |
+
html5lib
|
| 12 |
+
beautifulsoup4
|
| 13 |
+
tavily-python
|
| 14 |
+
duckduckgo-search
|
| 15 |
+
langchain-openai
|
| 16 |
+
langchain-experimental
|
| 17 |
+
pandas
|
| 18 |
+
openpyxl
|
| 19 |
+
langchain-tavily
|
| 20 |
+
wikipedia
|
| 21 |
+
youtube-transcript-api
|
| 22 |
+
google-generativeai
|
rows.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|