# Last commit: "Clean project structure with English comments" (b712b2b, Diego-Fco)
import io
from contextlib import redirect_stdout
from smolagents import (
CodeAgent,
LiteLLMModel,
InferenceClientModel,
DuckDuckGoSearchTool,
VisitWebpageTool,
WikipediaSearchTool
)
from config import (
USE_LOCAL_MODEL,
OLLAMA_MODEL_ID, OLLAMA_API_BASE, OLLAMA_API_KEY,
HF_MODEL_ID, HF_TOKEN,
MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
QUESTION_TYPES
)
from tools import smart_visit, get_youtube_info
from utils import clean_answer, clean_ansi_codes
class EnhancedAgent:
    """Enhanced agent with question-type specific strategies.

    Wraps a smolagents ``CodeAgent`` equipped with the DuckDuckGo search,
    webpage visit and Wikipedia search tools plus the project tools
    ``smart_visit`` and ``get_youtube_info``. The backing model is chosen by
    the ``USE_LOCAL_MODEL`` configuration flag.
    """

    # Substrings that mark a captured log line as framework chatter rather
    # than a candidate answer.
    _LOG_NOISE_MARKERS = ('===', '---', 'Step', 'Tool')

    # Log lines this long (or longer) are never used as a fallback answer.
    _MAX_FALLBACK_ANSWER_LEN = 200

    def __init__(self):
        """Create the model and the underlying ``CodeAgent``.

        Uses ``LiteLLMModel`` with the Ollama settings when
        ``USE_LOCAL_MODEL`` is truthy, otherwise ``InferenceClientModel``
        with the HuggingFace settings. Progress is reported on stdout.
        """
        print(" 🤖 Initializing agent...")

        if USE_LOCAL_MODEL:
            # Use the local Ollama server
            self.model = LiteLLMModel(
                model_id=OLLAMA_MODEL_ID,
                api_base=OLLAMA_API_BASE,
                api_key=OLLAMA_API_KEY,
            )
            print(f" 📦 Model: {OLLAMA_MODEL_ID} (local)")
        else:
            # Use HuggingFace API
            self.model = InferenceClientModel(
                model_id=HF_MODEL_ID,
                token=HF_TOKEN,
            )
            print(f" ☁️ Model: {HF_MODEL_ID} (HuggingFace)")

        tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            smart_visit,
            get_youtube_info,
        ]
        self.agent = CodeAgent(
            tools=tools,
            model=self.model,
            max_steps=MAX_STEPS,
            verbosity_level=VERBOSITY_LEVEL,
            additional_authorized_imports=AUTHORIZED_IMPORTS,
        )

    def build_prompt(self, question, local_file, question_type):
        """Build a prompt tailored to the type of question.

        The prompt is the concatenation of a task header containing the
        question, a strategy section selected by ``question_type`` and a
        fixed description of the expected final output format.

        Args:
            question: The question text.
            local_file: Path of the attached file, interpolated into the
                file-based strategies (rendered as ``None`` when absent).
            question_type: One of the ``QUESTION_TYPES`` values; unknown
                values fall back to the GENERAL strategy.

        Returns:
            str: The complete prompt.
        """
        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.
QUESTION: {question}
"""

        # NOTE: the prompt text below is runtime data sent to the model; it is
        # kept flush-left inside the literals so no indentation leaks into it.
        strategies = {
            QUESTION_TYPES['YOUTUBE_VIDEO']: """
STRATEGY - YouTube Video:
1. Extract the video ID from the URL in the question
2. Use get_youtube_info tool to get context
3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
4. Look for Reddit threads, forums, or blogs discussing this video
5. Find the specific information requested
IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
""",
            QUESTION_TYPES['IMAGE_FILE']: f"""
STRATEGY - Image File:
1. File '{local_file}' is in current directory
2. You CANNOT read image files directly with Python
3. Search online for: "{local_file}" OR search for keywords from the question
4. Look for discussions, analysis, or descriptions of this image online
5. For chess positions: search "[piece positions] chess position solution"
IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
""",
            QUESTION_TYPES['AUDIO_FILE']: f"""
STRATEGY - Audio File:
1. File '{local_file}' is in current directory
2. You CANNOT play or transcribe audio with Python
3. Search online for: "{local_file}" OR the exact question text
4. Look for transcripts, Reddit threads, or forums discussing this audio
IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
""",
            QUESTION_TYPES['DATA_FILE']: f"""
STRATEGY - Data File (Excel/CSV):
1. File '{local_file}' is in current directory
2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
3. Explore columns with df.columns and df.head()
4. Filter and sum/count as needed
5. Double-check calculations
CODE TEMPLATE:
```python
import pandas as pd
df = pd.read_excel('{local_file}') # or read_csv
print(df.columns)
print(df.head())
# ... your analysis
```
""",
            QUESTION_TYPES['CODE_FILE']: f"""
STRATEGY - Code File:
1. File '{local_file}' is in current directory
2. Read it with open('{local_file}', 'r').read()
3. Analyze the code logic carefully
4. If needed, execute it: exec(open('{local_file}').read())
5. Return the requested output
IMPORTANT: Read and understand before executing.
""",
            QUESTION_TYPES['WIKIPEDIA']: """
STRATEGY - Wikipedia Search:
1. Identify the exact topic/entity from the question
2. Use web_search to find the correct Wikipedia article URL
3. Use smart_visit to read the Wikipedia page content
4. Extract the specific information requested (dates, numbers, names, etc.)
5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()
TIPS:
- Search: "[topic] Wikipedia 2022" for latest version
- For discographies: look for "Discography" section or table
- For featured articles: search "Wikipedia Featured Article [topic] [date]"
- ALWAYS create a list and count programmatically, don't count manually
""",
            QUESTION_TYPES['COUNTING']: """
STRATEGY - Counting Task:
1. Research and LIST all items first (don't just count)
2. Use smart_visit to get complete data from Wikipedia or official sources
3. Store items in a Python list: items = []
4. Count with len(items) and verify manually
5. Double-check you haven't missed anything
IMPORTANT: First collect ALL items, THEN count. Show your work.
""",
            QUESTION_TYPES['TEXT_MANIPULATION']: """
STRATEGY - Text Manipulation:
1. Read the question VERY carefully
2. If text is backwards, reverse it: text[::-1]
3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
4. Return ONLY the answer, no explanation
EXAMPLE: ".rewsna eht sa 'tfel' drow..."
→ Reverse to read: "...word 'left' as the answer."
→ Opposite of "left" is "right"
""",
            QUESTION_TYPES['GENERAL']: """
STRATEGY - General Research:
1. Break down the question into sub-tasks
2. Use web_search for initial research
3. Use smart_visit to read relevant pages in detail
4. Cross-reference multiple sources if needed
5. Extract the precise answer requested
TIPS:
- Be specific in searches: include years, full names, exact terms
- Read carefully - answers are often in tables, lists, or footnotes
"""
        }

        # Unknown question types use the general research strategy.
        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])

        output_format = """
FINAL OUTPUT FORMAT:
Return ONLY the answer value. No markdown, no "The answer is", no explanations.
Examples of GOOD answers:
- "3"
- "right"
- "Ian Rose"
- "14.50"
- "d5, e2"
Examples of BAD answers:
- "The answer is 3"
- "**3**"
- "Based on my research, the answer is 3."
"""
        return base_context + strategy + output_format

    @classmethod
    def _answer_from_logs(cls, logs):
        """Return the last plausible answer line found in ``logs``, or ``""``.

        Lines are examined from the end. A line qualifies when it is not
        blank, contains none of ``_LOG_NOISE_MARKERS`` and, once stripped, is
        shorter than ``_MAX_FALLBACK_ANSWER_LEN`` characters. ``logs`` is
        expected to be free of terminal escape sequences already.
        """
        for line in reversed(logs.split('\n')):
            candidate = line.strip()
            if not candidate:
                continue
            if any(marker in line for marker in cls._LOG_NOISE_MARKERS):
                continue
            if len(candidate) < cls._MAX_FALLBACK_ANSWER_LEN:
                return candidate
        return ""

    def solve(self, question, local_file=None, question_type=None):
        """Solve a question using an optimized strategy.

        Everything the agent prints to stdout while running is captured and
        returned as the execution log.

        Args:
            question: The question text.
            local_file: Path to attached file (optional).
            question_type: Detected question type; defaults to
                ``QUESTION_TYPES['GENERAL']`` when ``None``.

        Returns:
            tuple: ``(answer, execution logs)``. The answer is ``"Error"``
            when the run raised an exception; the logs are passed through
            ``clean_ansi_codes`` before being returned.
        """
        if question_type is None:
            question_type = QUESTION_TYPES['GENERAL']

        prompt = self.build_prompt(question, local_file, question_type)
        log_capture = io.StringIO()
        final_answer = "Error"

        try:
            with redirect_stdout(log_capture):
                answer = self.agent.run(prompt)
            final_answer = clean_answer(answer)

            # If nothing usable is left after cleaning, look in the logs.
            if not final_answer or final_answer == "Error":
                # Strip escape sequences first (clean_ansi_codes is presumed
                # to remove terminal colour codes) so that colour-only lines
                # are skipped and the recovered answer carries no control
                # characters.
                recovered = self._answer_from_logs(
                    clean_ansi_codes(log_capture.getvalue())
                )
                if recovered:
                    final_answer = recovered
        except Exception as e:
            # Top-level boundary: a failed run is reported through the log
            # and the "Error" sentinel instead of propagating to the caller.
            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
            final_answer = "Error"

        return final_answer, clean_ansi_codes(log_capture.getvalue())