# Final_Assignment_Template
#
# Sleeping
#
# File size: 8,282 Bytes

import io
from contextlib import redirect_stdout

from smolagents import (
    CodeAgent, 
    LiteLLMModel, 
    InferenceClientModel,
    DuckDuckGoSearchTool, 
    VisitWebpageTool, 
    WikipediaSearchTool
)

from config import (
    USE_LOCAL_MODEL,
    OLLAMA_MODEL_ID, OLLAMA_API_BASE, OLLAMA_API_KEY,
    HF_MODEL_ID, HF_TOKEN,
    MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
    QUESTION_TYPES
)
from tools import smart_visit, get_youtube_info
from utils import clean_answer, clean_ansi_codes


class EnhancedAgent:
    """Enhanced agent with question-type specific strategies.

    Wraps a smolagents ``CodeAgent`` (backed either by a local LiteLLM/Ollama
    model or by the HuggingFace inference API, depending on
    ``USE_LOCAL_MODEL``) and feeds it a prompt tailored to the detected
    question type.
    """

    # Substrings that mark a captured log line as framing/noise rather than
    # a candidate answer when scraping the logs as a last resort.
    _LOG_NOISE_MARKERS = ('===', '---', 'Step', 'Tool')

    # A log line must be strictly shorter than this to be accepted as a
    # fallback answer.
    _MAX_FALLBACK_ANSWER_LEN = 200

    def __init__(self):
        """Create the model selected by the configuration and build the agent."""
        print("   🤖 Initializing agent...")

        if USE_LOCAL_MODEL:
            # Use a local Ollama model through LiteLLM.
            self.model = LiteLLMModel(
                model_id=OLLAMA_MODEL_ID,
                api_base=OLLAMA_API_BASE,
                api_key=OLLAMA_API_KEY
            )
            print(f"   📦 Model: {OLLAMA_MODEL_ID} (local)")
        else:
            # Use the HuggingFace API.
            self.model = InferenceClientModel(
                model_id=HF_MODEL_ID,
                token=HF_TOKEN
            )
            print(f"   ☁️  Model: {HF_MODEL_ID} (HuggingFace)")

        search_tool = DuckDuckGoSearchTool()
        visit_tool = VisitWebpageTool()
        wiki_tool = WikipediaSearchTool()

        self.agent = CodeAgent(
            tools=[search_tool, visit_tool, wiki_tool, smart_visit, get_youtube_info],
            model=self.model,
            max_steps=MAX_STEPS,
            verbosity_level=VERBOSITY_LEVEL,
            additional_authorized_imports=AUTHORIZED_IMPORTS
        )

    def build_prompt(self, question, local_file, question_type) -> str:
        """Build a prompt optimized for the given question type.

        The prompt is the concatenation of a task header containing the
        question, a strategy section chosen by ``question_type``, and a fixed
        description of the expected output format.

        Args:
            question: The question text, interpolated into the header.
            local_file: Path/name of the attached file. It is interpolated
                verbatim into the file-based strategies, so ``None`` shows up
                as the text ``None`` there.
            question_type: One of the ``QUESTION_TYPES`` values. Unknown
                values fall back to the GENERAL strategy.

        Returns:
            The full prompt string.
        """

        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.

QUESTION: {question}
"""

        # All strategies are built eagerly; only the selected one is used.
        strategies = {
            QUESTION_TYPES['YOUTUBE_VIDEO']: """
STRATEGY - YouTube Video:
1. Extract the video ID from the URL in the question
2. Use get_youtube_info tool to get context
3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
4. Look for Reddit threads, forums, or blogs discussing this video
5. Find the specific information requested

IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
""",

            QUESTION_TYPES['IMAGE_FILE']: f"""
STRATEGY - Image File:
1. File '{local_file}' is in current directory
2. You CANNOT read image files directly with Python
3. Search online for: "{local_file}" OR search for keywords from the question
4. Look for discussions, analysis, or descriptions of this image online
5. For chess positions: search "[piece positions] chess position solution"

IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
""",

            QUESTION_TYPES['AUDIO_FILE']: f"""
STRATEGY - Audio File:
1. File '{local_file}' is in current directory
2. You CANNOT play or transcribe audio with Python
3. Search online for: "{local_file}" OR the exact question text
4. Look for transcripts, Reddit threads, or forums discussing this audio

IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
""",

            QUESTION_TYPES['DATA_FILE']: f"""
STRATEGY - Data File (Excel/CSV):
1. File '{local_file}' is in current directory
2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
3. Explore columns with df.columns and df.head()
4. Filter and sum/count as needed
5. Double-check calculations

CODE TEMPLATE:
```python
import pandas as pd
df = pd.read_excel('{local_file}')  # or read_csv
print(df.columns)
print(df.head())
# ... your analysis
```
""",

            QUESTION_TYPES['CODE_FILE']: f"""
STRATEGY - Code File:
1. File '{local_file}' is in current directory
2. Read it with open('{local_file}', 'r').read()
3. Analyze the code logic carefully
4. If needed, execute it: exec(open('{local_file}').read())
5. Return the requested output

IMPORTANT: Read and understand before executing.
""",

            QUESTION_TYPES['WIKIPEDIA']: """
STRATEGY - Wikipedia Search:
1. Identify the exact topic/entity from the question
2. Use web_search to find the correct Wikipedia article URL
3. Use smart_visit to read the Wikipedia page content
4. Extract the specific information requested (dates, numbers, names, etc.)
5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()

TIPS:
- Search: "[topic] Wikipedia 2022" for latest version
- For discographies: look for "Discography" section or table
- For featured articles: search "Wikipedia Featured Article [topic] [date]"
- ALWAYS create a list and count programmatically, don't count manually
""",

            QUESTION_TYPES['COUNTING']: """
STRATEGY - Counting Task:
1. Research and LIST all items first (don't just count)
2. Use smart_visit to get complete data from Wikipedia or official sources
3. Store items in a Python list: items = []
4. Count with len(items) and verify manually
5. Double-check you haven't missed anything

IMPORTANT: First collect ALL items, THEN count. Show your work.
""",

            QUESTION_TYPES['TEXT_MANIPULATION']: """
STRATEGY - Text Manipulation:
1. Read the question VERY carefully
2. If text is backwards, reverse it: text[::-1]
3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
4. Return ONLY the answer, no explanation

EXAMPLE: ".rewsna eht sa 'tfel' drow..." 
→ Reverse to read: "...word 'left' as the answer."
→ Opposite of "left" is "right"
""",

            QUESTION_TYPES['GENERAL']: """
STRATEGY - General Research:
1. Break down the question into sub-tasks
2. Use web_search for initial research
3. Use smart_visit to read relevant pages in detail
4. Cross-reference multiple sources if needed
5. Extract the precise answer requested

TIPS:
- Be specific in searches: include years, full names, exact terms
- Read carefully - answers are often in tables, lists, or footnotes
"""
        }

        # Unknown question types use the general research strategy.
        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])

        output_format = """
FINAL OUTPUT FORMAT:
Return ONLY the answer value. No markdown, no "The answer is", no explanations.

Examples of GOOD answers:
- "3"
- "right"
- "Ian Rose"
- "14.50"
- "d5, e2"

Examples of BAD answers:
- "The answer is 3"
- "**3**"
- "Based on my research, the answer is 3."
"""

        return base_context + strategy + output_format

    @classmethod
    def _answer_from_logs(cls, logs):
        """Return the last short, non-noise line of ``logs``, or ``None``.

        Lines are scanned from the end. A line is skipped when it is blank,
        contains any of ``_LOG_NOISE_MARKERS``, or is not shorter than
        ``_MAX_FALLBACK_ANSWER_LEN`` after stripping.

        Args:
            logs: Captured log text (expected to be free of ANSI codes).

        Returns:
            The stripped candidate line, or ``None`` when nothing qualifies.
        """
        for raw_line in reversed(logs.split('\n')):
            candidate = raw_line.strip()
            if not candidate:
                continue
            if any(marker in raw_line for marker in cls._LOG_NOISE_MARKERS):
                continue
            if len(candidate) < cls._MAX_FALLBACK_ANSWER_LEN:
                return candidate
        return None

    def solve(self, question, local_file=None, question_type=None) -> tuple:
        """
        Solve a question using an optimized strategy.

        Everything the agent prints to stdout while running is captured and
        returned as the execution log. Any exception raised while running is
        recorded in that log and turned into the "Error" answer instead of
        propagating.

        Args:
            question: The question text
            local_file: Path to attached file (optional)
            question_type: Detected question type; defaults to the GENERAL
                type when None

        Returns:
            tuple: (answer, execution logs). The answer is "Error" when the
            run failed or no answer could be recovered.
        """
        if question_type is None:
            question_type = QUESTION_TYPES['GENERAL']

        prompt = self.build_prompt(question, local_file, question_type)

        log_capture = io.StringIO()
        final_answer = "Error"

        try:
            with redirect_stdout(log_capture):
                answer = self.agent.run(prompt)
                final_answer = clean_answer(answer)

                # If nothing is left after cleaning, look for an answer in
                # the captured logs.
                if not final_answer or final_answer == "Error":
                    # Scan the same cleaned text that is returned to the
                    # caller, so a scraped answer never carries ANSI codes.
                    cleaned_logs = clean_ansi_codes(log_capture.getvalue())
                    # Keep the "Error" sentinel when the logs yield nothing,
                    # rather than returning an empty answer.
                    final_answer = self._answer_from_logs(cleaned_logs) or "Error"

        except Exception as e:
            # Top-level boundary: record the failure in the logs and report
            # the sentinel answer instead of crashing the caller.
            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
            final_answer = "Error"

        return final_answer, clean_ansi_codes(log_capture.getvalue())