Final_Assignment_Template

Sleeping

Diego-Fco commited on Dec 28, 2025

Commit

602a16c

1 Parent(s): 66aa5ca

feat: Implement GAIA Agent v2 with enhanced question handling and strategies

- Added configuration and constants for GAIA Agent v2 in config.py
- Developed custom tools for web content retrieval and YouTube video information in tools.py
- Created utility functions for file handling and question type detection in utils.py
- Built the main agent logic with specific strategies for different question types in agent.py
- Simplified main execution script for processing questions and submitting results in main_simple.py
- Updated requirements for necessary dependencies in requirements-v2.txt

Files changed (12) hide show

agent.py +240 -0
app.py +174 -120
config.py +53 -0
requirements.txt +15 -1
tools.py +89 -0
utils.py +180 -0
v2/agent.py +251 -0
v2/config.py +47 -0
v2/main_simple.py +227 -0
v2/requirements-v2.txt +20 -0
v2/tools.py +125 -0
v2/utils.py +212 -0

agent.py ADDED Viewed

	@@ -0,0 +1,240 @@

+import io
+from contextlib import redirect_stdout
+from smolagents import (
+    CodeAgent,
+    LiteLLMModel,
+    InferenceClientModel,
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    WikipediaSearchTool
+)
+from config import (
+    USE_LOCAL_MODEL,
+    OLLAMA_MODEL_ID, OLLAMA_API_BASE, OLLAMA_API_KEY,
+    HF_MODEL_ID, HF_TOKEN,
+    MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
+    QUESTION_TYPES
+)
+from tools import smart_visit, get_youtube_info
+from utils import clean_answer, clean_ansi_codes
+class EnhancedAgent:
+    """Agente mejorado con estrategias específicas por tipo de pregunta."""
+    def __init__(self):
+        print(f"   🤖 Inicializando agente...")
+        if USE_LOCAL_MODEL:
+            # Usar Ollama local
+            self.model = LiteLLMModel(
+                model_id=OLLAMA_MODEL_ID,
+                api_base=OLLAMA_API_BASE,
+                api_key=OLLAMA_API_KEY
+            )
+            print(f"   📦 Modelo: {OLLAMA_MODEL_ID} (local)")
+        else:
+            # Usar HuggingFace API
+            self.model = InferenceClientModel(
+                model_id=HF_MODEL_ID,
+                token=HF_TOKEN
+            )
+            print(f"   ☁️  Modelo: {HF_MODEL_ID} (HuggingFace)")
+        search_tool = DuckDuckGoSearchTool()
+        visit_tool = VisitWebpageTool()
+        wiki_tool = WikipediaSearchTool()
+        self.agent = CodeAgent(
+            tools=[search_tool, visit_tool, wiki_tool, smart_visit, get_youtube_info],
+            model=self.model,
+            max_steps=MAX_STEPS,
+            verbosity_level=VERBOSITY_LEVEL,
+            additional_authorized_imports=AUTHORIZED_IMPORTS
+        )
+    def build_prompt(self, question, local_file, question_type):
+        """Construye prompt optimizado según el tipo de pregunta."""
+        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.
+QUESTION: {question}
+"""
+        strategies = {
+            QUESTION_TYPES['YOUTUBE_VIDEO']: """
+STRATEGY - YouTube Video:
+1. Extract the video ID from the URL in the question
+2. Use get_youtube_info tool to get context
+3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
+4. Look for Reddit threads, forums, or blogs discussing this video
+5. Find the specific information requested
+IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
+""",
+            QUESTION_TYPES['IMAGE_FILE']: f"""
+STRATEGY - Image File:
+1. File '{local_file}' is in current directory
+2. You CANNOT read image files directly with Python
+3. Search online for: "{local_file}" OR search for keywords from the question
+4. Look for discussions, analysis, or descriptions of this image online
+5. For chess positions: search "[piece positions] chess position solution"
+IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
+""",
+            QUESTION_TYPES['AUDIO_FILE']: f"""
+STRATEGY - Audio File:
+1. File '{local_file}' is in current directory
+2. You CANNOT play or transcribe audio with Python
+3. Search online for: "{local_file}" OR the exact question text
+4. Look for transcripts, Reddit threads, or forums discussing this audio
+IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
+""",
+            QUESTION_TYPES['DATA_FILE']: f"""
+STRATEGY - Data File (Excel/CSV):
+1. File '{local_file}' is in current directory
+2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
+3. Explore columns with df.columns and df.head()
+4. Filter and sum/count as needed
+5. Double-check calculations
+CODE TEMPLATE:
+```python
+import pandas as pd
+df = pd.read_excel('{local_file}')  # or read_csv
+print(df.columns)
+print(df.head())
+# ... your analysis
+```
+""",
+            QUESTION_TYPES['CODE_FILE']: f"""
+STRATEGY - Code File:
+1. File '{local_file}' is in current directory
+2. Read it with open('{local_file}', 'r').read()
+3. Analyze the code logic carefully
+4. If needed, execute it: exec(open('{local_file}').read())
+5. Return the requested output
+IMPORTANT: Read and understand before executing.
+""",
+            QUESTION_TYPES['WIKIPEDIA']: """
+STRATEGY - Wikipedia Search:
+1. Identify the exact topic/entity from the question
+2. Use web_search to find the correct Wikipedia article URL
+3. Use smart_visit to read the Wikipedia page content
+4. Extract the specific information requested (dates, numbers, names, etc.)
+5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()
+TIPS:
+- Search: "[topic] Wikipedia 2022" for latest version
+- For discographies: look for "Discography" section or table
+- For featured articles: search "Wikipedia Featured Article [topic] [date]"
+- ALWAYS create a list and count programmatically, don't count manually
+""",
+            QUESTION_TYPES['COUNTING']: """
+STRATEGY - Counting Task:
+1. Research and LIST all items first (don't just count)
+2. Use smart_visit to get complete data from Wikipedia or official sources
+3. Store items in a Python list: items = []
+4. Count with len(items) and verify manually
+5. Double-check you haven't missed anything
+IMPORTANT: First collect ALL items, THEN count. Show your work.
+""",
+            QUESTION_TYPES['TEXT_MANIPULATION']: """
+STRATEGY - Text Manipulation:
+1. Read the question VERY carefully
+2. If text is backwards, reverse it: text[::-1]
+3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
+4. Return ONLY the answer, no explanation
+EXAMPLE: ".rewsna eht sa 'tfel' drow..."
+→ Reverse to read: "...word 'left' as the answer."
+→ Opposite of "left" is "right"
+""",
+            QUESTION_TYPES['GENERAL']: """
+STRATEGY - General Research:
+1. Break down the question into sub-tasks
+2. Use web_search for initial research
+3. Use smart_visit to read relevant pages in detail
+4. Cross-reference multiple sources if needed
+5. Extract the precise answer requested
+TIPS:
+- Be specific in searches: include years, full names, exact terms
+- Read carefully - answers are often in tables, lists, or footnotes
+"""
+        }
+        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])
+        output_format = """
+FINAL OUTPUT FORMAT:
+Return ONLY the answer value. No markdown, no "The answer is", no explanations.
+Examples of GOOD answers:
+- "3"
+- "right"
+- "Ian Rose"
+- "14.50"
+- "d5, e2"
+Examples of BAD answers:
+- "The answer is 3"
+- "**3**"
+- "Based on my research, the answer is 3."
+"""
+        return base_context + strategy + output_format
+    def solve(self, question, local_file=None, question_type=None):
+        """
+        Resuelve una pregunta con estrategia optimizada.
+        Args:
+            question: Texto de la pregunta
+            local_file: Ruta al archivo adjunto (opcional)
+            question_type: Tipo de pregunta detectado
+        Returns:
+            tuple: (respuesta, logs de ejecución)
+        """
+        if question_type is None:
+            question_type = QUESTION_TYPES['GENERAL']
+        prompt = self.build_prompt(question, local_file, question_type)
+        log_capture = io.StringIO()
+        final_answer = "Error"
+        try:
+            with redirect_stdout(log_capture):
+                answer = self.agent.run(prompt)
+                final_answer = clean_answer(answer)
+                # Si está vacío después de limpiar, buscar en logs
+                if not final_answer or final_answer == "Error":
+                    logs = log_capture.getvalue()
+                    for line in reversed(logs.split('\n')):
+                        if line.strip() and not any(x in line for x in ['===', '---', 'Step', 'Tool']):
+                            potential_answer = line.strip()
+                            if len(potential_answer) < 200:
+                                final_answer = potential_answer
+                                break
+        except Exception as e:
+            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
+            final_answer = "Error"
+        return final_answer, clean_ansi_codes(log_capture.getvalue())

app.py CHANGED Viewed

@@ -1,96 +1,29 @@
 import os
 import gradio as gr
 import requests
 import pandas as pd
-import shutil
-from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, VisitWebpageTool
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-def download_file(task_id, filename):
-    """
-    Downloads the file associated with a task_id if it exists.
-    """
-    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
-    try:
-        response = requests.get(file_url, stream=True)
-        if response.status_code == 200:
-            # Try to get filename from headers, otherwise use task_id
-            if not filename:
-                if "content-disposition" in response.headers:
-                    filename = response.headers["content-disposition"].split("filename=")[1].strip('"')
-                else:
-                    filename = f"{task_id}_file"
-            with open(filename, 'wb') as f:
-                shutil.copyfileobj(response.raw, f)
-            print(f"Downloaded file: {filename}")
-            return filename
-    except Exception as e:
-        print(f"Failed to download file for task {task_id}: {e}")
-    return None
-class BasicAgent:
-    def __init__(self):
-        model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
-        search_tool = DuckDuckGoSearchTool()
-        visit_tool = VisitWebpageTool()
-        self.model = InferenceClientModel(
-            model_id=model_id,
-            token=os.getenv("HF_TOKEN")
-        )
-        self.agent = CodeAgent(
-            tools=[search_tool, visit_tool],
-            model=self.model,
-            max_steps=12,
-            verbosity_level=1,
-            additional_authorized_imports=[
-                'csv', 'pandas', 'bs4', 'requests', 're', 'collections',
-                'itertools', 'io', 'json', 'math', 'statistics', 'queue',
-                'xml', 'datetime', 'time'
-            ]
-        )
-    def __call__(self, question: str, file_path: str = None) -> str:
-        # Prompt dinámico que avisa si hay un archivo local disponible
-        file_instruction = ""
-        if file_path:
-            file_instruction = f"A file named '{file_path}' has been downloaded to your current directory. Use python to read it if relevant."
-        prompt = f"""
-        TASK: Answer the following question accurately.
-        QUESTION: {question}
-        {file_instruction}
-        CRITICAL RULES:
-        1. If a file is mentioned and downloaded, use pandas or standard python libraries to read it directly.
-        2. Use 'visit_webpage' if you find promising URLs.
-        3. FINAL OUTPUT: Return ONLY the raw answer string.
-           - NO markdown (no ```), NO explanations.
-           - Just the value (e.g., "14", "Paris", "519").
-        """
-        try:
-            answer = self.agent.run(prompt)
-            clean = str(answer).strip()
-            # Limpieza agresiva de basura en la respuesta
-            if "Final Answer:" in clean:
-                clean = clean.split("Final Answer:")[-1].strip()
-            if "```" in clean:
-                clean = clean.replace("```", "")
-            return clean
-        except Exception as e:
-            print(f"Error executing agent: {e}")
-            return "Error"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
     else:
@@ -99,75 +32,196 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    agent_code = f"[https://huggingface.co/spaces/](https://huggingface.co/spaces/){space_id}/tree/main"
     try:
         response = requests.get(questions_url, timeout=15)
         questions_data = response.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     results_log = []
     answers_payload = []
-    print(f"Starting run on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id: continue
-        print(f"Processing Task: {task_id}")
-        # 1. Intentar descargar archivo si la pregunta lo sugiere
-        local_filename = None
-        # Comprobamos si la pregunta menciona archivos comunes
-        if "attached" in question_text.lower() or "file" in question_text.lower() or ".xlsx" in question_text.lower() or ".mp3" in question_text.lower():
-             # Intentamos adivinar la extensión o nombre
-             filename_hint = "data.xlsx" if "Excel" in question_text else "data.txt"
-             if ".mp3" in question_text: filename_hint = "audio.mp3"
-             if ".csv" in question_text: filename_hint = "data.csv"
-             # Descargamos el archivo real de la API
-             local_filename = download_file(task_id, filename_hint)
         try:
-            # 2. Reiniciar agente (Memoria Limpia)
-            current_agent = BasicAgent()
-            # 3. Ejecutar con contexto del archivo
-            submitted_answer = current_agent(question_text, file_path=local_filename)
-            # 4. Limpieza de seguridad para envío
             if len(submitted_answer) > 200:
                 submitted_answer = submitted_answer[:200]
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": "Error"})
-    # Submit
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         try:
             result = response.json()
-            return f"Submission Successful! Score: {result.get('score', 'N/A')}%", pd.DataFrame(results_log)
-        except:
             return f"Submission Failed (Server Error): {response.text}", pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent - Optimized for Files & Logic")
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Status", interactive=False)
-    results_table = gr.DataFrame(label="Results", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     demo.launch()

+"""
+GAIA Agent v2 - Interfaz Principal Gradio
+Agente mejorado con estrategias específicas por tipo de pregunta
+"""
 import os
+import re
+import json
+import time
 import gradio as gr
 import requests
 import pandas as pd
+# Importar módulos locales
+from config import DEFAULT_API_URL
+from agent import EnhancedAgent
+from utils import detect_question_type, download_file_for_task
+# ============================================================================
+# FUNCIONES PRINCIPALES DE GRADIO
+# ============================================================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Ejecuta el agente en todas las preguntas y envía los resultados."""
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
     else:
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    # Cargar preguntas
     try:
+        print("📥 Cargando preguntas del servidor...")
         response = requests.get(questions_url, timeout=15)
         questions_data = response.json()
+        print(f"   ✓ {len(questions_data)} preguntas cargadas")
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    # Crear agente (reutilizable)
+    print("\n🤖 Creando agente...")
+    agent = EnhancedAgent()
     results_log = []
     answers_payload = []
+    diagnostics = []
+    print(f"\n{'='*80}")
+    print(f"🚀 Iniciando procesamiento de {len(questions_data)} preguntas")
+    print(f"{'='*80}\n")
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
+        question_text = item.get("question", "")
+        file_name = item.get("file_name", "")
+        if not task_id:
+            continue
+        print(f"\n{'='*80}")
+        print(f"[{i+1}/{len(questions_data)}] Task: {task_id}")
+        print(f"{'='*80}")
+        print(f"❓ Pregunta: {question_text[:150]}...")
+        # Detectar tipo de pregunta
+        question_type = detect_question_type(question_text, file_name)
+        print(f"🔍 Tipo detectado: {question_type}")
+        if file_name:
+            print(f"📎 Archivo esperado: {file_name}")
+        # Descargar archivo si existe
+        local_file = download_file_for_task(task_id)
+        # Mostrar URLs encontradas en la pregunta
+        url_pattern = r"https?://[\w\-\./?&=%#]+"
+        found_urls = re.findall(url_pattern, question_text)
+        for url in found_urls:
+            print(f"   🔗 URL encontrada: {url}")
+        # Ejecutar agente
+        start_time = time.time()
+        print(f"⚙️  Procesando con estrategia '{question_type}'...")
         try:
+            submitted_answer, execution_logs = agent.solve(
+                question_text,
+                local_file,
+                question_type
+            )
+            # Limpieza de seguridad
             if len(submitted_answer) > 200:
                 submitted_answer = submitted_answer[:200]
         except Exception as e:
+            print(f"❌ Error: {e}")
+            submitted_answer = "Error"
+            execution_logs = str(e)
+        elapsed = time.time() - start_time
+        print(f"\n✅ Respuesta: {submitted_answer}")
+        print(f"⏱️  Tiempo: {elapsed:.1f}s")
+        # Guardar resultados
+        answers_payload.append({
+            "task_id": task_id,
+            "submitted_answer": submitted_answer
+        })
+        results_log.append({
+            "Task ID": task_id,
+            "Índice": i,
+            "Tipo": question_type,
+            "Pregunta": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+            "Archivo": file_name or "N/A",
+            "Respuesta": submitted_answer,
+            "Tiempo (s)": round(elapsed, 1)
+        })
+        diagnostics.append({
+            "index": i,
+            "task_id": task_id,
+            "question_type": question_type,
+            "question": question_text,
+            "file_name": file_name,
+            "answer": submitted_answer,
+            "elapsed_seconds": round(elapsed, 1)
+        })
+        # Limpiar archivo temporal
+        if local_file and os.path.exists(local_file):
+            try:
+                os.remove(local_file)
+            except:
+                pass
+    # Guardar diagnóstico
+    try:
+        ts = time.strftime("%Y%m%d_%H%M%S")
+        diag_path = f"diagnostics_{ts}.json"
+        with open(diag_path, "w", encoding="utf-8") as f:
+            json.dump(diagnostics, f, ensure_ascii=False, indent=2)
+        print(f"\n📊 Diagnóstico guardado: {diag_path}")
+    except Exception as e:
+        print(f"⚠️ Error guardando diagnóstico: {e}")
+    # Enviar resultados
+    print(f"\n{'='*80}")
+    print("📤 Enviando respuestas al servidor...")
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         try:
             result = response.json()
+            score = result.get('score', 'N/A')
+            correct = result.get('correct_count', '?')
+            total = result.get('total_count', len(questions_data))
+            status_msg = f"""✅ Submission Successful!
+📊 Score: {score}%
+✓ Correct: {correct}/{total}
+👤 Username: {username}
+"""
+            print(status_msg)
+            return status_msg, pd.DataFrame(results_log)
+        except Exception:
             return f"Submission Failed (Server Error): {response.text}", pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
+# ============================================================================
+# INTERFAZ GRADIO
+# ============================================================================
 with gr.Blocks() as demo:
+    gr.Markdown("""
+    # 🤖 GAIA Agent v2 - Optimizado para Archivos, YouTube y Lógica
+    Este agente usa estrategias específicas por tipo de pregunta:
+    - 📊 **Archivos Excel/CSV**: Lee y analiza datos con pandas
+    - 🎬 **YouTube**: Busca transcripciones y discusiones online
+    - 🖼️ **Imágenes**: Busca información en la web
+    - 🎵 **Audio**: Busca transcripciones online
+    - 📝 **Wikipedia**: Navega y extrae información
+    - 🔢 **Conteo**: Lista items y cuenta programáticamente
+    - 🔄 **Manipulación de texto**: Maneja texto invertido, opuestos, etc.
+    """)
     gr.LoginButton()
+    with gr.Row():
+        run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
+    status_output = gr.Textbox(label="📋 Status", lines=6, interactive=False)
+    results_table = gr.DataFrame(
+        label="📊 Resultados Detallados",
+        wrap=True,
+        headers=["Task ID", "Índice", "Tipo", "Pregunta", "Archivo", "Respuesta", "Tiempo (s)"]
+    )
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
 if __name__ == "__main__":
+    print("🚀 Iniciando GAIA Agent v2...")
     demo.launch()

config.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Configuración y constantes del GAIA Agent v2
+"""
+import os
+# ============================================================================
+# API CONFIGURATION
+# ============================================================================
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# ============================================================================
+# MODEL CONFIGURATION
+# ============================================================================
+# Cambiar según el entorno
+USE_LOCAL_MODEL = False  # True = Ollama local, False = HuggingFace API
+# Configuración para Ollama (local)
+OLLAMA_MODEL_ID = "ollama/qwen2.5-coder:14b"
+OLLAMA_API_BASE = "http://localhost:11434"
+OLLAMA_API_KEY = "ollama"
+# Configuración para HuggingFace (cloud)
+# Modelo más potente para mejor rendimiento en GAIA benchmark
+HF_MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
+HF_TOKEN = os.getenv("HF_TOKEN")
+# ============================================================================
+# AGENT CONFIGURATION
+# ============================================================================
+MAX_STEPS = 12
+VERBOSITY_LEVEL = 2
+AUTHORIZED_IMPORTS = [
+    'csv', 'pandas', 'bs4', 'requests', 're', 'collections',
+    'itertools', 'io', 'json', 'math', 'statistics', 'queue',
+    'xml', 'datetime', 'time', 'openpyxl', 'numpy', 'markdownify',
+    'urllib'
+]
+# ============================================================================
+# QUESTION TYPES
+# ============================================================================
+QUESTION_TYPES = {
+    'YOUTUBE_VIDEO': 'youtube_video',
+    'IMAGE_FILE': 'image_file',
+    'AUDIO_FILE': 'audio_file',
+    'DATA_FILE': 'data_file',
+    'CODE_FILE': 'code_file',
+    'WIKIPEDIA': 'wikipedia_search',
+    'COUNTING': 'counting_task',
+    'TEXT_MANIPULATION': 'text_manipulation',
+    'GENERAL': 'general_research'
+}

requirements.txt CHANGED Viewed

@@ -1,5 +1,19 @@
 gradio
 requests
 pandas
 smolagents[toolkit]
-duckduckgo-search

+# Core dependencies
 gradio
 requests
 pandas
 smolagents[toolkit]
+litellm>=1.0.0
+# Data processing
+openpyxl>=3.1.0
+numpy
+# Web scraping and parsing
+beautifulsoup4>=4.12.0
+lxml>=4.9.0
+markdownify>=0.11.0
+# Search tools
+duckduckgo-search>=3.9.0
+wikipedia-api

tools.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""
+Herramientas personalizadas para el GAIA Agent v2
+"""
+import requests
+from smolagents import tool
+from markdownify import markdownify as md
+@tool
+def smart_visit(url: str) -> str:
+    """
+    Visits a webpage and returns its content converted to Markdown.
+    Essential for Wikipedia, documentation, or any web content.
+    Args:
+        url: The URL of the page to visit.
+    Returns:
+        str: Webpage content in Markdown format (max 25000 chars)
+    """
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Referer': 'https://www.google.com/'
+        }
+        response = requests.get(url, headers=headers, timeout=25)
+        response.raise_for_status()
+        content = md(response.text)
+        return content[:25000]
+    except Exception as e:
+        return f"Error visiting {url}: {str(e)}"
+@tool
+def get_youtube_info(video_url: str) -> str:
+    """
+    Gets information about a YouTube video including title, description,
+    and attempts to find transcripts or related information.
+    Args:
+        video_url: YouTube video URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
+    Returns:
+        str: Video information and transcript search strategy
+    """
+    try:
+        if "youtube.com" in video_url:
+            video_id = video_url.split("v=")[1].split("&")[0] if "v=" in video_url else ""
+        elif "youtu.be" in video_url:
+            video_id = video_url.split("/")[-1].split("?")[0]
+        else:
+            return "Invalid YouTube URL"
+        if not video_id:
+            return "Could not extract video ID"
+        return f"""Video ID: {video_id}
+STRATEGY TO ANSWER:
+1. Search for '{video_id}' + keywords from the question on DuckDuckGo
+2. Look for transcripts, comments, or discussion forums about this video
+3. The video URL is: {video_url}
+Note: Direct video playback is not available. Search online for transcripts or summaries."""
+    except Exception as e:
+        return f"Error processing YouTube video: {str(e)}"
+@tool
+def wikipedia_search(query: str) -> str:
+    """
+    Searches Wikipedia for a query and returns the page content in Markdown format.
+    Args:
+        query: The search term or topic to look up on Wikipedia
+    Returns:
+        str: The Wikipedia page content in Markdown format, or an error message
+    """
+    try:
+        import urllib.parse
+        search_url = f"https://en.wikipedia.org/w/index.php?search={urllib.parse.quote_plus(query)}&title=Special%3ASearch&go=Go"
+        return smart_visit.forward(search_url)
+    except Exception as e:
+        return f"Error searching Wikipedia: {e}"

utils.py ADDED Viewed

	@@ -0,0 +1,180 @@

+"""
+Funciones de utilidad para el GAIA Agent v2
+"""
+import os
+import re
+import shutil
+import urllib.parse
+import requests
+from bs4 import BeautifulSoup
+from config import DEFAULT_API_URL, QUESTION_TYPES
+def clean_ansi_codes(text):
+    """Limpia los códigos ANSI de color de la terminal."""
+    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+    return ansi_escape.sub('', text)
+def clean_answer(answer):
+    """Limpia la respuesta del agente eliminando formato innecesario."""
+    answer = str(answer).strip()
+    patterns_to_remove = [
+        (r'^Final Answer:\s*', ''),
+        (r'^Answer:\s*', ''),
+        (r'^The answer is\s*', ''),
+        (r'^Based on[^,]*,\s*', ''),
+        (r'```', ''),
+        (r'\*\*', ''),
+        (r'^##\s*', '')
+    ]
+    for pattern, replacement in patterns_to_remove:
+        answer = re.sub(pattern, replacement, answer, flags=re.IGNORECASE)
+    return answer.strip()
+def detect_question_type(question, file_name):
+    """
+    Detecta el tipo de pregunta para aplicar estrategia específica.
+    Args:
+        question: Texto de la pregunta
+        file_name: Nombre del archivo adjunto (si existe)
+    Returns:
+        str: Tipo de pregunta (ver QUESTION_TYPES en config.py)
+    """
+    q_lower = question.lower()
+    if "youtube.com" in question or "youtu.be" in question:
+        return QUESTION_TYPES['YOUTUBE_VIDEO']
+    elif file_name and file_name.endswith(".png"):
+        return QUESTION_TYPES['IMAGE_FILE']
+    elif file_name and file_name.endswith(".mp3"):
+        return QUESTION_TYPES['AUDIO_FILE']
+    elif file_name and (file_name.endswith(".xlsx") or file_name.endswith(".csv")):
+        return QUESTION_TYPES['DATA_FILE']
+    elif file_name and file_name.endswith(".py"):
+        return QUESTION_TYPES['CODE_FILE']
+    elif "wikipedia" in q_lower:
+        return QUESTION_TYPES['WIKIPEDIA']
+    elif any(word in q_lower for word in ["how many", "count", "number of"]):
+        return QUESTION_TYPES['COUNTING']
+    elif "reverse" in q_lower or "backwards" in q_lower or ".rewsna" in question:
+        return QUESTION_TYPES['TEXT_MANIPULATION']
+    else:
+        return QUESTION_TYPES['GENERAL']
+def download_file_for_task(task_id):
+    """
+    Descarga el archivo adjunto de una tarea si existe.
+    Args:
+        task_id: ID de la tarea
+    Returns:
+        str: Ruta del archivo descargado o None si no hay archivo
+    """
+    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
+    try:
+        response = requests.get(file_url, stream=True, timeout=30)
+        if response.status_code == 200:
+            filename = f"file_{task_id}"
+            # Obtener nombre real del header
+            if "content-disposition" in response.headers:
+                cd = response.headers["content-disposition"]
+                if "filename=" in cd:
+                    filename = cd.split("filename=")[1].strip('"')
+            # Asegurar extensión correcta
+            if "." not in filename:
+                content_type = response.headers.get("content-type", "")
+                if "excel" in content_type or "spreadsheet" in content_type:
+                    filename += ".xlsx"
+                elif "audio" in content_type or "mpeg" in content_type:
+                    filename += ".mp3"
+                elif "image" in content_type or "png" in content_type:
+                    filename += ".png"
+                elif "python" in content_type:
+                    filename += ".py"
+            with open(filename, 'wb') as f:
+                shutil.copyfileobj(response.raw, f)
+            print(f"      ✓ Archivo descargado: {filename} ({os.path.getsize(filename)} bytes)")
+            return filename
+    except Exception as e:
+        print(f"      ✗ Error descargando archivo: {e}")
+    return None
+def fetch_and_download_links(url, dest_dir, max_files=20):
+    """
+    Descarga recursos vinculados desde una URL.
+    Args:
+        url: URL de la página a escanear
+        dest_dir: Directorio destino para los archivos
+        max_files: Máximo número de archivos a descargar
+    Returns:
+        list: Lista de rutas de archivos descargados
+    """
+    downloaded = []
+    try:
+        os.makedirs(dest_dir, exist_ok=True)
+        resp = requests.get(url, timeout=20)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "lxml")
+        candidates = []
+        for tag in soup.find_all(['a', 'link']):
+            href = tag.get('href')
+            if href:
+                candidates.append(href)
+        for tag in soup.find_all(['img', 'script', 'source']):
+            src = tag.get('src')
+            if src:
+                candidates.append(src)
+        seen = set()
+        allowed_exts = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.pdf', '.zip',
+                       '.mp3', '.mp4', '.py', '.txt', '.csv', '.xlsx', '.xls'}
+        for c in candidates:
+            if len(downloaded) >= max_files:
+                break
+            full = urllib.parse.urljoin(url, c)
+            if full in seen:
+                continue
+            seen.add(full)
+            path = urllib.parse.urlparse(full).path
+            ext = os.path.splitext(path)[1].lower()
+            if ext in allowed_exts:
+                try:
+                    r = requests.get(full, stream=True, timeout=20)
+                    r.raise_for_status()
+                    cd = r.headers.get('content-disposition')
+                    if cd and 'filename=' in cd:
+                        fname = cd.split('filename=')[1].strip('"')
+                    else:
+                        fname = os.path.basename(path) or f"resource_{len(downloaded)}{ext}"
+                    out_path = os.path.join(dest_dir, fname)
+                    with open(out_path, 'wb') as of:
+                        shutil.copyfileobj(r.raw, of)
+                    downloaded.append(out_path)
+                except Exception:
+                    continue
+    except Exception:
+        pass
+    return downloaded

v2/agent.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""
+Agente mejorado con estrategias específicas por tipo de pregunta
+"""
+import io
+from contextlib import redirect_stdout
+from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
+from config import (
+    MODEL_ID, MODEL_API_BASE, MODEL_API_KEY,
+    MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
+    QUESTION_TYPES
+)
+from tools import smart_visit, get_youtube_info
+from utils import clean_answer, clean_ansi_codes
+class EnhancedLocalAgent:
+    """Agente mejorado con estrategias específicas por tipo de pregunta."""
+    def __init__(self):
+        print(f"   🤖 Inicializando agente con {MODEL_ID.split('/')[-1]}...")
+        self.model = LiteLLMModel(
+            model_id=MODEL_ID,
+            api_base=MODEL_API_BASE,
+            api_key=MODEL_API_KEY
+        )
+        search_tool = DuckDuckGoSearchTool()
+        visit_tool = VisitWebpageTool()
+        wiki_tool = WikipediaSearchTool()
+        self.agent = CodeAgent(
+            tools=[search_tool, visit_tool, wiki_tool, smart_visit, get_youtube_info],
+            model=self.model,
+            max_steps=MAX_STEPS,
+            verbosity_level=VERBOSITY_LEVEL,
+            additional_authorized_imports=AUTHORIZED_IMPORTS
+        )
+    def build_prompt(self, question, local_file, question_type):
+        """Construye prompt optimizado según el tipo de pregunta."""
+        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.
+QUESTION: {question}
+"""
+        # Estrategias específicas por tipo
+        strategies = {
+            QUESTION_TYPES['YOUTUBE_VIDEO']: """
+STRATEGY - YouTube Video:
+1. Extract the video ID from the URL in the question
+2. Use get_youtube_info tool to get context
+3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
+4. Look for Reddit threads, forums, or blogs discussing this video
+5. Find the specific information requested
+IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
+""",
+            QUESTION_TYPES['IMAGE_FILE']: f"""
+STRATEGY - Image File:
+1. File '{local_file}' is in current directory
+2. You CANNOT read image files directly with Python
+3. Search online for: "{local_file}" OR search for keywords from the question
+4. Look for discussions, analysis, or descriptions of this image online
+5. For chess positions: search "[piece positions] chess position solution"
+IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
+""",
+            QUESTION_TYPES['AUDIO_FILE']: f"""
+STRATEGY - Audio File:
+1. File '{local_file}' is in current directory
+2. You CANNOT play or transcribe audio with Python
+3. Search online for: "{local_file}" OR the exact question text
+4. Look for transcripts, Reddit threads, or forums discussing this audio
+IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
+""",
+            QUESTION_TYPES['DATA_FILE']: f"""
+STRATEGY - Data File (Excel/CSV):
+1. File '{local_file}' is in current directory
+2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
+3. Explore columns with df.columns and df.head()
+4. Filter and sum/count as needed
+5. Double-check calculations
+CODE TEMPLATE:
+```python
+import pandas as pd
+df = pd.read_excel('{local_file}')  # or read_csv
+print(df.columns)
+print(df.head())
+# ... your analysis
+```
+""",
+            QUESTION_TYPES['CODE_FILE']: f"""
+STRATEGY - Code File:
+1. File '{local_file}' is in current directory
+2. Read it with open('{local_file}', 'r').read()
+3. Analyze the code logic carefully
+4. If needed, execute it: exec(open('{local_file}').read())
+5. Return the requested output
+IMPORTANT: Read and understand before executing.
+""",
+            QUESTION_TYPES['WIKIPEDIA']: """
+STRATEGY - Wikipedia Search:
+1. Identify the exact topic/entity from the question
+2. Use web_search to find the correct Wikipedia article URL
+3. Use smart_visit to read the Wikipedia page content
+4. Extract the specific information requested (dates, numbers, names, etc.)
+5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()
+TIPS:
+- Search: "[topic] Wikipedia 2022" for latest version
+- For discographies: look for "Discography" section or table
+- For featured articles: search "Wikipedia Featured Article [topic] [date]"
+- ALWAYS create a list and count programmatically, don't count manually
+EXAMPLE for counting:
+```python
+albums_2000_2009 = [
+    "Album 1 (2000)",
+    "Album 2 (2001)",
+    # ... list ALL albums
+]
+count = len(albums_2000_2009)
+print(count)
+```
+""",
+            QUESTION_TYPES['COUNTING']: """
+STRATEGY - Counting Task:
+1. Research and LIST all items first (don't just count)
+2. Use smart_visit to get complete data from Wikipedia or official sources
+3. Store items in a Python list: items = []
+4. Count with len(items) and verify manually
+5. Double-check you haven't missed anything
+IMPORTANT: First collect ALL items, THEN count. Show your work.
+""",
+            QUESTION_TYPES['TEXT_MANIPULATION']: """
+STRATEGY - Text Manipulation:
+1. Read the question VERY carefully
+2. If text is backwards, reverse it: text[::-1]
+3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
+4. Return ONLY the answer, no explanation
+EXAMPLE: ".rewsna eht sa 'tfel' drow..."
+→ Reverse to read: "...word 'left' as the answer."
+→ Opposite of "left" is "right"
+""",
+            QUESTION_TYPES['GENERAL']: """
+STRATEGY - General Research:
+1. Break down the question into sub-tasks
+2. Use web_search for initial research
+3. Use smart_visit to read relevant pages in detail
+4. Cross-reference multiple sources if needed
+5. Extract the precise answer requested
+TIPS:
+- Be specific in searches: include years, full names, exact terms
+- Read carefully - answers are often in tables, lists, or footnotes
+"""
+        }
+        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])
+        output_format = """
+FINAL OUTPUT FORMAT:
+Return ONLY the answer value. No markdown, no "The answer is", no explanations.
+Examples of GOOD answers:
+- "3"
+- "right"
+- "Ian Rose"
+- "14.50"
+- "d5, e2"
+Examples of BAD answers:
+- "The answer is 3"
+- "**3**"
+- "Based on my research, the answer is 3."
+"""
+        return base_context + strategy + output_format
+    def solve(self, question, local_file=None, question_type=QUESTION_TYPES['GENERAL']):
+        """
+        Resuelve una pregunta con estrategia optimizada.
+        Args:
+            question: Texto de la pregunta
+            local_file: Ruta al archivo adjunto (opcional)
+            question_type: Tipo de pregunta detectado
+        Returns:
+            tuple: (respuesta, logs de ejecución)
+        """
+        prompt = self.build_prompt(question, local_file, question_type)
+        log_capture = io.StringIO()
+        final_answer = "Error"
+        try:
+            with redirect_stdout(log_capture):
+                answer = self.agent.run(prompt)
+                final_answer = clean_answer(answer)
+                # Si está vacío después de limpiar, buscar en logs
+                if not final_answer or final_answer == "Error":
+                    logs = log_capture.getvalue()
+                    for line in reversed(logs.split('\n')):
+                        if line.strip() and not any(x in line for x in ['===', '---', 'Step', 'Tool']):
+                            potential_answer = line.strip()
+                            if len(potential_answer) < 200:
+                                final_answer = potential_answer
+                                break
+        except Exception as e:
+            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
+            final_answer = "Error"
+        return final_answer, clean_ansi_codes(log_capture.getvalue())
+def call_agent(question: str, file_path: str = None):
+    """
+    Función de compatibilidad para llamar al agente de forma simple.
+    Args:
+        question: Pregunta a resolver
+        file_path: Ruta al archivo adjunto (opcional)
+    Returns:
+        str: Respuesta del agente
+    """
+    from utils import detect_question_type
+    agent = EnhancedLocalAgent()
+    question_type = detect_question_type(question, file_path or "")
+    answer, _ = agent.solve(question, file_path, question_type)
+    return answer

v2/config.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""
+Configuración y constantes del proyecto GAIA Agent
+"""
+# --- API CONFIGURATION ---
+AGENT_CODE_URL = "https://huggingface.co/spaces/Diego-Fco/Final_Assignment_Template/tree/main"
+USERNAME = "Diego-Fco"
+API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- QUESTION FILTERING ---
+# Lista de índices (0-based) de preguntas específicas a testear
+# Ejemplos: [3] = Ajedrez, [4] = Dinosaurio Wikipedia
+INDICES_A_TESTEAR = []
+# Si INDICES_A_TESTEAR está vacío, se usará este límite
+# None = procesar todas las preguntas disponibles
+LIMITE_PREGUNTAS = 6
+# --- MODEL CONFIGURATION ---
+MODEL_ID = "ollama/qwen2.5-coder:14b"
+MODEL_API_BASE = "http://localhost:11434"
+MODEL_API_KEY = "ollama"
+# --- AGENT CONFIGURATION ---
+MAX_STEPS = 12
+VERBOSITY_LEVEL = 2         # Nivel de logs (1=básico, 2=detallado)
+# Imports adicionales permitidos para el agente
+AUTHORIZED_IMPORTS = [
+    'csv', 'pandas', 'bs4', 'requests', 're', 'collections',
+    'itertools', 'io', 'json', 'math', 'statistics', 'queue',
+    'xml', 'datetime', 'time', 'openpyxl', 'numpy', 'markdownify',
+    'urllib'
+]
+# --- QUESTION TYPES ---
+QUESTION_TYPES = {
+    'YOUTUBE_VIDEO': 'youtube_video',
+    'IMAGE_FILE': 'image_file',
+    'AUDIO_FILE': 'audio_file',
+    'DATA_FILE': 'data_file',
+    'CODE_FILE': 'code_file',
+    'WIKIPEDIA': 'wikipedia_search',
+    'COUNTING': 'counting_task',
+    'TEXT_MANIPULATION': 'text_manipulation',
+    'GENERAL': 'general_research'
+}

v2/main_simple.py ADDED Viewed

	@@ -0,0 +1,227 @@

+"""
+GAIA Agent v2 - Script Principal Simplificado
+Resuelve preguntas del benchmark GAIA usando estrategias optimizadas
+"""
+import os
+import re
+import shutil
+import requests
+import json
+import time
+# Importar módulos locales
+from config import (
+    AGENT_CODE_URL, USERNAME, API_URL,
+    INDICES_A_TESTEAR, LIMITE_PREGUNTAS
+)
+from agent import EnhancedLocalAgent
+from utils import download_file_for_task, detect_question_type, fetch_and_download_links
+def load_questions():
+    """Carga las preguntas desde el servidor y las guarda localmente."""
+    print("📥 Cargando preguntas...")
+    try:
+        all_questions = requests.get(f"{API_URL}/questions").json()
+        # Guardar copia local
+        if not os.path.exists("tasks"):
+            os.makedirs("tasks")
+        with open(os.path.join("tasks", "all_questions.json"), "w", encoding="utf-8") as qf:
+            json.dump(all_questions, qf, ensure_ascii=False, indent=2)
+        print(f"   ✓ {len(all_questions)} preguntas cargadas\n")
+        return all_questions
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        return None
+def select_questions(all_questions):
+    """Selecciona qué preguntas procesar según configuración."""
+    questions_to_process = []
+    if INDICES_A_TESTEAR and len(INDICES_A_TESTEAR) > 0:
+        print(f"🎯 MODO QUIRÚRGICO: Procesando índices {INDICES_A_TESTEAR}\n")
+        for idx in INDICES_A_TESTEAR:
+            if 0 <= idx < len(all_questions):
+                q = all_questions[idx]
+                q['_original_index'] = idx
+                questions_to_process.append(q)
+    else:
+        limit = LIMITE_PREGUNTAS if LIMITE_PREGUNTAS else len(all_questions)
+        print(f"🔥 MODO SECUENCIAL: Procesando las primeras {limit} preguntas\n")
+        for i, q in enumerate(all_questions[:limit]):
+            q['_original_index'] = i
+            questions_to_process.append(q)
+    return questions_to_process
+def process_questions(agent, questions_to_process):
+    """Procesa todas las preguntas con el agente."""
+    results = []
+    diagnostics = []
+    for i, item in enumerate(questions_to_process):
+        task_id = item["task_id"]
+        question = item["question"]
+        file_name = item.get("file_name", "")
+        idx_original = item.get('_original_index', '?')
+        print(f"\n{'='*80}")
+        print(f"[{i+1}/{len(questions_to_process)}] Índice: {idx_original} | Task: {task_id}")
+        print(f"{'='*80}")
+        print(f"❓ Pregunta: {question[:100]}...")
+        if file_name:
+            print(f"📎 Archivo: {file_name}")
+        # Detectar tipo y descargar archivo
+        question_type = detect_question_type(question, file_name)
+        print(f"🔍 Tipo detectado: {question_type}")
+        # Descargar archivo principal si existe en la API
+        local_file = download_file_for_task(task_id)
+        # Crear carpeta para esta pregunta
+        task_dir = os.path.join("tasks", f"question_{idx_original}_{task_id}")
+        os.makedirs(task_dir, exist_ok=True)
+        # Mover archivo descargado a la carpeta de la pregunta
+        if local_file and os.path.exists(local_file):
+            new_file_path = os.path.join(task_dir, os.path.basename(local_file))
+            shutil.move(local_file, new_file_path)
+            local_file = new_file_path
+            print(f"      ✓ Archivo movido a: {local_file}")
+        # Descargar recursos vinculados desde URLs en la pregunta
+        resource_dir = os.path.join(task_dir, "resources")
+        # Extraer urls simples del texto de la pregunta
+        url_pattern = r"https?://[\w\-\./?&=%#]+"
+        found_urls = re.findall(url_pattern, question)
+        for u in found_urls:
+            print(f"   🔗 Encontrada URL en pregunta: {u} — descargando recursos...")
+            downloaded = fetch_and_download_links(u, resource_dir)
+            if downloaded:
+                print(f"      ✓ {len(downloaded)} recursos descargados en {resource_dir}")
+        # Resolver
+        print(f"⚙️  Procesando con estrategia '{question_type}'...")
+        start_time = time.time()
+        answer, execution_logs = agent.solve(question, local_file, question_type)
+        elapsed = time.time() - start_time
+        print(f"\n✅ Respuesta: {answer}")
+        print(f"⏱️  Tiempo: {elapsed:.1f}s")
+        # Guardar logs
+        task_dir = save_logs(task_id, idx_original, question, question_type,
+                  answer, local_file, elapsed, execution_logs)
+        results.append({"task_id": task_id, "submitted_answer": answer})
+        diagnostics.append({
+            "idx_original": idx_original,
+            "task_id": task_id,
+            "question_type": question_type,
+            "question": question[:200],
+            "answer": answer,
+            "elapsed_seconds": round(elapsed, 1),
+            "folder": task_dir
+        })
+    return results, diagnostics
+def save_logs(task_id, idx_original, question, question_type,
+              answer, local_file, elapsed, execution_logs):
+    """Guarda los logs de ejecución de una pregunta en su propia carpeta."""
+    # Crear carpeta específica para esta pregunta
+    task_dir = f"tasks/question_{idx_original}_{task_id}"
+    os.makedirs(task_dir, exist_ok=True)
+    # Guardar archivo de respuesta
+    task_filename = os.path.join(task_dir, "answer.md")
+    with open(task_filename, "w", encoding="utf-8") as f:
+        f.write(f"# Pregunta {idx_original}\n\n")
+        f.write(f"**Task ID:** {task_id}\n\n")
+        f.write(f"**Tipo:** {question_type}\n\n")
+        f.write(f"**Pregunta:** {question}\n\n")
+        f.write(f"**Archivo adjunto:** {local_file or 'N/A'}\n\n")
+        f.write(f"**Tiempo de ejecución:** {elapsed:.1f}s\n\n")
+        f.write(f"## ✅ Respuesta Final\n\n```\n{answer}\n```\n\n")
+        f.write("## 📋 Logs de Ejecución\n\n```text\n")
+        f.write(execution_logs)
+        f.write("\n```\n")
+    return task_dir
+def submit_results(results):
+    """Envía los resultados al servidor."""
+    print(f"\n{'='*80}")
+    print("📤 Enviando respuestas al servidor...")
+    payload = {
+        "username": USERNAME,
+        "agent_code": AGENT_CODE_URL,
+        "answers": results
+    }
+    try:
+        response = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
+        result = response.json()
+        print(f"✅ Respuesta del servidor:")
+        print(f"   {json.dumps(result, indent=2)}")
+        return result
+    except Exception as e:
+        print(f"❌ Error al enviar: {e}")
+        return None
+def save_diagnostics(diagnostics):
+    """Guarda el archivo de diagnóstico."""
+    ts = time.strftime("%Y%m%d_%H%M%S")
+    diag_path = os.path.join("tasks", f"diagnostics_v2_{ts}.json")
+    with open(diag_path, "w", encoding="utf-8") as df:
+        json.dump(diagnostics, df, ensure_ascii=False, indent=2)
+    print(f"\n✅ Diagnóstico guardado: {diag_path}")
+def main():
+    """Función principal del script."""
+    print("🚀 Iniciando Agente Local MEJORADO v2...")
+    print("   Modelo: qwen2.5-coder:14b")
+    print("   Objetivo: Resolver 6+ preguntas correctamente\n")
+    # 1. Cargar preguntas
+    all_questions = load_questions()
+    if not all_questions:
+        return
+    # 2. Seleccionar preguntas a procesar
+    questions_to_process = select_questions(all_questions)
+    if not questions_to_process:
+        print("⚠️ No hay preguntas para procesar.")
+        return
+    # 3. Crear agente (una sola instancia reutilizable)
+    print("🤖 Creando agente reutilizable...\n")
+    agent = EnhancedLocalAgent()
+    # 4. Procesar todas las preguntas
+    results, diagnostics = process_questions(agent, questions_to_process)
+    # 5. Enviar resultados
+    submit_results(results)
+    # 6. Guardar diagnóstico
+    save_diagnostics(diagnostics)
+    print(f"\n{'='*80}")
+    print("🎯 Ejecución completada")
+    print(f"{'='*80}\n")
+if __name__ == "__main__":
+    main()

v2/requirements-v2.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+# Core dependencies
+requests>=2.31.0
+smolagents>=1.0.0
+# Model support
+litellm>=1.0.0
+# Data processing
+pandas>=2.0.0
+openpyxl>=3.1.0  # For Excel files
+# Web scraping and parsing
+beautifulsoup4>=4.12.0
+lxml>=4.9.0
+markdownify>=0.11.0
+# Optional: For additional features
+duckduckgo-search>=3.9.0  # If using DuckDuckGo search
+wikipedia-api

v2/tools.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""
+Herramientas personalizadas para el GAIA Agent
+"""
+import requests
+from smolagents import tool
+from markdownify import markdownify as md
+@tool
+def smart_visit(url: str) -> str:
+    """
+    Visits a webpage and returns its content converted to Markdown.
+    Essential for Wikipedia, documentation, or any web content.
+    Args:
+        url: The URL of the page to visit.
+    Returns:
+        str: Webpage content in Markdown format (max 25000 chars)
+    """
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Referer': 'https://www.google.com/'
+        }
+        response = requests.get(url, headers=headers, timeout=25)
+        response.raise_for_status()
+        content = md(response.text)
+        return content[:25000]
+    except Exception as e:
+        return f"Error visiting {url}: {str(e)}"
+@tool
+def get_youtube_info(video_url: str) -> str:
+    """
+    Gets information about a YouTube video including title, description,
+    and attempts to find transcripts or related information.
+    Args:
+        video_url: YouTube video URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
+    Returns:
+        str: Video information and transcript search strategy
+    """
+    try:
+        # Extraer video ID
+        if "youtube.com" in video_url:
+            video_id = video_url.split("v=")[1].split("&")[0] if "v=" in video_url else ""
+        elif "youtu.be" in video_url:
+            video_id = video_url.split("/")[-1].split("?")[0]
+        else:
+            return "Invalid YouTube URL"
+        if not video_id:
+            return "Could not extract video ID"
+        return f"""Video ID: {video_id}
+STRATEGY TO ANSWER:
+1. Search for '{video_id}' + keywords from the question on DuckDuckGo
+2. Look for transcripts, comments, or discussion forums about this video
+3. The video URL is: {video_url}
+Note: Direct video playback is not available. Search online for transcripts or summaries."""
+    except Exception as e:
+        return f"Error processing YouTube video: {str(e)}"
+@tool
+def visit_webpage(url: str) -> str:
+    """
+    Visits a webpage and returns its content in Markdown format.
+    Args:
+        url: The URL of the webpage to visit
+    Returns:
+        str: The webpage content converted to Markdown
+    """
+    return smart_visit(url)
+@tool
+def wikipedia_search(query: str) -> str:
+    """
+    Searches Wikipedia for a query and returns the page content in Markdown format.
+    Args:
+        query: The search term or topic to look up on Wikipedia
+    Returns:
+        str: The Wikipedia page content in Markdown format, or an error message
+    """
+    try:
+        import urllib.parse
+        search_url = f"https://en.wikipedia.org/w/index.php?search={urllib.parse.quote_plus(query)}&title=Special%3ASearch&go=Go"
+        return smart_visit(search_url)
+    except Exception as e:
+        return f"Error searching Wikipedia: {e}"
+@tool
+def answer_video_questions(video_url: str, question: str) -> str:
+    """
+    Provides guidance on how to answer questions about a video by extracting metadata
+    and suggesting search queries to find transcripts or discussions.
+    Args:
+        video_url: The URL of the video (YouTube or similar platform)
+        question: The specific question to answer about the video
+    Returns:
+        str: Video metadata and suggested search queries to find answers
+    """
+    try:
+        info = get_youtube_info(video_url)
+        # Provide a compact actionable payload for the agent
+        return f"VIDEO_INFO:\n{info}\n\nSUGGESTED_QUERIES:\n- \"{video_url} transcript\"\n- \"{video_url} subtitles\"\n- \"{video_url} comments discussion\"\n\nUse VisitWebpageTool/WikipediaSearchTool to follow links."
+    except Exception as e:
+        return f"Error answering video question: {e}"

v2/utils.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+Funciones de utilidad para el GAIA Agent
+"""
+import os
+import re
+import requests
+import shutil
+import urllib.parse
+import mimetypes
+from bs4 import BeautifulSoup
+from config import API_URL, QUESTION_TYPES
+def clean_ansi_codes(text):
+    """Limpia los códigos ANSI de color de la terminal."""
+    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+    return ansi_escape.sub('', text)
+def download_file_for_task(task_id):
+    """
+    Descarga el archivo adjunto de una tarea si existe.
+    Args:
+        task_id: ID de la tarea
+    Returns:
+        str: Ruta del archivo descargado o None si no hay archivo
+    """
+    file_url = f"{API_URL}/files/{task_id}"
+    try:
+        response = requests.get(file_url, stream=True, timeout=30)
+        if response.status_code == 200:
+            filename = f"file_{task_id}"
+            # Obtener nombre real del header
+            if "content-disposition" in response.headers:
+                cd = response.headers["content-disposition"]
+                if "filename=" in cd:
+                    filename = cd.split("filename=")[1].strip('"')
+            # Asegurar extensión correcta
+            if "." not in filename:
+                content_type = response.headers.get("content-type", "")
+                if "excel" in content_type or "spreadsheet" in content_type:
+                    filename += ".xlsx"
+                elif "audio" in content_type or "mpeg" in content_type:
+                    filename += ".mp3"
+                elif "image" in content_type or "png" in content_type:
+                    filename += ".png"
+                elif "python" in content_type:
+                    filename += ".py"
+            with open(filename, 'wb') as f:
+                shutil.copyfileobj(response.raw, f)
+            print(f"      ✓ Archivo descargado: {filename} ({os.path.getsize(filename)} bytes)")
+            return filename
+    except Exception as e:
+        print(f"      ✗ Error descargando archivo: {e}")
+    return None
+def fetch_and_download_links(url, dest_dir, max_files=20):
+    """
+    Fetch a webpage, extract links to common resource file types and download them.
+    Args:
+        url (str): Webpage URL to scan for resources.
+        dest_dir (str): Directory where downloaded resources will be saved.
+        max_files (int): Maximum number of files to download.
+    Returns:
+        list: Paths of downloaded files.
+    """
+    downloaded = []
+    try:
+        os.makedirs(dest_dir, exist_ok=True)
+        resp = requests.get(url, timeout=20)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "lxml")
+        # find candidate links from href and src
+        candidates = []
+        for tag in soup.find_all(['a', 'link']):
+            href = tag.get('href')
+            if href:
+                candidates.append(href)
+        for tag in soup.find_all(['img', 'script', 'source']):
+            src = tag.get('src')
+            if src:
+                candidates.append(src)
+        # normalize and filter
+        seen = set()
+        allowed_exts = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.pdf', '.zip', '.mp3', '.mp4', '.py', '.txt', '.csv', '.xlsx', '.xls'}
+        for c in candidates:
+            if len(downloaded) >= max_files:
+                break
+            full = urllib.parse.urljoin(url, c)
+            if full in seen:
+                continue
+            seen.add(full)
+            path = urllib.parse.urlparse(full).path
+            ext = os.path.splitext(path)[1].lower()
+            # Accept if extension recognized or content-type later
+            if ext in allowed_exts:
+                try:
+                    r = requests.get(full, stream=True, timeout=20)
+                    r.raise_for_status()
+                    cd = r.headers.get('content-disposition')
+                    if cd and 'filename=' in cd:
+                        fname = cd.split('filename=')[1].strip('"')
+                    else:
+                        fname = os.path.basename(path) or f"resource_{len(downloaded)}{ext}"
+                    out_path = os.path.join(dest_dir, fname)
+                    with open(out_path, 'wb') as of:
+                        shutil.copyfileobj(r.raw, of)
+                    downloaded.append(out_path)
+                except Exception:
+                    continue
+            else:
+                # try a HEAD request to see if content-type indicates a file
+                try:
+                    h = requests.head(full, timeout=10)
+                    ctype = h.headers.get('content-type', '')
+                    if any(t in ctype for t in ['image/', 'audio/', 'video/', 'application/pdf', 'text/', 'application/octet-stream', 'application/zip', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml']):
+                        # download
+                        try:
+                            r = requests.get(full, stream=True, timeout=20)
+                            r.raise_for_status()
+                            fname = os.path.basename(urllib.parse.urlparse(full).path) or f"resource_{len(downloaded)}"
+                            if not os.path.splitext(fname)[1]:
+                                ext = mimetypes.guess_extension(ctype.split(';')[0].strip()) or ''
+                                fname += ext
+                            out_path = os.path.join(dest_dir, fname)
+                            with open(out_path, 'wb') as of:
+                                shutil.copyfileobj(r.raw, of)
+                            downloaded.append(out_path)
+                        except Exception:
+                            continue
+                except Exception:
+                    continue
+    except Exception:
+        return downloaded
+    return downloaded
+def detect_question_type(question, file_name):
+    """
+    Detecta el tipo de pregunta para aplicar estrategia específica.
+    Args:
+        question: Texto de la pregunta
+        file_name: Nombre del archivo adjunto (si existe)
+    Returns:
+        str: Tipo de pregunta (ver QUESTION_TYPES en config.py)
+    """
+    q_lower = question.lower()
+    if "youtube.com" in question or "youtu.be" in question:
+        return QUESTION_TYPES['YOUTUBE_VIDEO']
+    elif file_name and file_name.endswith(".png"):
+        return QUESTION_TYPES['IMAGE_FILE']
+    elif file_name and file_name.endswith(".mp3"):
+        return QUESTION_TYPES['AUDIO_FILE']
+    elif file_name and file_name.endswith((".xlsx", ".csv")):
+        return QUESTION_TYPES['DATA_FILE']
+    elif file_name and file_name.endswith(".py"):
+        return QUESTION_TYPES['CODE_FILE']
+    elif "wikipedia" in q_lower:
+        return QUESTION_TYPES['WIKIPEDIA']
+    elif any(word in q_lower for word in ["how many", "count", "number of"]):
+        return QUESTION_TYPES['COUNTING']
+    elif "reverse" in q_lower or "backwards" in q_lower or ".rewsna" in question:
+        return QUESTION_TYPES['TEXT_MANIPULATION']
+    else:
+        return QUESTION_TYPES['GENERAL']
+def clean_answer(answer):
+    """
+    Limpia la respuesta del agente eliminando formato innecesario.
+    Preserva mayúsculas originales.
+    Args:
+        answer: Respuesta del agente
+    Returns:
+        str: Respuesta limpia
+    """
+    answer = str(answer).strip()
+    # Limpiar patrones comunes (case-insensitive para búsqueda, pero preservar original)
+    patterns_to_remove = [
+        (r'^Final Answer:\s*', ''),
+        (r'^Answer:\s*', ''),
+        (r'^The answer is\s*', ''),
+        (r'^Based on[^,]*,\s*', ''),
+        (r'```', ''),
+        (r'\*\*', ''),
+        (r'^##\s*', '')
+    ]
+    for pattern, replacement in patterns_to_remove:
+        answer = re.sub(pattern, replacement, answer, flags=re.IGNORECASE)
+    return answer.strip()