Final_Assignment_Template

Sleeping

App Files Files Community

Diego-Fco committed on Dec 28, 2025

Commit

b712b2b

1 Parent(s): 602a16c

Clean project structure with English comments

Browse files

Files changed (11) hide show

agent.py +10 -10
app.py +45 -45
config.py +6 -6
tools.py +1 -1
utils.py +19 -19
v2/agent.py +0 -251
v2/config.py +0 -47
v2/main_simple.py +0 -227
v2/requirements-v2.txt +0 -20
v2/tools.py +0 -125
v2/utils.py +0 -212

agent.py CHANGED Viewed

@@ -22,10 +22,10 @@ from utils import clean_answer, clean_ansi_codes
 class EnhancedAgent:
-    """Agente mejorado con estrategias específicas por tipo de pregunta."""
     def __init__(self):
-        print(f"   🤖 Inicializando agente...")
         if USE_LOCAL_MODEL:
             # Usar Ollama local
@@ -34,14 +34,14 @@ class EnhancedAgent:
                 api_base=OLLAMA_API_BASE,
                 api_key=OLLAMA_API_KEY
             )
-            print(f"   📦 Modelo: {OLLAMA_MODEL_ID} (local)")
         else:
-            # Usar HuggingFace API
             self.model = InferenceClientModel(
                 model_id=HF_MODEL_ID,
                 token=HF_TOKEN
             )
-            print(f"   ☁️  Modelo: {HF_MODEL_ID} (HuggingFace)")
         search_tool = DuckDuckGoSearchTool()
         visit_tool = VisitWebpageTool()
@@ -200,15 +200,15 @@ Examples of BAD answers:
     def solve(self, question, local_file=None, question_type=None):
         """
-        Resuelve una pregunta con estrategia optimizada.
         Args:
-            question: Texto de la pregunta
-            local_file: Ruta al archivo adjunto (opcional)
-            question_type: Tipo de pregunta detectado
         Returns:
-            tuple: (respuesta, logs de ejecución)
         """
         if question_type is None:
             question_type = QUESTION_TYPES['GENERAL']

 class EnhancedAgent:
+    """Enhanced agent with question-type specific strategies."""
     def __init__(self):
+        print(f"   🤖 Initializing agent...")
         if USE_LOCAL_MODEL:
             # Usar Ollama local
                 api_base=OLLAMA_API_BASE,
                 api_key=OLLAMA_API_KEY
             )
+            print(f"   📦 Model: {OLLAMA_MODEL_ID} (local)")
         else:
+            # Use HuggingFace API
             self.model = InferenceClientModel(
                 model_id=HF_MODEL_ID,
                 token=HF_TOKEN
             )
+            print(f"   ☁️  Model: {HF_MODEL_ID} (HuggingFace)")
         search_tool = DuckDuckGoSearchTool()
         visit_tool = VisitWebpageTool()
     def solve(self, question, local_file=None, question_type=None):
         """
+        Solve a question using an optimized strategy.
         Args:
+            question: The question text
+            local_file: Path to attached file (optional)
+            question_type: Detected question type
         Returns:
+            tuple: (answer, execution logs)
         """
         if question_type is None:
             question_type = QUESTION_TYPES['GENERAL']

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-GAIA Agent v2 - Interfaz Principal Gradio
-Agente mejorado con estrategias específicas por tipo de pregunta
 """
 import os
 import re
@@ -21,7 +21,7 @@ from utils import detect_question_type, download_file_for_task
 # ============================================================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Ejecuta el agente en todas las preguntas y envía los resultados."""
     space_id = os.getenv("SPACE_ID")
     if profile:
@@ -34,17 +34,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     submit_url = f"{api_url}/submit"
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    # Cargar preguntas
     try:
-        print("📥 Cargando preguntas del servidor...")
         response = requests.get(questions_url, timeout=15)
         questions_data = response.json()
-        print(f"   ✓ {len(questions_data)} preguntas cargadas")
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    # Crear agente (reutilizable)
-    print("\n🤖 Creando agente...")
     agent = EnhancedAgent()
     results_log = []
@@ -52,7 +52,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     diagnostics = []
     print(f"\n{'='*80}")
-    print(f"🚀 Iniciando procesamiento de {len(questions_data)} preguntas")
     print(f"{'='*80}\n")
     for i, item in enumerate(questions_data):
@@ -66,27 +66,27 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"\n{'='*80}")
         print(f"[{i+1}/{len(questions_data)}] Task: {task_id}")
         print(f"{'='*80}")
-        print(f"❓ Pregunta: {question_text[:150]}...")
-        # Detectar tipo de pregunta
         question_type = detect_question_type(question_text, file_name)
-        print(f"🔍 Tipo detectado: {question_type}")
         if file_name:
-            print(f"📎 Archivo esperado: {file_name}")
-        # Descargar archivo si existe
         local_file = download_file_for_task(task_id)
-        # Mostrar URLs encontradas en la pregunta
         url_pattern = r"https?://[\w\-\./?&=%#]+"
         found_urls = re.findall(url_pattern, question_text)
         for url in found_urls:
-            print(f"   🔗 URL encontrada: {url}")
-        # Ejecutar agente
         start_time = time.time()
-        print(f"⚙️  Procesando con estrategia '{question_type}'...")
         try:
             submitted_answer, execution_logs = agent.solve(
@@ -106,10 +106,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         elapsed = time.time() - start_time
-        print(f"\n✅ Respuesta: {submitted_answer}")
-        print(f"⏱️  Tiempo: {elapsed:.1f}s")
-        # Guardar resultados
         answers_payload.append({
             "task_id": task_id,
             "submitted_answer": submitted_answer
@@ -117,12 +117,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         results_log.append({
             "Task ID": task_id,
-            "Índice": i,
-            "Tipo": question_type,
-            "Pregunta": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-            "Archivo": file_name or "N/A",
-            "Respuesta": submitted_answer,
-            "Tiempo (s)": round(elapsed, 1)
         })
         diagnostics.append({
@@ -135,26 +135,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             "elapsed_seconds": round(elapsed, 1)
         })
-        # Limpiar archivo temporal
         if local_file and os.path.exists(local_file):
             try:
                 os.remove(local_file)
             except:
                 pass
-    # Guardar diagnóstico
     try:
         ts = time.strftime("%Y%m%d_%H%M%S")
         diag_path = f"diagnostics_{ts}.json"
         with open(diag_path, "w", encoding="utf-8") as f:
             json.dump(diagnostics, f, ensure_ascii=False, indent=2)
-        print(f"\n📊 Diagnóstico guardado: {diag_path}")
     except Exception as e:
-        print(f"⚠️ Error guardando diagnóstico: {e}")
-    # Enviar resultados
     print(f"\n{'='*80}")
-    print("📤 Enviando respuestas al servidor...")
     submission_data = {
         "username": username.strip(),
@@ -191,16 +191,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks() as demo:
     gr.Markdown("""
-    # 🤖 GAIA Agent v2 - Optimizado para Archivos, YouTube y Lógica
-    Este agente usa estrategias específicas por tipo de pregunta:
-    - 📊 **Archivos Excel/CSV**: Lee y analiza datos con pandas
-    - 🎬 **YouTube**: Busca transcripciones y discusiones online
-    - 🖼️ **Imágenes**: Busca información en la web
-    - 🎵 **Audio**: Busca transcripciones online
-    - 📝 **Wikipedia**: Navega y extrae información
-    - 🔢 **Conteo**: Lista items y cuenta programáticamente
-    - 🔄 **Manipulación de texto**: Maneja texto invertido, opuestos, etc.
     """)
     gr.LoginButton()
@@ -211,9 +211,9 @@ with gr.Blocks() as demo:
     status_output = gr.Textbox(label="📋 Status", lines=6, interactive=False)
     results_table = gr.DataFrame(
-        label="📊 Resultados Detallados",
         wrap=True,
-        headers=["Task ID", "Índice", "Tipo", "Pregunta", "Archivo", "Respuesta", "Tiempo (s)"]
     )
     run_button.click(
@@ -223,5 +223,5 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
-    print("🚀 Iniciando GAIA Agent v2...")
     demo.launch()

 """
+GAIA Agent - Main Gradio Interface
+Enhanced agent with question-type specific strategies
 """
 import os
 import re
 # ============================================================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Run the agent on all questions and submit the results."""
     space_id = os.getenv("SPACE_ID")
     if profile:
     submit_url = f"{api_url}/submit"
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    # Load questions
     try:
+        print("📥 Loading questions from server...")
         response = requests.get(questions_url, timeout=15)
         questions_data = response.json()
+        print(f"   ✓ {len(questions_data)} questions loaded")
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    # Create agent (reusable)
+    print("\n🤖 Creating agent...")
     agent = EnhancedAgent()
     results_log = []
     diagnostics = []
     print(f"\n{'='*80}")
+    print(f"🚀 Starting processing of {len(questions_data)} questions")
     print(f"{'='*80}\n")
     for i, item in enumerate(questions_data):
         print(f"\n{'='*80}")
         print(f"[{i+1}/{len(questions_data)}] Task: {task_id}")
         print(f"{'='*80}")
+        print(f"❓ Question: {question_text[:150]}...")
+        # Detect question type
         question_type = detect_question_type(question_text, file_name)
+        print(f"🔍 Detected type: {question_type}")
         if file_name:
+            print(f"📎 Expected file: {file_name}")
+        # Download file if exists
         local_file = download_file_for_task(task_id)
+        # Show URLs found in the question
         url_pattern = r"https?://[\w\-\./?&=%#]+"
         found_urls = re.findall(url_pattern, question_text)
         for url in found_urls:
+            print(f"   🔗 URL found: {url}")
+        # Execute agent
         start_time = time.time()
+        print(f"⚙️  Processing with strategy '{question_type}'...")
         try:
             submitted_answer, execution_logs = agent.solve(
         elapsed = time.time() - start_time
+        print(f"\n✅ Answer: {submitted_answer}")
+        print(f"⏱️  Time: {elapsed:.1f}s")
+        # Save results
         answers_payload.append({
             "task_id": task_id,
             "submitted_answer": submitted_answer
         results_log.append({
             "Task ID": task_id,
+            "Index": i,
+            "Type": question_type,
+            "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+            "File": file_name or "N/A",
+            "Answer": submitted_answer,
+            "Time (s)": round(elapsed, 1)
         })
         diagnostics.append({
             "elapsed_seconds": round(elapsed, 1)
         })
+        # Clean up temporary file
         if local_file and os.path.exists(local_file):
             try:
                 os.remove(local_file)
             except:
                 pass
+    # Save diagnostics
     try:
         ts = time.strftime("%Y%m%d_%H%M%S")
         diag_path = f"diagnostics_{ts}.json"
         with open(diag_path, "w", encoding="utf-8") as f:
             json.dump(diagnostics, f, ensure_ascii=False, indent=2)
+        print(f"\n📊 Diagnostics saved: {diag_path}")
     except Exception as e:
+        print(f"⚠️ Error saving diagnostics: {e}")
+    # Submit results
     print(f"\n{'='*80}")
+    print("📤 Submitting answers to server...")
     submission_data = {
         "username": username.strip(),
 with gr.Blocks() as demo:
     gr.Markdown("""
+    # 🤖 GAIA Agent - Optimized for Files, YouTube and Logic
+    This agent uses question-specific strategies:
+    - 📊 **Excel/CSV Files**: Reads and analyzes data with pandas
+    - 🎬 **YouTube**: Searches for transcripts and online discussions
+    - 🖼️ **Images**: Searches for information on the web
+    - 🎵 **Audio**: Searches for transcripts online
+    - 📝 **Wikipedia**: Navigates and extracts information
+    - 🔢 **Counting**: Lists items and counts programmatically
+    - 🔄 **Text Manipulation**: Handles reversed text, opposites, etc.
     """)
     gr.LoginButton()
     status_output = gr.Textbox(label="📋 Status", lines=6, interactive=False)
     results_table = gr.DataFrame(
+        label="📊 Detailed Results",
         wrap=True,
+        headers=["Task ID", "Index", "Type", "Question", "File", "Answer", "Time (s)"]
     )
     run_button.click(
 if __name__ == "__main__":
+    print("🚀 Starting GAIA Agent...")
     demo.launch()

config.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Configuración y constantes del GAIA Agent v2
 """
 import os
@@ -11,16 +11,16 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ============================================================================
 # MODEL CONFIGURATION
 # ============================================================================
-# Cambiar según el entorno
-USE_LOCAL_MODEL = False  # True = Ollama local, False = HuggingFace API
-# Configuración para Ollama (local)
 OLLAMA_MODEL_ID = "ollama/qwen2.5-coder:14b"
 OLLAMA_API_BASE = "http://localhost:11434"
 OLLAMA_API_KEY = "ollama"
-# Configuración para HuggingFace (cloud)
-# Modelo más potente para mejor rendimiento en GAIA benchmark
 HF_MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
 HF_TOKEN = os.getenv("HF_TOKEN")

 """
+Configuration and constants for the GAIA Agent
 """
 import os
 # ============================================================================
 # MODEL CONFIGURATION
 # ============================================================================
+# Set based on environment
+USE_LOCAL_MODEL = False  # True = Local Ollama, False = HuggingFace API
+# Ollama configuration (local)
 OLLAMA_MODEL_ID = "ollama/qwen2.5-coder:14b"
 OLLAMA_API_BASE = "http://localhost:11434"
 OLLAMA_API_KEY = "ollama"
+# HuggingFace configuration (cloud)
+# Using a powerful model for better GAIA benchmark performance
 HF_MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
 HF_TOKEN = os.getenv("HF_TOKEN")

tools.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Herramientas personalizadas para el GAIA Agent v2
 """
 import requests
 from smolagents import tool

 """
+Custom tools for the GAIA Agent
 """
 import requests
 from smolagents import tool

utils.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Funciones de utilidad para el GAIA Agent v2
 """
 import os
 import re
@@ -12,13 +12,13 @@ from config import DEFAULT_API_URL, QUESTION_TYPES
 def clean_ansi_codes(text):
-    """Limpia los códigos ANSI de color de la terminal."""
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
     return ansi_escape.sub('', text)
 def clean_answer(answer):
-    """Limpia la respuesta del agente eliminando formato innecesario."""
     answer = str(answer).strip()
     patterns_to_remove = [
@@ -39,14 +39,14 @@ def clean_answer(answer):
 def detect_question_type(question, file_name):
     """
-    Detecta el tipo de pregunta para aplicar estrategia específica.
     Args:
-        question: Texto de la pregunta
-        file_name: Nombre del archivo adjunto (si existe)
     Returns:
-        str: Tipo de pregunta (ver QUESTION_TYPES en config.py)
     """
     q_lower = question.lower()
@@ -72,13 +72,13 @@ def detect_question_type(question, file_name):
 def download_file_for_task(task_id):
     """
-    Descarga el archivo adjunto de una tarea si existe.
     Args:
-        task_id: ID de la tarea
     Returns:
-        str: Ruta del archivo descargado o None si no hay archivo
     """
     file_url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
@@ -86,13 +86,13 @@ def download_file_for_task(task_id):
         if response.status_code == 200:
             filename = f"file_{task_id}"
-            # Obtener nombre real del header
             if "content-disposition" in response.headers:
                 cd = response.headers["content-disposition"]
                 if "filename=" in cd:
                     filename = cd.split("filename=")[1].strip('"')
-            # Asegurar extensión correcta
             if "." not in filename:
                 content_type = response.headers.get("content-type", "")
                 if "excel" in content_type or "spreadsheet" in content_type:
@@ -107,24 +107,24 @@ def download_file_for_task(task_id):
             with open(filename, 'wb') as f:
                 shutil.copyfileobj(response.raw, f)
-            print(f"      ✓ Archivo descargado: {filename} ({os.path.getsize(filename)} bytes)")
             return filename
     except Exception as e:
-        print(f"      ✗ Error descargando archivo: {e}")
     return None
 def fetch_and_download_links(url, dest_dir, max_files=20):
     """
-    Descarga recursos vinculados desde una URL.
     Args:
-        url: URL de la página a escanear
-        dest_dir: Directorio destino para los archivos
-        max_files: Máximo número de archivos a descargar
     Returns:
-        list: Lista de rutas de archivos descargados
     """
     downloaded = []
     try:

 """
+Utility functions for the GAIA Agent
 """
 import os
 import re
 def clean_ansi_codes(text):
+    """Remove ANSI color codes from terminal output."""
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
     return ansi_escape.sub('', text)
 def clean_answer(answer):
+    """Clean the agent response by removing unnecessary formatting."""
     answer = str(answer).strip()
     patterns_to_remove = [
 def detect_question_type(question, file_name):
     """
+    Detect the question type to apply a specific strategy.
     Args:
+        question: The question text
+        file_name: Name of the attached file (if any)
     Returns:
+        str: Question type (see QUESTION_TYPES in config.py)
     """
     q_lower = question.lower()
 def download_file_for_task(task_id):
     """
+    Download the attached file for a task if it exists.
     Args:
+        task_id: The task ID
     Returns:
+        str: Path to downloaded file or None if no file exists
     """
     file_url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
         if response.status_code == 200:
             filename = f"file_{task_id}"
+            # Get real filename from header
             if "content-disposition" in response.headers:
                 cd = response.headers["content-disposition"]
                 if "filename=" in cd:
                     filename = cd.split("filename=")[1].strip('"')
+            # Ensure correct extension
             if "." not in filename:
                 content_type = response.headers.get("content-type", "")
                 if "excel" in content_type or "spreadsheet" in content_type:
             with open(filename, 'wb') as f:
                 shutil.copyfileobj(response.raw, f)
+            print(f"      ✓ File downloaded: {filename} ({os.path.getsize(filename)} bytes)")
             return filename
     except Exception as e:
+        print(f"      ✗ Error downloading file: {e}")
     return None
 def fetch_and_download_links(url, dest_dir, max_files=20):
     """
+    Download linked resources from a URL.
     Args:
+        url: URL of the page to scan
+        dest_dir: Destination directory for files
+        max_files: Maximum number of files to download
     Returns:
+        list: List of downloaded file paths
     """
     downloaded = []
     try:

v2/agent.py DELETED Viewed

@@ -1,251 +0,0 @@
-"""
-Agente mejorado con estrategias específicas por tipo de pregunta
-"""
-import io
-from contextlib import redirect_stdout
-from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
-from config import (
-    MODEL_ID, MODEL_API_BASE, MODEL_API_KEY,
-    MAX_STEPS, VERBOSITY_LEVEL, AUTHORIZED_IMPORTS,
-    QUESTION_TYPES
-)
-from tools import smart_visit, get_youtube_info
-from utils import clean_answer, clean_ansi_codes
-class EnhancedLocalAgent:
-    """Agente mejorado con estrategias específicas por tipo de pregunta."""
-    def __init__(self):
-        print(f"   🤖 Inicializando agente con {MODEL_ID.split('/')[-1]}...")
-        self.model = LiteLLMModel(
-            model_id=MODEL_ID,
-            api_base=MODEL_API_BASE,
-            api_key=MODEL_API_KEY
-        )
-        search_tool = DuckDuckGoSearchTool()
-        visit_tool = VisitWebpageTool()
-        wiki_tool = WikipediaSearchTool()
-        self.agent = CodeAgent(
-            tools=[search_tool, visit_tool, wiki_tool, smart_visit, get_youtube_info],
-            model=self.model,
-            max_steps=MAX_STEPS,
-            verbosity_level=VERBOSITY_LEVEL,
-            additional_authorized_imports=AUTHORIZED_IMPORTS
-        )
-    def build_prompt(self, question, local_file, question_type):
-        """Construye prompt optimizado según el tipo de pregunta."""
-        base_context = f"""TASK: You are solving a GAIA benchmark question. Be precise and methodical.
-QUESTION: {question}
-"""
-        # Estrategias específicas por tipo
-        strategies = {
-            QUESTION_TYPES['YOUTUBE_VIDEO']: """
-STRATEGY - YouTube Video:
-1. Extract the video ID from the URL in the question
-2. Use get_youtube_info tool to get context
-3. Search DuckDuckGo for: "[video_id] transcript" or "[video_id] [keywords_from_question]"
-4. Look for Reddit threads, forums, or blogs discussing this video
-5. Find the specific information requested
-IMPORTANT: You CANNOT watch the video. Search for transcripts or discussions online.
-""",
-            QUESTION_TYPES['IMAGE_FILE']: f"""
-STRATEGY - Image File:
-1. File '{local_file}' is in current directory
-2. You CANNOT read image files directly with Python
-3. Search online for: "{local_file}" OR search for keywords from the question
-4. Look for discussions, analysis, or descriptions of this image online
-5. For chess positions: search "[piece positions] chess position solution"
-IMPORTANT: Do NOT attempt cv2, PIL, or any image processing. Search online instead.
-""",
-            QUESTION_TYPES['AUDIO_FILE']: f"""
-STRATEGY - Audio File:
-1. File '{local_file}' is in current directory
-2. You CANNOT play or transcribe audio with Python
-3. Search online for: "{local_file}" OR the exact question text
-4. Look for transcripts, Reddit threads, or forums discussing this audio
-IMPORTANT: Do NOT attempt librosa, soundfile, or audio processing. Search online.
-""",
-            QUESTION_TYPES['DATA_FILE']: f"""
-STRATEGY - Data File (Excel/CSV):
-1. File '{local_file}' is in current directory
-2. Use pandas to read: pd.read_excel('{local_file}') or pd.read_csv('{local_file}')
-3. Explore columns with df.columns and df.head()
-4. Filter and sum/count as needed
-5. Double-check calculations
-CODE TEMPLATE:
-```python
-import pandas as pd
-df = pd.read_excel('{local_file}')  # or read_csv
-print(df.columns)
-print(df.head())
-# ... your analysis
-```
-""",
-            QUESTION_TYPES['CODE_FILE']: f"""
-STRATEGY - Code File:
-1. File '{local_file}' is in current directory
-2. Read it with open('{local_file}', 'r').read()
-3. Analyze the code logic carefully
-4. If needed, execute it: exec(open('{local_file}').read())
-5. Return the requested output
-IMPORTANT: Read and understand before executing.
-""",
-            QUESTION_TYPES['WIKIPEDIA']: """
-STRATEGY - Wikipedia Search:
-1. Identify the exact topic/entity from the question
-2. Use web_search to find the correct Wikipedia article URL
-3. Use smart_visit to read the Wikipedia page content
-4. Extract the specific information requested (dates, numbers, names, etc.)
-5. For counting tasks: CREATE A PYTHON LIST with each item, then count with len()
-TIPS:
-- Search: "[topic] Wikipedia 2022" for latest version
-- For discographies: look for "Discography" section or table
-- For featured articles: search "Wikipedia Featured Article [topic] [date]"
-- ALWAYS create a list and count programmatically, don't count manually
-EXAMPLE for counting:
-```python
-albums_2000_2009 = [
-    "Album 1 (2000)",
-    "Album 2 (2001)",
-    # ... list ALL albums
-]
-count = len(albums_2000_2009)
-print(count)
-```
-""",
-            QUESTION_TYPES['COUNTING']: """
-STRATEGY - Counting Task:
-1. Research and LIST all items first (don't just count)
-2. Use smart_visit to get complete data from Wikipedia or official sources
-3. Store items in a Python list: items = []
-4. Count with len(items) and verify manually
-5. Double-check you haven't missed anything
-IMPORTANT: First collect ALL items, THEN count. Show your work.
-""",
-            QUESTION_TYPES['TEXT_MANIPULATION']: """
-STRATEGY - Text Manipulation:
-1. Read the question VERY carefully
-2. If text is backwards, reverse it: text[::-1]
-3. If asking for opposite: use logic (left ↔ right, up ↔ down, etc.)
-4. Return ONLY the answer, no explanation
-EXAMPLE: ".rewsna eht sa 'tfel' drow..."
-→ Reverse to read: "...word 'left' as the answer."
-→ Opposite of "left" is "right"
-""",
-            QUESTION_TYPES['GENERAL']: """
-STRATEGY - General Research:
-1. Break down the question into sub-tasks
-2. Use web_search for initial research
-3. Use smart_visit to read relevant pages in detail
-4. Cross-reference multiple sources if needed
-5. Extract the precise answer requested
-TIPS:
-- Be specific in searches: include years, full names, exact terms
-- Read carefully - answers are often in tables, lists, or footnotes
-"""
-        }
-        strategy = strategies.get(question_type, strategies[QUESTION_TYPES['GENERAL']])
-        output_format = """
-FINAL OUTPUT FORMAT:
-Return ONLY the answer value. No markdown, no "The answer is", no explanations.
-Examples of GOOD answers:
-- "3"
-- "right"
-- "Ian Rose"
-- "14.50"
-- "d5, e2"
-Examples of BAD answers:
-- "The answer is 3"
-- "**3**"
-- "Based on my research, the answer is 3."
-"""
-        return base_context + strategy + output_format
-    def solve(self, question, local_file=None, question_type=QUESTION_TYPES['GENERAL']):
-        """
-        Resuelve una pregunta con estrategia optimizada.
-        Args:
-            question: Texto de la pregunta
-            local_file: Ruta al archivo adjunto (opcional)
-            question_type: Tipo de pregunta detectado
-        Returns:
-            tuple: (respuesta, logs de ejecución)
-        """
-        prompt = self.build_prompt(question, local_file, question_type)
-        log_capture = io.StringIO()
-        final_answer = "Error"
-        try:
-            with redirect_stdout(log_capture):
-                answer = self.agent.run(prompt)
-                final_answer = clean_answer(answer)
-                # Si está vacío después de limpiar, buscar en logs
-                if not final_answer or final_answer == "Error":
-                    logs = log_capture.getvalue()
-                    for line in reversed(logs.split('\n')):
-                        if line.strip() and not any(x in line for x in ['===', '---', 'Step', 'Tool']):
-                            potential_answer = line.strip()
-                            if len(potential_answer) < 200:
-                                final_answer = potential_answer
-                                break
-        except Exception as e:
-            log_capture.write(f"\n❌ CRITICAL ERROR: {e}\n")
-            final_answer = "Error"
-        return final_answer, clean_ansi_codes(log_capture.getvalue())
-def call_agent(question: str, file_path: str = None):
-    """
-    Función de compatibilidad para llamar al agente de forma simple.
-    Args:
-        question: Pregunta a resolver
-        file_path: Ruta al archivo adjunto (opcional)
-    Returns:
-        str: Respuesta del agente
-    """
-    from utils import detect_question_type
-    agent = EnhancedLocalAgent()
-    question_type = detect_question_type(question, file_path or "")
-    answer, _ = agent.solve(question, file_path, question_type)
-    return answer

v2/config.py DELETED Viewed

@@ -1,47 +0,0 @@
-"""
-Configuración y constantes del proyecto GAIA Agent
-"""
-# --- API CONFIGURATION ---
-AGENT_CODE_URL = "https://huggingface.co/spaces/Diego-Fco/Final_Assignment_Template/tree/main"
-USERNAME = "Diego-Fco"
-API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- QUESTION FILTERING ---
-# Lista de índices (0-based) de preguntas específicas a testear
-# Ejemplos: [3] = Ajedrez, [4] = Dinosaurio Wikipedia
-INDICES_A_TESTEAR = []
-# Si INDICES_A_TESTEAR está vacío, se usará este límite
-# None = procesar todas las preguntas disponibles
-LIMITE_PREGUNTAS = 6
-# --- MODEL CONFIGURATION ---
-MODEL_ID = "ollama/qwen2.5-coder:14b"
-MODEL_API_BASE = "http://localhost:11434"
-MODEL_API_KEY = "ollama"
-# --- AGENT CONFIGURATION ---
-MAX_STEPS = 12
-VERBOSITY_LEVEL = 2         # Nivel de logs (1=básico, 2=detallado)
-# Imports adicionales permitidos para el agente
-AUTHORIZED_IMPORTS = [
-    'csv', 'pandas', 'bs4', 'requests', 're', 'collections',
-    'itertools', 'io', 'json', 'math', 'statistics', 'queue',
-    'xml', 'datetime', 'time', 'openpyxl', 'numpy', 'markdownify',
-    'urllib'
-]
-# --- QUESTION TYPES ---
-QUESTION_TYPES = {
-    'YOUTUBE_VIDEO': 'youtube_video',
-    'IMAGE_FILE': 'image_file',
-    'AUDIO_FILE': 'audio_file',
-    'DATA_FILE': 'data_file',
-    'CODE_FILE': 'code_file',
-    'WIKIPEDIA': 'wikipedia_search',
-    'COUNTING': 'counting_task',
-    'TEXT_MANIPULATION': 'text_manipulation',
-    'GENERAL': 'general_research'
-}

v2/main_simple.py DELETED Viewed

@@ -1,227 +0,0 @@
-"""
-GAIA Agent v2 - Script Principal Simplificado
-Resuelve preguntas del benchmark GAIA usando estrategias optimizadas
-"""
-import os
-import re
-import shutil
-import requests
-import json
-import time
-# Importar módulos locales
-from config import (
-    AGENT_CODE_URL, USERNAME, API_URL,
-    INDICES_A_TESTEAR, LIMITE_PREGUNTAS
-)
-from agent import EnhancedLocalAgent
-from utils import download_file_for_task, detect_question_type, fetch_and_download_links
-def load_questions():
-    """Fetch the questions from the server and keep a local copy.
-    The decoded payload is also written to ``tasks/all_questions.json``.
-    Returns:
-        The decoded JSON payload, or None when the request, the decoding or
-        the local write fails (the error is printed, not raised).
-    """
-    print("📥 Cargando preguntas...")
-    try:
-        # A timeout keeps a stalled server from hanging the script forever;
-        # it mirrors the timeout already used when submitting answers.
-        response = requests.get(f"{API_URL}/questions", timeout=60)
-        # Fail on HTTP errors instead of treating an error payload as questions.
-        response.raise_for_status()
-        all_questions = response.json()
-        # Save a local copy (exist_ok avoids the check-then-create race)
-        os.makedirs("tasks", exist_ok=True)
-        with open(os.path.join("tasks", "all_questions.json"), "w", encoding="utf-8") as qf:
-            json.dump(all_questions, qf, ensure_ascii=False, indent=2)
-        print(f"   ✓ {len(all_questions)} preguntas cargadas\n")
-        return all_questions
-    except Exception as e:
-        print(f"❌ Error: {e}")
-        return None
-def select_questions(all_questions):
-    """Pick the questions to process according to the configuration.
-    When INDICES_A_TESTEAR is non-empty only those (in-range) indices are
-    used; otherwise the first LIMITE_PREGUNTAS questions are taken, or all of
-    them when the limit is falsy. Each selected question dict is tagged in
-    place with its position under the '_original_index' key.
-    """
-    total = len(all_questions)
-    if INDICES_A_TESTEAR:
-        print(f"🎯 MODO QUIRÚRGICO: Procesando índices {INDICES_A_TESTEAR}\n")
-        # Out-of-range indices are silently ignored
-        picked = [(pos, all_questions[pos]) for pos in INDICES_A_TESTEAR if 0 <= pos < total]
-    else:
-        limit = LIMITE_PREGUNTAS or total
-        print(f"🔥 MODO SECUENCIAL: Procesando las primeras {limit} preguntas\n")
-        picked = list(enumerate(all_questions[:limit]))
-    for pos, entry in picked:
-        entry['_original_index'] = pos
-    return [entry for _, entry in picked]
-def process_questions(agent, questions_to_process):
-    """Run the agent over every selected question and collect the outcomes.
-    For each question this detects its type, downloads the task attachment,
-    moves it into a per-question folder under ``tasks``, downloads resources
-    linked from URLs found in the question text, calls ``agent.solve`` and
-    stores the logs through ``save_logs``.
-    Args:
-        agent: Object whose ``solve(question, local_file, question_type)``
-            returns an ``(answer, execution_logs)`` pair.
-        questions_to_process: List of question dicts; "task_id" and
-            "question" are required, "file_name" and "_original_index" are
-            optional.
-    Returns:
-        tuple: ``(results, diagnostics)`` where ``results`` holds the
-        ``task_id``/``submitted_answer`` entries and ``diagnostics`` holds
-        one summary dict per question.
-    """
-    results = []
-    diagnostics = []
-    for i, item in enumerate(questions_to_process):
-        task_id = item["task_id"]
-        question = item["question"]
-        file_name = item.get("file_name", "")
-        idx_original = item.get('_original_index', '?')
-        print(f"\n{'='*80}")
-        print(f"[{i+1}/{len(questions_to_process)}] Índice: {idx_original} | Task: {task_id}")
-        print(f"{'='*80}")
-        print(f"❓ Pregunta: {question[:100]}...")
-        if file_name:
-            print(f"📎 Archivo: {file_name}")
-        # Detect the question type (the attachment is downloaded just below)
-        question_type = detect_question_type(question, file_name)
-        print(f"🔍 Tipo detectado: {question_type}")
-        # Download the main attachment if the API has one
-        local_file = download_file_for_task(task_id)
-        # Create a folder for this question
-        task_dir = os.path.join("tasks", f"question_{idx_original}_{task_id}")
-        os.makedirs(task_dir, exist_ok=True)
-        # Move the downloaded file into the question folder
-        if local_file and os.path.exists(local_file):
-            new_file_path = os.path.join(task_dir, os.path.basename(local_file))
-            shutil.move(local_file, new_file_path)
-            local_file = new_file_path
-            print(f"      ✓ Archivo movido a: {local_file}")
-        # Download resources linked from URLs in the question
-        resource_dir = os.path.join(task_dir, "resources")
-        # Extract simple URLs from the question text
-        url_pattern = r"https?://[\w\-\./?&=%#]+"
-        found_urls = re.findall(url_pattern, question)
-        for u in found_urls:
-            print(f"   🔗 Encontrada URL en pregunta: {u} — descargando recursos...")
-            downloaded = fetch_and_download_links(u, resource_dir)
-            if downloaded:
-                print(f"      ✓ {len(downloaded)} recursos descargados en {resource_dir}")
-        # Solve
-        print(f"⚙️  Procesando con estrategia '{question_type}'...")
-        start_time = time.time()
-        answer, execution_logs = agent.solve(question, local_file, question_type)
-        elapsed = time.time() - start_time
-        print(f"\n✅ Respuesta: {answer}")
-        print(f"⏱️  Tiempo: {elapsed:.1f}s")
-        # Save logs; save_logs returns the question folder, which rebinds task_dir
-        task_dir = save_logs(task_id, idx_original, question, question_type,
-                  answer, local_file, elapsed, execution_logs)
-        results.append({"task_id": task_id, "submitted_answer": answer})
-        diagnostics.append({
-            "idx_original": idx_original,
-            "task_id": task_id,
-            "question_type": question_type,
-            "question": question[:200],
-            "answer": answer,
-            "elapsed_seconds": round(elapsed, 1),
-            "folder": task_dir
-        })
-    return results, diagnostics
-def save_logs(task_id, idx_original, question, question_type,
-              answer, local_file, elapsed, execution_logs):
-    """Write the answer and execution logs of one question to its own folder.
-    The report is stored as ``answer.md`` inside
-    ``tasks/question_<idx_original>_<task_id>``, which is created if needed.
-    Args:
-        task_id: Task identifier.
-        idx_original: Index of the question in the full question list.
-        question: Question text.
-        question_type: Detected question type.
-        answer: Final answer produced by the agent.
-        local_file: Path of the attachment, or a falsy value when there is none.
-        elapsed: Execution time in seconds.
-        execution_logs: Captured logs; None is written as an empty log and
-            any other non-string value is converted with ``str``.
-    Returns:
-        str: Path of the question folder.
-    """
-    # Build the path with os.path.join, like the caller does for the same folder
-    task_dir = os.path.join("tasks", f"question_{idx_original}_{task_id}")
-    os.makedirs(task_dir, exist_ok=True)
-    # file.write() only accepts str, so normalise missing or non-text logs first
-    log_text = "" if execution_logs is None else str(execution_logs)
-    # Write the answer file
-    task_filename = os.path.join(task_dir, "answer.md")
-    with open(task_filename, "w", encoding="utf-8") as f:
-        f.write(f"# Pregunta {idx_original}\n\n")
-        f.write(f"**Task ID:** {task_id}\n\n")
-        f.write(f"**Tipo:** {question_type}\n\n")
-        f.write(f"**Pregunta:** {question}\n\n")
-        f.write(f"**Archivo adjunto:** {local_file or 'N/A'}\n\n")
-        f.write(f"**Tiempo de ejecución:** {elapsed:.1f}s\n\n")
-        f.write(f"## ✅ Respuesta Final\n\n```\n{answer}\n```\n\n")
-        f.write("## 📋 Logs de Ejecución\n\n```text\n")
-        f.write(log_text)
-        f.write("\n```\n")
-    return task_dir
-def submit_results(results):
-    """Post the collected answers to the scoring server.
-    Returns the decoded JSON reply, or None when the request or the decoding
-    fails (the error is printed, not raised).
-    """
-    rule = '=' * 80
-    print(f"\n{rule}")
-    print("📤 Enviando respuestas al servidor...")
-    try:
-        server_reply = requests.post(
-            f"{API_URL}/submit",
-            json={
-                "username": USERNAME,
-                "agent_code": AGENT_CODE_URL,
-                "answers": results
-            },
-            timeout=60,
-        ).json()
-        print("✅ Respuesta del servidor:")
-        pretty_reply = json.dumps(server_reply, indent=2)
-        print(f"   {pretty_reply}")
-        return server_reply
-    except Exception as err:
-        print(f"❌ Error al enviar: {err}")
-        return None
-def save_diagnostics(diagnostics):
-    """Write the diagnostics to a timestamped JSON file under ``tasks``.
-    Args:
-        diagnostics: JSON-serialisable data (the per-question summaries).
-    """
-    # Do not rely on an earlier step having created the folder
-    os.makedirs("tasks", exist_ok=True)
-    ts = time.strftime("%Y%m%d_%H%M%S")
-    diag_path = os.path.join("tasks", f"diagnostics_v2_{ts}.json")
-    with open(diag_path, "w", encoding="utf-8") as df:
-        json.dump(diagnostics, df, ensure_ascii=False, indent=2)
-    print(f"\n✅ Diagnóstico guardado: {diag_path}")
-def main():
-    """Script entry point: load, select, solve, submit and record diagnostics."""
-    print(
-        "🚀 Iniciando Agente Local MEJORADO v2...",
-        "   Modelo: qwen2.5-coder:14b",
-        "   Objetivo: Resolver 6+ preguntas correctamente\n",
-        sep="\n",
-    )
-    # 1. Load the questions; stop when nothing could be loaded
-    loaded = load_questions()
-    if not loaded:
-        return
-    # 2. Choose which questions to process
-    selected = select_questions(loaded)
-    if not selected:
-        print("⚠️ No hay preguntas para procesar.")
-        return
-    # 3. Build the agent (a single reusable instance)
-    print("🤖 Creando agente reutilizable...\n")
-    solver = EnhancedLocalAgent()
-    # 4. Process every selected question
-    answers, report = process_questions(solver, selected)
-    # 5. Submit the answers
-    submit_results(answers)
-    # 6. Store the diagnostics
-    save_diagnostics(report)
-    rule = '=' * 80
-    print(f"\n{rule}", "🎯 Ejecución completada", f"{rule}\n", sep="\n")
-if __name__ == "__main__":
-    main()

v2/requirements-v2.txt DELETED Viewed

@@ -1,20 +0,0 @@
-# Core dependencies
-requests>=2.31.0
-smolagents>=1.0.0
-# Model support
-litellm>=1.0.0
-# Data processing
-pandas>=2.0.0
-openpyxl>=3.1.0  # For Excel files
-# Web scraping and parsing
-beautifulsoup4>=4.12.0
-lxml>=4.9.0
-markdownify>=0.11.0
-# Optional: For additional features
-duckduckgo-search>=3.9.0  # If using DuckDuckGo search
-wikipedia-api

v2/tools.py DELETED Viewed

@@ -1,125 +0,0 @@
-"""
-Herramientas personalizadas para el GAIA Agent
-"""
-import requests
-from smolagents import tool
-from markdownify import markdownify as md
-@tool
-def smart_visit(url: str) -> str:
-    """
-    Visits a webpage and returns its content converted to Markdown.
-    Essential for Wikipedia, documentation, or any web content.
-    Args:
-        url: The URL of the page to visit.
-    Returns:
-        str: Webpage content in Markdown format (max 25000 chars)
-    """
-    # Browser-like headers sent with the request below
-    browser_headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'Referer': 'https://www.google.com/'
-    }
-    try:
-        page = requests.get(url, headers=browser_headers, timeout=25)
-        page.raise_for_status()
-        # Convert the HTML to Markdown and cap the result at 25000 characters
-        return md(page.text)[:25000]
-    except Exception as err:
-        # Errors are reported as text rather than raised
-        return f"Error visiting {url}: {err}"
-@tool
-def get_youtube_info(video_url: str) -> str:
-    """
-    Gets information about a YouTube video including title, description,
-    and attempts to find transcripts or related information.
-    Args:
-        video_url: YouTube video URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
-    Returns:
-        str: Video information and transcript search strategy
-    """
-    try:
-        from urllib.parse import parse_qs, urlparse
-        parsed = urlparse(video_url)
-        # Non-empty path components; the fragment and query are already split off
-        segments = [part for part in parsed.path.split("/") if part]
-        # Extract the video ID
-        if "youtube.com" in video_url:
-            # Read the real "v" parameter: splitting on "v=" breaks on URLs
-            # such as "?nav=1&v=ID" and keeps fragments like "#t=30".
-            video_id = parse_qs(parsed.query).get("v", [""])[0]
-            if not video_id and len(segments) >= 2 and segments[-2] in ("shorts", "embed", "live"):
-                # Path-style URLs: /shorts/<id>, /embed/<id>, /live/<id>
-                video_id = segments[-1]
-        elif "youtu.be" in video_url:
-            # Short links carry the ID as the last path component
-            video_id = segments[-1] if segments else ""
-        else:
-            return "Invalid YouTube URL"
-        if not video_id:
-            return "Could not extract video ID"
-        return f"""Video ID: {video_id}
-STRATEGY TO ANSWER:
-1. Search for '{video_id}' + keywords from the question on DuckDuckGo
-2. Look for transcripts, comments, or discussion forums about this video
-3. The video URL is: {video_url}
-Note: Direct video playback is not available. Search online for transcripts or summaries."""
-    except Exception as e:
-        return f"Error processing YouTube video: {str(e)}"
-@tool
-def visit_webpage(url: str) -> str:
-    """
-    Visits a webpage and returns its content in Markdown format.
-    Args:
-        url: The URL of the webpage to visit
-    Returns:
-        str: The webpage content converted to Markdown
-    """
-    # Thin alias: all the work is delegated to smart_visit
-    return smart_visit(url)
-@tool
-def wikipedia_search(query: str) -> str:
-    """
-    Searches Wikipedia for a query and returns the page content in Markdown format.
-    Args:
-        query: The search term or topic to look up on Wikipedia
-    Returns:
-        str: The Wikipedia page content in Markdown format, or an error message
-    """
-    try:
-        from urllib.parse import quote_plus
-        # URL-encode the query before placing it in the search URL
-        encoded_query = quote_plus(query)
-        return smart_visit(
-            f"https://en.wikipedia.org/w/index.php?search={encoded_query}&title=Special%3ASearch&go=Go"
-        )
-    except Exception as err:
-        return f"Error searching Wikipedia: {err}"
-@tool
-def answer_video_questions(video_url: str, question: str) -> str:
-    """
-    Provides guidance on how to answer questions about a video by extracting metadata
-    and suggesting search queries to find transcripts or discussions.
-    Args:
-        video_url: The URL of the video (YouTube or similar platform)
-        question: The specific question to answer about the video
-    Returns:
-        str: Video metadata and suggested search queries to find answers
-    """
-    # NOTE(review): `question` is accepted but never used below; the payload
-    # is built from `video_url` alone.
-    try:
-        info = get_youtube_info(video_url)
-        # Provide a compact actionable payload for the agent
-        return f"VIDEO_INFO:\n{info}\n\nSUGGESTED_QUERIES:\n- \"{video_url} transcript\"\n- \"{video_url} subtitles\"\n- \"{video_url} comments discussion\"\n\nUse VisitWebpageTool/WikipediaSearchTool to follow links."
-    except Exception as e:
-        return f"Error answering video question: {e}"

v2/utils.py DELETED Viewed

@@ -1,212 +0,0 @@
-"""
-Funciones de utilidad para el GAIA Agent
-"""
-import os
-import re
-import requests
-import shutil
-import urllib.parse
-import mimetypes
-from bs4 import BeautifulSoup
-from config import API_URL, QUESTION_TYPES
-def clean_ansi_codes(text):
-    """Return *text* with ANSI terminal escape sequences (e.g. colors) removed."""
-    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
-def download_file_for_task(task_id):
-    """
-    Download the attachment of a task, if there is one.
-    The file is written to the current working directory, named after the
-    Content-Disposition header when present (``file_<task_id>`` otherwise).
-    Args:
-        task_id: ID of the task
-    Returns:
-        str: Path of the downloaded file, or None when the server does not
-        answer 200 or the download fails
-    """
-    file_url = f"{API_URL}/files/{task_id}"
-    try:
-        # The context manager releases the streamed connection on every path
-        with requests.get(file_url, stream=True, timeout=30) as response:
-            if response.status_code != 200:
-                return None
-            filename = f"file_{task_id}"
-            # Get the real name from the header
-            cd = response.headers.get("content-disposition", "")
-            if "filename=" in cd:
-                # Keep only the value: drop trailing parameters and quotes
-                header_name = cd.split("filename=")[1].split(";")[0].strip().strip('"')
-                # Drop directory parts so the header cannot steer the write
-                # outside the working directory
-                header_name = os.path.basename(header_name.replace("\\", "/"))
-                if header_name not in ("", ".", ".."):
-                    filename = header_name
-            # Ensure a sensible extension
-            if "." not in filename:
-                content_type = response.headers.get("content-type", "")
-                if "excel" in content_type or "spreadsheet" in content_type:
-                    filename += ".xlsx"
-                elif "audio" in content_type or "mpeg" in content_type:
-                    filename += ".mp3"
-                elif "image" in content_type or "png" in content_type:
-                    filename += ".png"
-                elif "python" in content_type:
-                    filename += ".py"
-            with open(filename, 'wb') as f:
-                # iter_content undoes gzip/deflate encoding, unlike response.raw
-                for chunk in response.iter_content(chunk_size=65536):
-                    f.write(chunk)
-            print(f"      ✓ Archivo descargado: {filename} ({os.path.getsize(filename)} bytes)")
-            return filename
-    except Exception as e:
-        print(f"      ✗ Error descargando archivo: {e}")
-    return None
-def _store_response(response, dest_dir, fname):
-    """Stream ``response`` into ``dest_dir`` under a sanitized ``fname``; return the path."""
-    # Keep only the last path component so a hostile name cannot escape dest_dir
-    safe_name = os.path.basename(fname.replace("\\", "/"))
-    if safe_name in ("", ".", ".."):
-        safe_name = "resource"
-    out_path = os.path.join(dest_dir, safe_name)
-    with open(out_path, 'wb') as of:
-        # iter_content undoes gzip/deflate encoding, unlike response.raw
-        for chunk in response.iter_content(chunk_size=65536):
-            of.write(chunk)
-    return out_path
-def fetch_and_download_links(url, dest_dir, max_files=20):
-    """
-    Fetch a webpage, extract links to common resource file types and download them.
-    Links come from the ``href`` of ``a``/``link`` tags and the ``src`` of
-    ``img``/``script``/``source`` tags. A link is downloaded when its
-    extension is in the allowed set, or when a HEAD request reports a
-    file-like content type. Failures are skipped silently (best effort).
-    Args:
-        url (str): Webpage URL to scan for resources.
-        dest_dir (str): Directory where downloaded resources will be saved.
-        max_files (int): Maximum number of files to download.
-    Returns:
-        list: Paths of downloaded files.
-    """
-    downloaded = []
-    try:
-        os.makedirs(dest_dir, exist_ok=True)
-        resp = requests.get(url, timeout=20)
-        resp.raise_for_status()
-        soup = BeautifulSoup(resp.text, "lxml")
-    except Exception:
-        # Best effort: an unreachable or unparsable page yields no downloads
-        return downloaded
-    # find candidate links from href and src
-    hrefs = (tag.get('href') for tag in soup.find_all(['a', 'link']))
-    srcs = (tag.get('src') for tag in soup.find_all(['img', 'script', 'source']))
-    candidates = [link for link in hrefs if link] + [link for link in srcs if link]
-    # normalize and filter
-    seen = set()
-    allowed_exts = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.pdf', '.zip', '.mp3', '.mp4', '.py', '.txt', '.csv', '.xlsx', '.xls'}
-    # NOTE(review): 'text/' also matches ordinary HTML pages, so linked pages
-    # are downloaded too — confirm this is intended.
-    file_ctypes = ['image/', 'audio/', 'video/', 'application/pdf', 'text/', 'application/octet-stream', 'application/zip', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml']
-    for c in candidates:
-        if len(downloaded) >= max_files:
-            break
-        try:
-            full = urllib.parse.urljoin(url, c)
-            if full in seen:
-                continue
-            seen.add(full)
-            path = urllib.parse.urlparse(full).path
-            ext = os.path.splitext(path)[1].lower()
-            known_ext = ext in allowed_exts
-            ctype = ''
-            if not known_ext:
-                # Unknown extension: ask for the content type before downloading
-                ctype = requests.head(full, timeout=10).headers.get('content-type', '')
-                if not any(t in ctype for t in file_ctypes):
-                    continue
-            # The context manager releases the streamed connection on every path
-            with requests.get(full, stream=True, timeout=20) as r:
-                r.raise_for_status()
-                fname = os.path.basename(path) or f"resource_{len(downloaded)}"
-                cd = r.headers.get('content-disposition') if known_ext else None
-                if cd and 'filename=' in cd:
-                    # Keep only the value: drop trailing parameters and quotes
-                    fname = cd.split('filename=')[1].split(';')[0].strip().strip('"')
-                elif not known_ext and not os.path.splitext(fname)[1]:
-                    # No extension in the URL: derive one from the content type
-                    fname += mimetypes.guess_extension(ctype.split(';')[0].strip()) or ''
-                downloaded.append(_store_response(r, dest_dir, fname))
-        except Exception:
-            # Best effort: skip a failing resource instead of aborting the scan
-            continue
-    return downloaded
-def detect_question_type(question, file_name):
-    """
-    Detect the question type so a specific strategy can be applied.
-    Checks run in priority order: YouTube link, attachment extension
-    (.png, .mp3, .xlsx/.csv, .py), then keywords in the question text.
-    Domain and extension matching is case-insensitive.
-    Args:
-        question: Question text
-        file_name: Name of the attached file (may be empty or None)
-    Returns:
-        str: Question type (one of the QUESTION_TYPES values)
-    """
-    q_lower = question.lower()
-    # Normalise once so "Chart.PNG" is treated like "chart.png"
-    name_lower = (file_name or "").lower()
-    if "youtube.com" in q_lower or "youtu.be" in q_lower:
-        return QUESTION_TYPES['YOUTUBE_VIDEO']
-    if name_lower.endswith(".png"):
-        return QUESTION_TYPES['IMAGE_FILE']
-    if name_lower.endswith(".mp3"):
-        return QUESTION_TYPES['AUDIO_FILE']
-    if name_lower.endswith((".xlsx", ".csv")):
-        return QUESTION_TYPES['DATA_FILE']
-    if name_lower.endswith(".py"):
-        return QUESTION_TYPES['CODE_FILE']
-    if "wikipedia" in q_lower:
-        return QUESTION_TYPES['WIKIPEDIA']
-    if any(word in q_lower for word in ["how many", "count", "number of"]):
-        return QUESTION_TYPES['COUNTING']
-    # ".rewsna" is ".answer" reversed: a marker of text written backwards
-    if "reverse" in q_lower or "backwards" in q_lower or ".rewsna" in question:
-        return QUESTION_TYPES['TEXT_MANIPULATION']
-    return QUESTION_TYPES['GENERAL']
-def clean_answer(answer):
-    """
-    Clean the agent's answer by removing unnecessary formatting.
-    Markdown markers (code fences, bold) are removed first, so lead-in
-    phrases wrapped in them are still recognised and stripped afterwards.
-    The original capitalisation of the answer is preserved.
-    Args:
-        answer: Agent answer (any object; converted with ``str``)
-    Returns:
-        str: Cleaned answer
-    """
-    answer = str(answer).strip()
-    # Remove markup first: otherwise "**Final Answer:** X" or a fenced answer
-    # hides the prefix from the start-anchored patterns below.
-    for marker in (r'```', r'\*\*'):
-        answer = re.sub(marker, '', answer)
-    answer = answer.strip()
-    # Lead-ins, matched case-insensitively at the start of the text only
-    prefix_patterns = [
-        r'^##\s*',
-        r'^Final Answer:\s*',
-        r'^Answer:\s*',
-        r'^The answer is\s*',
-        r'^Based on[^,]*,\s*',
-    ]
-    for pattern in prefix_patterns:
-        answer = re.sub(pattern, '', answer, flags=re.IGNORECASE)
-    return answer.strip()