File size: 10,267 Bytes
10e9b7d
c3a7354
eccf8e4
c3a7354
 
 
 
 
 
 
 
3c4371f
c4e70e2
10e9b7d
c3a7354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
c3a7354
 
31243f4
c3a7354
 
 
31243f4
c3a7354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c4371f
c3a7354
 
 
3c4371f
c3a7354
 
e80aab9
c3a7354
31243f4
c3a7354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
c3a7354
 
 
 
 
 
 
 
 
 
c9bb910
c3a7354
125b7e1
c3a7354
 
 
 
 
 
 
 
 
c9bb910
 
c3a7354
 
 
eccf8e4
c3a7354
7d65c66
c3a7354
 
7d65c66
c3a7354
 
 
 
 
 
125b7e1
c3a7354
125b7e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3a7354
 
 
 
125b7e1
c3a7354
 
31243f4
c3a7354
 
31243f4
c3a7354
 
 
 
7a60cf2
c3a7354
 
31243f4
c4e70e2
 
 
db4bf0f
 
c3a7354
 
 
 
e80aab9
 
c3a7354
 
 
 
 
 
 
 
 
 
7d65c66
c3a7354
 
 
e80aab9
 
c3a7354
c4e70e2
c3a7354
 
 
 
 
 
 
e80aab9
31243f4
 
 
e80aab9
 
 
125b7e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import os
import sys
import requests
import json
import traceback
from datetime import datetime
import re
import markdownify
import wikipediaapi
import trafilatura
import gradio as gr
import pandas as pd
import time

from smolagents import CodeAgent, LiteLLMModel, tool, DuckDuckGoSearchTool, FinalAnswerTool


@tool
def get_wikipedia_article(search_query: str) -> str:
    """
    Searches for a Wikipedia article by its exact title or a close topic and returns its full, clean content.
    This is the preferred tool for questions that likely have a Wikipedia page.

    Args:
        search_query (str): The title or main subject to search for on Wikipedia (e.g., "Mercedes Sosa").
    """
    # The docstring above is kept verbatim: the @tool decorator consumes it as
    # the tool description, so its wording is part of the agent's input.
    print(f"--- Herramienta: Buscando en Wikipedia: '{search_query}' ---")
    try:
        client = wikipediaapi.Wikipedia(
            language='en',
            user_agent='GAIA-Agent-Course-Test/1.0 (tu_email@ejemplo.com)'
        )
        article = client.page(search_query)

        # Guard clause: report a missing page as an error string, not an exception.
        if not article.exists():
            return f"Error: La página de Wikipedia para '{search_query}' no fue encontrada."

        print(f"--- Herramienta: Página de Wikipedia encontrada: '{article.title}' ---")
        return article.text
    except Exception as exc:
        # Any failure is handed back to the agent as text so it can fall back
        # to another tool instead of aborting the step.
        return f"An error occurred while fetching from Wikipedia: {exc}"

@tool
def visit_and_get_html(url: str) -> str:
    """
    Visits a webpage using robust headers and returns its raw HTML content for further processing.
    Args:
        url (str): The URL of the webpage to visit.
    """
    # The docstring above is kept verbatim: the @tool decorator consumes it as
    # the tool description.
    print(f"--- Herramienta: Visitando URL {url} para obtener HTML ---")

    # Browser-like request headers sent with every fetch.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
    }

    try:
        reply = requests.get(url, headers=browser_headers, timeout=20)
        reply.raise_for_status()  # turn 4xx/5xx responses into exceptions
        return reply.text
    except requests.exceptions.RequestException as exc:
        # Network and HTTP failures are logged and returned as an error string.
        failure = f"Error fetching the webpage {url}: {exc}"
        print(failure)
        return failure
        
@tool
def extract_main_content(html_content: str) -> str:
    """
    Extracts the main article text from raw HTML content, removing boilerplate like menus, ads, and footers.
    Args:
        html_content (str): The raw HTML of a webpage.
    """
    # The docstring above is kept verbatim: the @tool decorator consumes it as
    # the tool description.
    print("--- Herramienta: Extrayendo contenido principal del HTML con Trafilatura... ---")
    if not html_content or not isinstance(html_content, str):
        return "Error: No valid HTML content provided to extract."

    # visit_and_get_html reports failures as a string that *starts* with
    # "Error". Checking only the prefix (instead of `"Error" in html_content`)
    # avoids rejecting legitimate pages that merely contain the word "Error".
    if html_content.lstrip().startswith("Error"):
        return "Error: No valid HTML content provided to extract."

    extracted = trafilatura.extract(html_content)
    # trafilatura.extract yields None when it finds no main content; return a
    # string so the declared `-> str` contract always holds for the agent.
    if not extracted:
        return "Error: No main content could be extracted from the provided HTML."
    return extracted


# --- Model and agent configuration ---
# The key is read from the environment. It is NOT written back to os.environ:
# the value already lives there, and the previous unconditional
# `os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY` raised TypeError when the
# variable was unset, crashing the module before the error branch below ran.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GEMINI_MODEL_ID = "gemini/gemini-1.5-flash-latest"

# Both stay None when initialisation fails or the key is missing;
# run_and_submit_all checks agente_principal before doing any work.
llm_provider = None
agente_principal = None

if GEMINI_API_KEY:
    try:
        llm_provider = LiteLLMModel(model_id=GEMINI_MODEL_ID, temperature=0.1, max_tokens=8192)
        print(f"LLM ({GEMINI_MODEL_ID}) configurado para usar la API de Google a través de LiteLLM.")

        agente_principal = CodeAgent(
            model=llm_provider,
            name="AgenteGAIA",
            description="Soy un agente autónomo diseñado para resolver preguntas del benchmark GAIA. Utilizo mis herramientas para investigar y encontrar la respuesta correcta.",
            tools=[
                get_wikipedia_article,
                DuckDuckGoSearchTool(),
                visit_and_get_html,
                extract_main_content,
                FinalAnswerTool()
            ],
            # The agent's generated code may additionally import `re`.
            additional_authorized_imports=["re"],
            verbosity_level=2,
            max_steps=15
        )
        print("AgenteGAIA Principal inicializado.")
    except Exception as e:
        # Start-up boundary: log the failure and keep the app importable so
        # the UI can still load and report that the agent is unavailable.
        print(f"CRITICAL ERROR: No se pudo inicializar el agente o el LLM: {e}")
        traceback.print_exc()
else:
    print("CRITICAL ERROR: Clave API 'GEMINI_API_KEY' no se encontró en los Secrets del Space.")


# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Upper bound (seconds) for each HTTP call to the scoring service, so a stalled
# connection cannot hang the Gradio callback indefinitely.
_SCORING_API_TIMEOUT = 60


def _json_safe_answer(answer):
    """Return `answer` unchanged if `json.dumps` accepts it, otherwise its `str()` form."""
    try:
        json.dumps(answer)
    except (TypeError, ValueError):
        return str(answer)
    return answer


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch every exam question, run `agente_principal` on each one, and submit the answers.

    Args:
        profile: The logged-in Hugging Face OAuth profile, or None when the
            user has not logged in.

    Returns:
        A `(status_message, results_dataframe)` tuple. The DataFrame holds one
        row per processed question ("Question", "Agent's Answer") and is empty
        when the run aborts before any question is processed.
    """

    if not profile:
        return "Por favor, inicia sesión con tu perfil de Hugging Face.", pd.DataFrame()

    if not agente_principal:
        return "Error: El agente principal no está inicializado. Revisa los logs y la API Key.", pd.DataFrame()

    username = profile.username
    space_id = os.getenv("SPACE_ID")
    code_link = f"https://huggingface.co/spaces/{space_id}" if space_id else "No se pudo obtener el link al Space."

    print(f"Iniciando evaluación para el usuario: {username}")

    # 1. Fetch all exam questions.
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=_SCORING_API_TIMEOUT)
        response.raise_for_status()
        all_questions = response.json()
        print(f"Se obtuvieron {len(all_questions)} preguntas del examen.")
    except Exception as e:
        return f"Error al obtener las preguntas: {e}", pd.DataFrame()

    if not all_questions:
        return "Error: La lista de preguntas está vacía.", pd.DataFrame()

    # 2. Iterate, run the agent, and collect the answers.
    all_answers = []
    results_for_display = []

    gaia_rules_prompt = "1. Answer Format: Numbers must be plain digits. Strings must omit articles.\n2. Response Content: ONLY the final answer, without prefixes or explanations."
    main_task_prompt_template = """
    Your mission is to accurately answer the question "{question_text}" using your available tools and a robust, multi-step reasoning process.

    **YOUR OVERALL PLAN:**
    1.  **STRATEGY - WIKIPEDIA FIRST:** Check if the question mentions 'Wikipedia' or is about a topic likely to have a detailed Wikipedia page. If so, your first attempt should be to use your `get_wikipedia_article` tool with a relevant search query.
    2.  **STRATEGY - WEB SEARCH FALLBACK:** If the Wikipedia tool fails or does not provide enough information, your code must then use the `DuckDuckGoSearchTool` to find alternative sources.
    3.  **INFORMATION EXTRACTION:**
        - If you have content from Wikipedia, analyze it directly.
        - If you have a URL from web search, you MUST use the following sequence: First, call `visit_and_get_html` with the URL. Second, call `extract_main_content` on the resulting HTML to get clean text.
    4.  **SYNTHESIZE & VERIFY:** Analyze the clean text you have obtained. Generate Python code (using string methods or the `re` module) to find the specific information needed. If you can, visit a second source to verify your finding.
    5.  **FINAL ANSWER:** Once confident in your answer, call your `final_answer` tool with ONLY the final, clean value, formatted perfectly according to the GAIA rules below.

    **GAIA RULES FOR FINAL ANSWER:**
    {gaia_rules_prompt}

    **INSTRUCTION FOR YOUR CURRENT ACTION:**
    Begin with STEP 1. Follow the plan meticulously, starting with the "Wikipedia-First" approach. Generate your thoughts and the Python code for the NEXT logical step.
    """

    for i, question in enumerate(all_questions):
        # Skip malformed entries instead of crashing the whole run on a KeyError.
        if not isinstance(question, dict) or not question.get('task_id') or question.get('question') is None:
            print(f"Se omite una pregunta con formato inválido: {question}")
            continue

        task_id = question['task_id']
        question_text = question['question']
        print(f"\n--- Procesando Pregunta {i+1}/{len(all_questions)} (ID: {task_id}) ---")

        current_prompt = main_task_prompt_template.format(question_text=question_text, gaia_rules_prompt=gaia_rules_prompt)

        try:
            agent_answer = agente_principal.run(current_prompt)
            print(f"Respuesta del Agente: {agent_answer}")
        except Exception as e:
            # A failure on one question is recorded as that question's answer
            # so the remaining questions still run.
            agent_answer = f"Error al ejecutar el agente: {e}"
            print(agent_answer)
            traceback.print_exc()

        # The agent may return an arbitrary Python object; make sure it can be
        # serialised later, otherwise json.dumps would abort the finished run.
        agent_answer = _json_safe_answer(agent_answer)

        all_answers.append({"task_id": task_id, "submitted_answer": agent_answer})
        results_for_display.append({"Question": question_text, "Agent's Answer": agent_answer})

        print("--- Pausando por 5 segundos para no exceder el límite de la API... ---")
        time.sleep(5)

    if not all_answers:
        return "Error: El agente no produjo ninguna respuesta para enviar.", pd.DataFrame(results_for_display)

    # 3. Write the submission file and upload it. File creation sits inside the
    # try block so an I/O failure is reported in the UI together with the
    # collected answers instead of discarding them with an unhandled exception.
    submission_file = "submission.jsonl"
    try:
        with open(submission_file, 'w', encoding="utf-8") as f:
            f.writelines(json.dumps(answer) + '\n' for answer in all_answers)

        print(f"Archivo de envío '{submission_file}' creado con {len(all_answers)} respuestas.")

        # NOTE(review): answers are uploaded as a multipart JSONL file; confirm
        # that the /submit endpoint accepts this format rather than a JSON body.
        payload = {"username": username, "agent_code": code_link}
        with open(submission_file, 'rb') as f:
            files = {'answers': (submission_file, f, 'application/jsonl')}
            submit_url = f"{DEFAULT_API_URL}/submit"
            response = requests.post(submit_url, data=payload, files=files, timeout=_SCORING_API_TIMEOUT)
            response.raise_for_status()

        submission_result = response.json()
        print("¡Resultados enviados exitosamente!")
        return f"¡Éxito! Tu puntaje final es: {submission_result.get('score', 'N/A')}", pd.DataFrame(results_for_display)
    except Exception as e:
        error_msg = f"Error al enviar los resultados: {e}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_for_display)


# Gradio UI. Components are created in display order inside the Blocks
# context, so the statement order below is the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AZUFR3 AGENT DAEMON - Ecosistema de Agentes para el Examen Final de GAIA")
    gr.Markdown("Inicia sesión con tu perfil de Hugging Face y haz clic en el botón para ejecutar tu agente en las 20 preguntas del examen y enviar tus resultados. La ejecución puede tardar varios minutos.")
    
    login_button = gr.LoginButton()
    run_button = gr.Button("Ejecutar Evaluación y Enviar Todas las Respuestas", variant="primary")
    
    # Outputs filled by run_and_submit_all: a status message and the table of
    # question/answer pairs.
    status_output = gr.Textbox(label="Estado de la Ejecución / Resultado del Envío", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Preguntas y Respuestas del Agente", wrap=True, column_widths=["50%", "50%"])

    # No explicit `inputs`: the callback's only parameter is the OAuth profile.
    # NOTE(review): presumably Gradio injects it from the gr.OAuthProfile
    # annotation on run_and_submit_all — confirm.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()