Spaces:

Csuarezg
/

Final_Assignment_Template_hf-course

Sleeping

App Files Files Community

Csuarezg committed on May 28, 2025

Commit

df3c43a

verified ·

1 Parent(s): c17fa18

Update app.py

Browse files

Files changed (1) hide show

app.py +465 -214

app.py CHANGED Viewed

@@ -2,273 +2,524 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
 import re
-import pytesseract
-import yt_dlp
-import cv2
 import numpy as np
-import speech_recognition as sr
-from PIL import Image
-from typing import List, Dict
-from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 from langchain_community.tools.tavily_search import TavilySearchResults
-from youtube_transcript_api import YouTubeTranscriptApi
-from langgraph.graph import StateGraph, END
 from langgraph.checkpoint.memory import MemorySaver
-from langchain_core.messages import HumanMessage, SystemMessage
-# ================ CONSTANTES ================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-SYSTEM_PROMPT = SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
 CRITICAL ANSWER FORMAT RULES:
-# - ALWAYS end with: FINAL ANSWER: [answer]
-# - READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
 SPECIFIC FORMATTING BY QUESTION TYPE:
-# - Numbers: ONLY the number, no units, no text
-# Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
-# - First name only: ONLY the first name
-# Example: If person is "John Smith" → "FINAL ANSWER: John"
-# - Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
-# Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
-# - When asked for a specific type of identifier (code, abbreviation, symbol):
-#   Give ONLY that identifier, strip all explanatory text, brackets, or full names
-# - Lists/Sets: Exactly as requested format
-# Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
 CRITICAL TOOL SELECTION:
-# - Wikipedia questions → wikipedia_tool ONLY
-# - File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
-# - Current events → web_search_tool ONLY
-# - Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
-# - Tables, matrices, systematic checking → python_repl_tool ONLY
 FOR MATHEMATICAL PROBLEMS:
-# ALWAYS use python_repl_tool when:
-# - Analyzing mathematical tables or matrices
-# - Checking properties like commutativity, associativity
-# - Systematic verification of mathematical statements
-# - Complex calculations that need precision
-# - ANY problem involving tables, sets, or systematic checking
-MATHEMATICAL ANALYSIS PROCESS:
-# 1. Use python_repl_tool to parse data systematically
-# 2. Write code to check ALL cases (don't rely on manual inspection)
-# 3. Collect results programmatically
-# 4. Verify your logic with multiple approaches
-# 5. Format answer exactly as requested
-# Example for commutativity checking:
-# - Parse the operation table into a data structure
-# - Check ALL pairs (x,y) to see if x*y = y*x
-# - Collect ALL elements involved in ANY counter-example
-# - Return in requested format (e.g., comma-separated, alphabetical)
 FILE HANDLING:
-# - You HAVE the ability to read and analyze uploaded files
-# - ALWAYS use file_analyzer_tool when questions mention files
-# - The tool automatically finds and analyzes Excel, CSV, images, and audio files
-# - For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
-# - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
-# - Example: "The attached Excel file..." → Use file_analyzer_tool immediately
-SPECIAL CASES TO HANDLE:
-# - If the question appears reversed or encoded, decode it first.
-# - If the question includes an instruction (e.g., "write the opposite of..."), follow the instruction precisely.
-# - DO NOT repeat or paraphrase the question in your answer.
-# - NEVER answer with the full sentence unless explicitly asked to.
-# - If the decoded question asks for a word, give ONLY the word, in the required format.
 REASONING PROCESS:
-# 1. Carefully read what the question is asking for
-# 2. Identify if it needs systematic/mathematical analysis
-# 3. Use appropriate tool (python_repl_tool for math problems)
-# 4. Extract ONLY the specific part requested
-# 5. Format according to the rules above
-# 6. For file questions:
-# a. First use file_analyzer_tool to inspect column names, types, and sample data
-# b. Identify relevant columns based on the question
-# c. Reason using the data (e.g., by counting, filtering, or identifying patterns)
-# d. Only use python_repl_tool if additional computation is necessary
-# 7. If the Wikipedia tool is used but fails to provide an answer (no relevant entry or content), automatically attempt a web search using the same query or a refined version of it
 """
-USERNAME = "Csuarezg"
-AGENT_CODE = "gaia_agent_v1"
-# ================ HERRAMIENTAS ================
-@tool
-def wikipedia_tool(query: str) -> str:
-    """Busca información enciclopédica en Wikipedia. Útil para datos históricos, biografías y conceptos científicos.
-    Args:
-        query: Término de búsqueda específico (ej. 'Teoría de la relatividad')
-    Returns:
-        Resumen conciso del tema en 3 oraciones.
-    """
-    try:
-        import wikipedia
-        wikipedia.set_lang("en")
-        return wikipedia.summary(query, sentences=3)
-    except Exception as e:
-        return f"Error Wikipedia: {str(e)}"
-@tool
-def youtube_transcript_tool(url: str) -> str:
-    """Obtiene el transcript de videos de YouTube. Útil para analizar diálogos o contenido hablado.
-    Args:
-        url: Enlace completo del video (ej. 'https://youtu.be/VIDEO_ID')
-    Returns:
-        Primera parte del transcript (primeros 30 segundos).
-    """
-    try:
-        video_id = re.findall(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)[0]
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-        return " ".join([entry['text'] for entry in transcript[:5]])
-    except Exception as e:
-        return f"Error transcript: {str(e)}"
-@tool
-def file_analyzer_tool(file_path: str) -> str:
-    """Analiza archivos (imágenes, audio) usando OCR y visión por computadora.
-    Args:
-        file_path: Ruta al archivo en el sistema
-    Returns:
-        Texto extraído o análisis de contenido multimedia.
-    """
-    try:
-        if file_path.endswith(('.png', '.jpg', '.jpeg')):
-            img = Image.open(file_path)
-            text = pytesseract.image_to_string(img)
-            return f"Texto detectado: {text[:500]}..." if text else "Sin texto"
-        return "Formato no soportado"
-    except Exception as e:
-        return f"Error análisis archivo: {str(e)}"
-@tool
-def web_search_tool(query: str) -> str:
-    """Realiza búsquedas web en tiempo real. Útil para información actualizada.
-    Args:
-        query: Término de búsqueda con contexto
-    Returns:
-        3 resultados relevantes con fuentes.
-    """
-    try:
-        tavily = TavilySearchResults(api_key=os.getenv("TAVILY_API_KEY"), max_results=3)
-        results = tavily.invoke(query)
-        return "\n".join([f"{res['title']}: {res['content']}" for res in results])
-    except Exception as e:
-        return f"Error búsqueda: {str(e)}"
-# ================ AGENTE PRINCIPAL ================
-class GaiaAgent:
-    def __init__(self):
-        self.tools = [wikipedia_tool, youtube_transcript_tool, file_analyzer_tool, web_search_tool]
-        self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
-        self.workflow = self._build_workflow()
-        self.recognizer = sr.Recognizer()
-    def _build_workflow(self):
-        workflow = StateGraph(AgentState)
         def agent_node(state):
-            messages = [SystemMessage(content=SYSTEM_PROMPT)] + state['messages']
-            response = self.llm.bind_tools(self.tools).invoke(messages)
             return {"messages": [response]}
-        workflow.add_node("agent", agent_node)
-        workflow.add_node("tools", ToolNode(self.tools))
-        workflow.set_entry_point("agent")
-        workflow.add_conditional_edges(
-            "agent",
-            lambda x: "tools" if x["messages"][-1].tool_calls else END
-        )
-        workflow.add_edge("tools", "agent")
-        return workflow.compile()
     def __call__(self, question: str) -> str:
         try:
-            response = self.workflow.invoke(
                 {"messages": [HumanMessage(content=question)]},
-                {"configurable": {"thread_id": "main_thread"}}
             )
-            return self._extract_final_answer(response['messages'][-1].content)
         except Exception as e:
             return f"Error: {str(e)}"
-    def _extract_final_answer(self, text: str) -> str:
-        match = re.search(r"FINAL ANSWER:\s*(.*)", text, re.IGNORECASE)
-        return match.group(1).strip() if match else text
-# ================ LÓGICA DE EJECUCIÓN ================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
-    if not profile:
-        return "Por favor inicia sesión primero", None
     try:
-        agent = GaiaAgent()
-        questions_url = f"{DEFAULT_API_URL}/questions"
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
-        answers = []
-        results_log = []
-        for item in questions_data:
-            task_id = item.get("task_id")
-            question_text = item.get("question")
-            if not task_id or not question_text:
-                continue
-            try:
-                answer = agent(question_text)
-                answers.append({"task_id": task_id, "submitted_answer": answer})
-                results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
-            except Exception as e:
-                results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"Error: {str(e)}"})
-        submission_data = {
-            "username": USERNAME,
-            "agent_code": AGENT_CODE,
-            "answers": answers
-        }
-        submit_response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
-        submit_response.raise_for_status()
-        result = submit_response.json()
-        status = (
-            f"¡Envío exitoso!\n"
-            f"Usuario: {result.get('username', '')}\n"
-            f"Puntaje: {result.get('score', 0)}%\n"
-            f"Mensaje: {result.get('message', '')}"
-        )
-        return status, pd.DataFrame(results_log)
     except Exception as e:
-        return f"Error crítico: {str(e)}", pd.DataFrame()
-# ================ INTERFAZ GRADIO ================
-with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent - Evaluación Completa")
-    gr.Markdown("""
-    **Instrucciones:**
-    1. Inicia sesión con tu cuenta de Hugging Face
-    2. Haz clic en 'Ejecutar Evaluación'
-    3. Espera los resultados (puede tomar varios minutos)
-    """)
     gr.LoginButton()
-    run_btn = gr.Button("Ejecutar Evaluación", variant="primary")
-    status_output = gr.Textbox(label="Estado", interactive=False)
-    results_table = gr.DataFrame(label="Resultados Detallados", wrap=True)
-    run_btn.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import requests
 import pandas as pd
+import json
 import re
+import tempfile
+import logging
+from typing import List, Dict, Optional
 import numpy as np
+# Core ML/AI imports
+from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
+from langchain_core.tools import tool
 from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_experimental.tools import PythonREPLTool
+from langgraph.graph import StateGraph, START, END
+from langgraph.graph.message import add_messages
+from langgraph.prebuilt import ToolNode, tools_condition
 from langgraph.checkpoint.memory import MemorySaver
+from typing import TypedDict, Annotated, List as ListType
+# File processing
+import pandas as pd
+import wikipedia
+from youtube_transcript_api import YouTubeTranscriptApi
+import speech_recognition as sr
+# Computer vision (will be downloaded at runtime)
+try:
+    from ultralytics import YOLO
+    import cv2
+    import yt_dlp
+    VISION_AVAILABLE = True
+except ImportError:
+    VISION_AVAILABLE = False
+    print("⚠️ Vision libraries not available, will skip vision tasks")
+# OCR (optional)
+try:
+    import pytesseract
+    from PIL import Image
+    OCR_AVAILABLE = True
+except ImportError:
+    OCR_AVAILABLE = False
+# Silence verbose logging
+os.environ['ULTRALYTICS_VERBOSE'] = 'false'
+os.environ['YOLO_VERBOSE'] = 'false'
+logging.getLogger("ultralytics").setLevel(logging.ERROR)
+# --- Constants ---
+# Base URL of the scoring service; run_and_submit_all() appends
+# "/questions" and "/submit" to it.
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# System prompt for the agent
+SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
 CRITICAL ANSWER FORMAT RULES:
+- ALWAYS end with: FINAL ANSWER: [answer]
+- READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
 SPECIFIC FORMATTING BY QUESTION TYPE:
+- Numbers: ONLY the number, no units, no text
+  Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
+- First name only: ONLY the first name
+  Example: If person is "John Smith" → "FINAL ANSWER: John"
+- Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
+  Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
+- Lists/Sets: Exactly as requested format
+  Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
 CRITICAL TOOL SELECTION:
+- Wikipedia questions → wikipedia_tool ONLY
+- File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
+- Current events → web_search_tool ONLY
+- Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
+- Tables, matrices, systematic checking → python_repl_tool ONLY
 FOR MATHEMATICAL PROBLEMS:
+ALWAYS use python_repl_tool when:
+- Analyzing mathematical tables or matrices
+- Checking properties like commutativity, associativity
+- Systematic verification of mathematical statements
+- Complex calculations that need precision
+- ANY problem involving tables, sets, or systematic checking
 FILE HANDLING:
+- You HAVE the ability to read and analyze uploaded files
+- ALWAYS use file_analyzer_tool when questions mention files
+- The tool automatically finds and analyzes Excel, CSV, images, and audio files
+- For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
+- NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
+- Example: "The attached Excel file..." → Use file_analyzer_tool immediately
 REASONING PROCESS:
+1. Carefully read what the question is asking for
+2. Identify if it needs systematic/mathematical analysis
+3. Use appropriate tool (python_repl_tool for math problems)
+4. Extract ONLY the specific part requested
+5. Format according to the rules above
 """
+class GAIAAgent:
+    def __init__(self):
+        """Read API keys from the environment, then build the LLM, tools and agent graph.
+
+        Raises:
+            ValueError: If OPENAI_API_KEY is not set in the environment.
+        """
+        print("🚀 Initializing GAIA Agent...")
+        # API Keys from HF Secrets
+        self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        # Optional: web_search_tool returns a "not available" message without it.
+        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
+        # Optional: wolfram_alpha_tool returns a "not available" message without it.
+        self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
+        # NOTE(review): stored here but not read anywhere else in the visible code.
+        self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
+        if not self.openai_api_key:
+            raise ValueError("OPENAI_API_KEY not found in environment variables")
+        # Initialize LLM
+        self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0, api_key=self.openai_api_key)
+        # Download and initialize YOLO model if vision is available
+        # NOTE(review): none of the tools built in _setup_tools reference
+        # self.yolo_model, so this load looks unused — confirm before keeping it.
+        self.yolo_model = None
+        if VISION_AVAILABLE:
+            try:
+                print("📦 Downloading YOLO model...")
+                self.yolo_model = YOLO("yolov8x.pt")
+                print("✅ YOLO model ready")
+            except Exception as e:
+                # Best effort: a failed load is reported and the agent continues
+                # with yolo_model left as None.
+                print(f"⚠️ YOLO model failed to load: {e}")
+                self.yolo_model = None
+        # Setup tools
+        # Must run before _create_agent_runner, which reads self.tools.
+        self.tools = self._setup_tools()
+        # Create agent runner
+        self.agent_runner = self._create_agent_runner()
+        print("✅ GAIA Agent initialized successfully!")
+    def _setup_tools(self):
+        """Build the tools that are bound to the LLM and executed by the graph.
+
+        Returns:
+            list: wikipedia_tool, web_search_tool, wolfram_alpha_tool,
+            file_analyzer_tool and a PythonREPLTool instance, in that order.
+
+        Every tool reports failures as a returned string rather than raising.
+        The one-line docstrings on the ``@tool`` functions are kept verbatim:
+        the decorator exposes them to the model as the tool descriptions.
+        """
+        def _wiki_entry(title: str) -> str:
+            # Shared by the direct lookup and the disambiguation retry.
+            summary = wikipedia.summary(title, sentences=3)
+            page = wikipedia.page(title)
+            return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"
+        # Wikipedia tool
+        @tool
+        def wikipedia_tool(query: str) -> str:
+            """Search Wikipedia for encyclopedic information"""
+            try:
+                wikipedia.set_lang("en")
+                return _wiki_entry(query)
+            except wikipedia.DisambiguationError as e:
+                # Retry with the first suggested page. The retry runs inside an
+                # except handler, so it needs its own guard: a failure here is
+                # not caught by the sibling "except Exception" below.
+                try:
+                    return _wiki_entry(e.options[0])
+                except Exception as retry_error:
+                    return f"Wikipedia error: {str(retry_error)}"
+            except Exception as e:
+                return f"Wikipedia error: {str(e)}"
+        # Web search tool
+        @tool
+        def web_search_tool(query: str) -> str:
+            """Web search for current information"""
+            if not self.tavily_api_key:
+                return "Tavily API key not available"
+            try:
+                tavily_search = TavilySearchResults(api_key=self.tavily_api_key, max_results=5)
+                results = tavily_search.invoke(query)
+                formatted_results = [
+                    f"RESULT {i}:\nTitle: {res.get('title', 'N/A')}\nContent: {res.get('content', 'N/A')}"
+                    for i, res in enumerate(results, 1)
+                ]
+                return "\n\n".join(formatted_results)
+            except Exception as e:
+                return f"Search error: {str(e)}"
+        # Wolfram Alpha tool
+        @tool
+        def wolfram_alpha_tool(query: str) -> str:
+            """Use Wolfram Alpha for computational questions"""
+            if not self.wolfram_api_key:
+                return "Wolfram API key not available"
+            params = {
+                'appid': self.wolfram_api_key,
+                'input': query,
+                'format': 'plaintext',
+                'output': 'JSON'
+            }
+            try:
+                # HTTPS: the appid travels in the query string and must not be
+                # sent in clear text.
+                resp = requests.get("https://api.wolframalpha.com/v2/query", params=params, timeout=30)
+                resp.raise_for_status()
+                data = resp.json().get('queryresult', {})
+                if not data.get('success'):
+                    return f"Wolfram Alpha couldn't process: {query}"
+                results = []
+                for pod in data.get('pods', []):
+                    pod_title = pod.get('title', 'Unknown')
+                    for subpod in pod.get('subpods', []):
+                        plaintext = subpod.get('plaintext')
+                        if plaintext and plaintext.strip():
+                            results.append(f"{pod_title}: {plaintext}")
+                # Only the first five non-empty subpod texts are returned.
+                return " | ".join(results[:5]) if results else "No readable results"
+            except Exception as e:
+                return f"Wolfram Alpha error: {e}"
+        # File analyzer tool
+        @tool
+        def file_analyzer_tool(file_description: str = "uploaded file") -> str:
+            """Analyze uploaded files (Excel, CSV, images, audio)"""
+            # file_description is not used for the lookup: every Excel/CSV file
+            # found in the search directories is summarized. Only those data
+            # formats are handled here.
+            try:
+                search_paths = ["./", "./uploads", "./files", "./data"]
+                data_exts = ('.xlsx', '.xls', '.csv')
+                found_files = []
+                for path in search_paths:
+                    if os.path.exists(path):
+                        for file in os.listdir(path):
+                            if file.lower().endswith(data_exts):
+                                found_files.append(os.path.join(path, file))
+                if not found_files:
+                    return "No supported data files found"
+                results = []
+                for file_path in found_files:
+                    try:
+                        ext = os.path.splitext(file_path)[1].lower()
+                        if ext in ['.xlsx', '.xls']:
+                            df = pd.read_excel(file_path)
+                        elif ext == '.csv':
+                            df = pd.read_csv(file_path)
+                        else:
+                            continue
+                        result = f"📄 FILE: {file_path}\n"
+                        result += f"🔢 SHAPE: {df.shape}\n"
+                        result += f"🧠 COLUMNS: {list(df.columns)}\n"
+                        result += f"📊 FIRST 5 ROWS:\n{df.head().to_string(index=False)}\n"
+                        numeric_cols = df.select_dtypes(include=['number']).columns
+                        if len(numeric_cols) > 0:
+                            totals = df[numeric_cols].sum().round(2)
+                            result += f"💰 NUMERIC TOTALS:\n{totals.to_string()}\n"
+                        results.append(result)
+                    except Exception as e:
+                        # One unreadable file must not hide the others.
+                        results.append(f"Error processing {file_path}: {e}")
+                return "\n\n".join(results)
+            except Exception as e:
+                return f"File analysis error: {e}"
+        # Python REPL tool
+        python_repl_tool = PythonREPLTool()
+        return [
+            wikipedia_tool,
+            web_search_tool,
+            wolfram_alpha_tool,
+            file_analyzer_tool,
+            python_repl_tool,
+        ]
+    def _create_agent_runner(self):
+        """Create the LangGraph agent runner.
+
+        Builds a two-node graph: "agent" calls the tool-bound LLM, "tools"
+        runs the requested tools and hands control back to "agent". The
+        compiled graph uses an in-memory checkpointer.
+
+        Returns:
+            The compiled graph stored by __init__ as self.agent_runner.
+        """
+        class AgentState(TypedDict):
+            # Graph state: the message list, combined via add_messages.
+            messages: Annotated[ListType, add_messages]
+        model_with_tools = self.llm.bind_tools(self.tools)
         def agent_node(state):
+            messages = state['messages']
+            # The system prompt is prepended for this call only; the node
+            # returns just the model response, so the prompt is not written
+            # back into the state.
+            if not messages or not isinstance(messages[0], SystemMessage):
+                messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
+            response = model_with_tools.invoke(messages)
             return {"messages": [response]}
+        tool_node = ToolNode(self.tools)
+        builder = StateGraph(AgentState)
+        builder.add_node("agent", agent_node)
+        builder.add_node("tools", tool_node)
+        builder.add_edge(START, "agent")
+        # tools_condition picks the next step after "agent": either the
+        # "tools" node or END, per the mapping below.
+        builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
+        builder.add_edge("tools", "agent")
+        # NOTE(review): with a checkpointer, state is presumably kept per
+        # thread_id passed in the run config — confirm callers use distinct
+        # ids for unrelated questions.
+        memory = MemorySaver()
+        return builder.compile(checkpointer=memory)
+    def _extract_final_answer(self, response_text: str) -> str:
+        """Extract the final answer from agent response.
+
+        Args:
+            response_text: Full text of the agent's last message.
+
+        Returns:
+            The text on the line following the last non-empty
+            "FINAL ANSWER:" marker (case-insensitive), with surrounding
+            whitespace and one trailing period removed. Without a marker,
+            the last non-empty line of the response (or "" if there is none).
+        """
+        # Capture only the first line after each marker; \s* also skips a
+        # line break directly after the colon.
+        candidates = [
+            found.group(1).strip()
+            for found in re.finditer(r"FINAL ANSWER:\s*([^\n]*)", response_text, re.IGNORECASE)
+        ]
+        if candidates:
+            # Prefer the last marker that carries text: the model may restate
+            # the format rule ("FINAL ANSWER: [answer]") before answering.
+            raw_answer = next((text for text in reversed(candidates) if text), "")
+            # A trailing period is sentence punctuation, also after a number
+            # ("42." must be submitted as "42").
+            if raw_answer.endswith('.'):
+                raw_answer = raw_answer[:-1]
+            return raw_answer.strip()
+        lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
+        return lines[-1] if lines else response_text.strip()
     def __call__(self, question: str) -> str:
+        """Run the agent graph on one question and return the extracted answer.
+
+        Args:
+            question: The question text to answer.
+
+        Returns:
+            The answer extracted from the agent's last message, a fixed
+            failure message when the graph produced no messages, or
+            "Error: <details>" when an exception was raised.
+        """
+        print(f"🤖 Processing question: {question[:100]}...")
         try:
+            import uuid
+            # The graph is compiled with a checkpointer, which keeps state per
+            # thread_id. A fresh id per question keeps earlier questions (and
+            # tool calls cut short by the step cap below) out of this run.
+            config = {"configurable": {"thread_id": f"gaia_session_{uuid.uuid4().hex}"}}
+            # Run the agent
+            final_state = None
+            events = self.agent_runner.stream(
                 {"messages": [HumanMessage(content=question)]},
+                config=config,
+                stream_mode="values"
             )
+            for step, event in enumerate(events, 1):
+                final_state = event
+                if step > 8:  # Prevent infinite loops
+                    break
+            if not final_state or not final_state['messages']:
+                return "Agent execution failed - no response generated"
+            last_message = final_state['messages'][-1]
+            full_response = last_message.content
+            if not isinstance(full_response, str):
+                # Message content may be a list of text/dict blocks; flatten it
+                # so slicing and answer extraction work on plain text.
+                full_response = "\n".join(
+                    block.get("text", "") if isinstance(block, dict) else str(block)
+                    for block in full_response
+                )
+            print(f"📝 Agent response: {full_response[:200]}...")
+            # Extract final answer
+            final_answer = self._extract_final_answer(full_response)
+            print(f"🎯 Final answer: {final_answer}")
+            return final_answer
         except Exception as e:
+            # Top-level boundary: report the failure as the answer text so a
+            # single bad question does not abort the whole evaluation run.
+            print(f"❌ Error processing question: {e}")
+            import traceback
+            traceback.print_exc()
             return f"Error: {str(e)}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the GAIA Agent on them, submits all answers,
+    and displays the results.
+
+    Args:
+        profile: The logged-in user's profile, or None when nobody is logged in.
+
+    Returns:
+        A (status_message, results) pair. results is a DataFrame of the
+        per-question log, or None when the run stops before any question
+        is processed (no login, agent init failure, fetch failure).
+    """
     space_id = os.getenv("SPACE_ID")
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate GAIA Agent
+    try:
+        agent = GAIAAgent()
+    except Exception as e:
+        print(f"Error instantiating GAIA agent: {e}")
+        return f"Error initializing GAIA agent: {e}", None
+    # NOTE(review): space_id is None when SPACE_ID is unset, which yields
+    # ".../spaces/None/tree/main" — confirm whether that is acceptable.
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code URL: {agent_code}")
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
+        if not questions_data:
+            return "Fetched questions list is empty.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    # 3. Run GAIA Agent on questions
+    results_log = []
+    answers_payload = []
+    print(f"Running GAIA agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data):
+        task_id = item.get("task_id")
+        # Accept either key spelling for the question text.
+        question_text = item.get("question") or item.get("Question")
+        if not task_id or question_text is None:
+            print(f"Skipping item {i} with missing data")
+            continue
+        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            # The log shows at most the first 100 characters of the question.
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer
+            })
+            print(f"✅ Question {i+1} completed: {submitted_answer}")
+        except Exception as e:
+            # A failing question is still submitted, with the error text as
+            # its answer, so the payload covers every processed task.
+            print(f"❌ Error on question {i+1}: {e}")
+            error_msg = f"AGENT ERROR: {str(e)}"
+            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": error_msg
+            })
+    if not answers_payload:
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare and Submit
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+    print(f"Submitting {len(answers_payload)} answers...")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"🎉 Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("✅ Submission successful!")
+        return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        # The per-question log is still returned so the answers stay visible
+        # even though the submission failed.
+        error_msg = f"❌ Submission Failed: {str(e)}"
+        print(error_msg)
+        return error_msg, pd.DataFrame(results_log)
+# --- Build Gradio Interface ---
+# Layout: description, login button, run button, then a status box and a
+# results table that receive the two values returned by run_and_submit_all.
+with gr.Blocks(title="GAIA Agent Evaluation") as demo:
+    gr.Markdown("# 🤖 GAIA Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Advanced GAIA Benchmark Agent**
+        This agent uses:
+        - 🧠 GPT-4 Turbo with specialized tools
+        - 📚 Wikipedia search for encyclopedic information
+        - 🌐 Web search for current events
+        - 🧮 Wolfram Alpha for computational tasks
+        - 📊 File analysis for Excel/CSV data
+        - 🐍 Python REPL for mathematical analysis
+        - 🎯 Specialized prompt engineering for GAIA benchmark
+        **Instructions:**
+        1. Log in to your Hugging Face account
+        2. Click 'Run Evaluation & Submit All Answers'
+        3. Wait for processing (this may take several minutes)
+        ---
+        """
+    )
     gr.LoginButton()
+    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
+    status_output = gr.Textbox(
+        label="📊 Run Status / Submission Result",
+        lines=8,
+        interactive=False
+    )
+    results_table = gr.DataFrame(
+        label="📝 Questions and Agent Answers",
+        wrap=True,
+        max_height=400
+    )
+    # No inputs are wired here; the profile argument of run_and_submit_all is
+    # presumably supplied by Gradio from the login session — confirm.
+    run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    # Startup banner: print the Space environment details, then launch the UI.
+    print("\n" + "="*50)
+    print("🚀 GAIA Agent HuggingFace Space Starting")
+    print("="*50)
+    # Environment info
+    space_host = os.getenv("SPACE_HOST")
+    space_id = os.getenv("SPACE_ID")
+    if space_host:
+        print(f"✅ SPACE_HOST: {space_host}")
+        # NOTE(review): confirm whether SPACE_HOST already ends in ".hf.space";
+        # if it does, the URL printed below repeats the suffix.
+        print(f"   Runtime URL: https://{space_host}.hf.space")
+    if space_id:
+        print(f"✅ SPACE_ID: {space_id}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
+    print("="*50 + "\n")
+    print("🌟 Launching GAIA Agent Interface...")
+    demo.launch(debug=True, share=False)