Gianluca Tessitore committed on
Commit
bfff34c
·
1 Parent(s): 81917a3

upload Agent

Browse files
Files changed (3) hide show
  1. .gitignore +4 -0
  2. app.py +488 -73
  3. requirements.txt +8 -1
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **/.vscode/
2
+ .venv
3
+ .claude
4
+ .env
app.py CHANGED
@@ -1,34 +1,459 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,13 +463,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
  print(agent_code)
50
 
@@ -55,24 +480,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
 
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
@@ -80,23 +499,30 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
89
 
90
  if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
-
99
- # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -110,51 +536,46 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
110
  f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
  print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
118
  error_json = e.response.json()
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
  status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
  except requests.exceptions.Timeout:
127
  status_message = "Submission Failed: The request timed out."
128
  print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
  except requests.exceptions.RequestException as e:
132
  status_message = f"Submission Failed: Network error - {e}"
133
  print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
  except Exception as e:
137
  status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
142
 
143
- # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
- """
148
  **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
  ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
 
158
  """
159
  )
160
 
@@ -163,19 +584,14 @@ with gr.Blocks() as demo:
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +599,13 @@ if __name__ == "__main__":
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
-
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import sys
3
+ import json
4
+
5
+ # Load .env file if present (local development)
6
+ try:
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+ except ImportError:
10
+ pass
11
+ import re
12
+ import base64
13
+ from io import StringIO
14
+
15
  import gradio as gr
16
  import requests
 
17
  import pandas as pd
18
+ from huggingface_hub import InferenceClient
19
 
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
+ # --- Tool Functions ---
24
+
25
def web_search(query: str, max_results: int = 5) -> str:
    """Search the web using DuckDuckGo."""
    try:
        from ddgs import DDGS
        with DDGS() as ddgs:
            hits = list(ddgs.text(query, max_results=max_results))
        if not hits:
            return "No search results found."
        # One formatted block per hit, blank-line separated.
        blocks = [
            f"Title: {hit.get('title', '')}\n"
            f"URL: {hit.get('href', '')}\n"
            f"Snippet: {hit.get('body', '')}"
            for hit in hits
        ]
        return "\n\n".join(blocks)
    except Exception as e:
        # Tool errors are reported as strings so the agent loop can continue.
        return f"Search error: {e}"
43
+
44
+
45
def visit_webpage(url: str) -> str:
    """Fetch and return text content of a webpage."""
    try:
        ua = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        page = requests.get(url, headers=ua, timeout=15)
        page.raise_for_status()
        try:
            # Prefer a real HTML parser when BeautifulSoup is installed.
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(page.text, "html.parser")
            for chrome in soup(["script", "style", "nav", "footer", "header"]):
                chrome.decompose()
            body_text = soup.get_text(separator=" ", strip=True)
        except ImportError:
            # Crude fallback: strip tags with a regex.
            body_text = re.sub(r"<[^>]+>", " ", page.text)
        body_text = re.sub(r"\s+", " ", body_text).strip()
        # Cap the payload so it fits in the LLM context window.
        return body_text[:12000]
    except Exception as e:
        return f"Error visiting webpage: {e}"
63
+
64
+
65
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for information about a topic."""
    try:
        # Attempt a direct page-summary lookup first.
        slug = requests.utils.quote(query.replace(" ", "_"))
        direct = requests.get(
            f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}", timeout=10
        )
        if direct.status_code == 200:
            payload = direct.json()
            summary = payload.get("extract", "")
            if summary:
                return f"{payload.get('title', '')}: {summary}"

        # Direct lookup produced nothing: fall back to the MediaWiki search API.
        query_params = {
            "action": "query", "list": "search",
            "srsearch": query, "format": "json",
            "srlimit": 3, "srprop": "snippet",
        }
        found = requests.get(
            "https://en.wikipedia.org/w/api.php", params=query_params, timeout=10
        )
        hits = found.json().get("query", {}).get("search", [])
        if not hits:
            return "No Wikipedia results found."

        # Summarize the top search hit via the REST endpoint.
        top_title = hits[0].get("title", "")
        top_slug = requests.utils.quote(top_title.replace(" ", "_"))
        top = requests.get(
            f"https://en.wikipedia.org/api/rest_v1/page/summary/{top_slug}", timeout=10
        )
        if top.status_code == 200:
            info = top.json()
            return f"{info.get('title', '')}: {info.get('extract', '')}"
        # Last resort: raw snippets from the search results.
        return "\n".join(hit.get("snippet", "") for hit in hits)
    except Exception as e:
        return f"Wikipedia error: {e}"
101
+
102
+
103
def python_interpreter(code: str) -> str:
    """Execute Python code and return its printed output.

    Args:
        code: Source text to execute. Results must be print()ed to be captured.

    Returns:
        Captured stdout, a success placeholder when nothing was printed, or an
        "Error: <ExceptionType>: <message>" string if execution raised.

    SECURITY NOTE: exec() on model-generated code is inherently unsafe;
    tolerable only because this Space runs in an isolated container.
    """
    from contextlib import redirect_stdout

    buffer = StringIO()
    try:
        exec_globals: dict = {}
        # redirect_stdout restores sys.stdout even if exec() raises,
        # replacing the manual save/swap/finally-restore dance.
        with redirect_stdout(buffer):
            exec(code, exec_globals)  # noqa: S102 -- intentional, see NOTE above
        output = buffer.getvalue()
        return output if output else "Executed successfully (no output)."
    except Exception as e:
        return f"Error: {type(e).__name__}: {e}"
116
+
117
+
118
def download_task_file(task_id: str) -> str:
    """Download the file associated with a task and return its content.

    The return value is always a string the agent loop can consume directly:
    tabular files are summarized via pandas, audio is transcribed, images are
    encoded as a special "IMAGE:<media-type>:<base64>" payload, Python files
    are fenced, and anything else is returned as (truncated) text.
    """
    try:
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()

        content_type = resp.headers.get("content-type", "")
        filename = ""
        # Pull the original filename from Content-Disposition, if the server sent one.
        if "content-disposition" in resp.headers:
            cd = resp.headers["content-disposition"]
            m = re.search(r'filename=["\']?([^"\';\n]+)', cd)
            if m:
                filename = m.group(1).strip()

        # Determine type by content-type or filename extension
        is_csv = "text/csv" in content_type or filename.endswith(".csv")
        is_excel = filename.endswith((".xlsx", ".xls")) or "spreadsheet" in content_type
        is_image = "image/" in content_type or filename.endswith(
            (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp")
        )
        is_python = filename.endswith(".py")

        if is_image:
            media_type = content_type.split(";")[0].strip() or "image/png"
            img_b64 = base64.b64encode(resp.content).decode()
            # Special prefix parsed by the agent to pass as vision content
            return f"IMAGE:{media_type}:{img_b64}"

        if is_csv:
            try:
                import io
                df = pd.read_csv(io.StringIO(resp.text))
                return (
                    f"CSV file: {len(df)} rows × {len(df.columns)} columns.\n"
                    f"Columns: {list(df.columns)}\n\n"
                    f"{df.head(20).to_string()}"
                )
            except Exception:
                # Unparseable CSV: fall back to the raw text, truncated.
                return resp.text[:5000]

        if is_excel:
            try:
                import io
                df = pd.read_excel(io.BytesIO(resp.content))
                return (
                    f"Excel file: {len(df)} rows × {len(df.columns)} columns.\n"
                    f"Columns: {list(df.columns)}\n\n"
                    f"{df.head(20).to_string()}"
                )
            except Exception as e:
                return f"Excel file could not be parsed: {e}"

        is_audio = filename.endswith((".mp3", ".wav", ".ogg", ".flac", ".m4a")) or "audio/" in content_type
        if is_audio:
            try:
                # Transcribe via HF serverless Whisper; requires HF_TOKEN in env.
                asr_client = InferenceClient(api_key=os.environ["HF_TOKEN"])
                transcript = asr_client.automatic_speech_recognition(
                    audio=resp.content,
                    model="openai/whisper-large-v3",
                )
                text_result = transcript.text if hasattr(transcript, "text") else str(transcript)
                return f"Audio transcript:\n{text_result}"
            except Exception as e:
                return f"Audio file (transcription failed: {e}). File size: {len(resp.content)} bytes."

        if is_python:
            return f"Python file:\n```python\n{resp.text[:4000]}\n```"

        # Default: try to decode as text
        try:
            return resp.content.decode("utf-8")[:6000]
        except Exception:
            return f"Binary file ({len(resp.content)} bytes, type: {content_type})"

    except requests.exceptions.HTTPError as e:
        # 404 means the task simply has no attachment — not a failure.
        if e.response.status_code == 404:
            return "No file associated with this task."
        return f"Error downloading file: {e}"
    except Exception as e:
        return f"Error: {e}"
199
+
200
+
201
+ # --- Agent Definition ---
202
+
203
class GAIAAgent:
    """
    ReAct-style agent using plain chat completions (no native tool-calling API).
    Works with any instruction-following model on HF's free serverless inference.

    The loop alternates model turns and tool executions: the model emits
    Thought/Action/Action Input text, the agent parses it, runs the named
    tool, and feeds the result back as an "Observation:" user message until
    a "Final Answer:" line appears or the iteration cap is hit.
    """

    # System prompt defining the available tools and the strict ReAct format.
    SYSTEM_PROMPT = """You are an expert AI assistant solving questions from the GAIA benchmark.
You have access to these tools:

- web_search(query): Search the web via DuckDuckGo for current facts, people, events, statistics.
- visit_webpage(url): Fetch and read the text content of a specific webpage.
- wikipedia_search(query): Search Wikipedia for background information on a topic.
- python_interpreter(code): Execute Python code. Always use print() to output results.
- download_task_file(task_id): Download the file attached to the current task (image, CSV, Excel, text, etc.).

Use this EXACT format for every step:

Thought: [your reasoning]
Action: [tool_name]
Action Input: {"key": "value"}

After receiving the Observation, continue with more Thought/Action steps.
When you have the final answer, write:

Thought: I now know the final answer.
Final Answer: [exact answer]

Important rules:
- "Final Answer:" must contain ONLY the bare answer — no explanation, no "FINAL ANSWER:" prefix.
- Numbers: exact format as requested (integer, decimal, etc.).
- Names: exact spelling as they appear in authoritative sources.
- Lists: comma-separated values unless another format is specified.
- Always use a tool to verify facts rather than relying on memory."""

    MODEL = "moonshotai/Kimi-K2.5:cheapest"

    def __init__(self):
        # HF_TOKEN must be set in the environment (Space secret or local .env).
        self.client = InferenceClient(
            api_key=os.environ["HF_TOKEN"],
        )
        print("GAIAAgent initialized.")

    @staticmethod
    def _strip_think(text: str) -> str:
        """Remove <think>…</think> reasoning blocks (DeepSeek-R1 / o1-style)."""
        return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

    def _run_tool(self, name: str, tool_input: dict) -> str:
        """Execute a named tool and return its result as a string."""
        import time
        t0 = time.time()
        try:
            if name == "web_search":
                query = tool_input.get("query", "")
                if not query:
                    return "Error: 'query' parameter is required."
                return web_search(query)
            if name == "visit_webpage":
                url = tool_input.get("url", "")
                if not url or not url.startswith("http"):
                    print(f"  [TOOL ERROR] visit_webpage called with invalid url: {url!r}")
                    return "Error: valid 'url' parameter is required."
                return visit_webpage(url)
            if name == "wikipedia_search":
                query = tool_input.get("query", "")
                if not query:
                    return "Error: 'query' parameter is required."
                return wikipedia_search(query)
            if name == "python_interpreter":
                code = tool_input.get("code", "")
                if not code:
                    print(f"  [TOOL ERROR] python_interpreter called with empty code. Full input: {tool_input!r}")
                    return "Error: 'code' parameter is required."
                return python_interpreter(code)
            if name == "download_task_file":
                return download_task_file(tool_input.get("task_id", ""))
            print(f"  [TOOL ERROR] Unknown tool called: {name!r}")
            return f"Unknown tool: {name}"
        except Exception as e:
            # Tool failures are returned as strings so the ReAct loop survives.
            print(f"  [TOOL EXCEPTION] {name} raised {type(e).__name__}: {e}")
            return f"Tool error: {e}"
        finally:
            print(f"  [TOOL TIMING] {name} completed in {time.time() - t0:.2f}s")

    @staticmethod
    def _extract_json(text: str, start: int) -> dict:
        """
        Extract a JSON object starting at `start` (which must be '{') by
        counting braces — handles nested dicts/code strings safely.
        """
        depth = 0
        in_string = False
        escape = False
        for i in range(start, len(text)):
            ch = text[i]
            if escape:
                escape = False
                continue
            if ch == "\\" and in_string:
                escape = True
                continue
            if ch == '"':
                in_string = not in_string
                continue
            if in_string:
                # Braces inside JSON strings (e.g. Python code) must not count.
                continue
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    raw = text[start : i + 1]
                    try:
                        return json.loads(raw)
                    except json.JSONDecodeError as e:
                        print(f"  [PARSE ERROR] JSON decode failed: {e} | raw={raw[:200]!r}")
                        return {}
        print(f"  [PARSE ERROR] Unmatched braces — no closing '}}' found from pos {start}")
        return {}

    def _parse_action(self, text: str):
        """
        Return (tool_name, tool_input_dict) for the last Action block in text,
        or (None, None) if none is found.
        """
        action_matches = list(re.finditer(r"Action:\s*(\w+)", text))
        if not action_matches:
            return None, None

        # The model may emit several Action blocks; honor only the last one.
        tool_name = action_matches[-1].group(1).strip()
        tool_input: dict = {}

        ai_matches = list(re.finditer(r"Action Input:\s*", text))
        if not ai_matches:
            print(f"  [PARSE WARN] Action '{tool_name}' found but no 'Action Input:' block.")
        else:
            pos = ai_matches[-1].end()
            if pos < len(text) and text[pos] == "{":
                tool_input = self._extract_json(text, pos)
                if not tool_input:
                    print(f"  [PARSE WARN] Action Input for '{tool_name}' parsed as empty dict.")
            else:
                snippet = text[pos : pos + 80].replace("\n", "\\n")
                print(f"  [PARSE WARN] Action Input for '{tool_name}' does not start with '{{': {snippet!r}")

        return tool_name, tool_input

    def __call__(self, question: str, task_id: str | None = None) -> str:
        """Run the ReAct loop on one question; returns the bare final answer.

        Args:
            question: The task's question text.
            task_id: Optional task identifier, surfaced to the model so it
                can call download_task_file for any attachment.
        """
        import time
        print(f"\nAgent processing task {task_id}: {question[:80]}...")

        user_content = f"Task ID: {task_id}\n\nQuestion: {question}" if task_id else question
        messages = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

        # Hard cap of 15 model turns to bound cost and latency per question.
        for iteration in range(15):
            t_llm = time.time()
            response = None
            # Up to 3 attempts; only transient HTTP statuses are retried.
            for attempt in range(3):
                try:
                    response = self.client.chat.completions.create(
                        model=self.MODEL,
                        messages=messages,
                        max_tokens=4096,
                        temperature=0.1,
                    )
                    break
                except Exception as e:
                    is_retryable = any(code in str(e) for code in ("504", "502", "503", "429"))
                    print(f"  [{iteration}] [LLM ERROR attempt {attempt+1}/3] {type(e).__name__}: {str(e)[:120]}")
                    if is_retryable and attempt < 2:
                        wait = 15 * (attempt + 1)  # linear backoff: 15s, 30s
                        print(f"  [{iteration}] Retrying in {wait}s...")
                        time.sleep(wait)
                    else:
                        raise
            if response is None:
                raise RuntimeError("LLM returned no response after retries")
            llm_elapsed = time.time() - t_llm

            raw_output = (response.choices[0].message.content or "").strip()
            think_stripped = len(raw_output) - len(self._strip_think(raw_output))
            output = self._strip_think(raw_output)

            usage = response.usage
            print(
                f"  [{iteration}] LLM {llm_elapsed:.1f}s | "
                f"tokens in={getattr(usage, 'prompt_tokens', '?')} "
                f"out={getattr(usage, 'completion_tokens', '?')} | "
                f"think_stripped={think_stripped}chars"
            )
            print(f"  [{iteration}] Model output: {output[:300]}{'...' if len(output) > 300 else ''}")

            # ── Final answer found (must be at line start, not inside code/JSON) ──
            fa_match = re.search(r"(?:^|\n)Final Answer:\s*(.+?)(?:\n|$)", output)
            if fa_match:
                answer = fa_match.group(1).strip()
                print(f"  [{iteration}] => Final Answer: {answer!r}")
                return answer

            # ── Tool call found ──
            tool_name, tool_input = self._parse_action(output)
            if tool_name:
                print(f"  [{iteration}] Tool call: {tool_name}({json.dumps(tool_input)[:200]})")
                result = self._run_tool(tool_name, tool_input)
                result_preview = result[:200].replace("\n", " ")
                print(f"  [{iteration}] Tool result ({len(result)} chars): {result_preview}{'...' if len(result) > 200 else ''}")

                # Keep the raw (un-stripped) assistant turn in the transcript.
                messages.append({"role": "assistant", "content": raw_output})

                if result.startswith("IMAGE:"):
                    # download_task_file encodes images as IMAGE:<media-type>:<b64>.
                    parts = result.split(":", 2)
                    media_type, img_b64 = parts[1], parts[2]
                    print(f"  [{iteration}] Image received: type={media_type}, size={len(img_b64)} b64 chars")
                    messages.append({
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Observation: Here is the downloaded image. Analyse it to answer the question."},
                            {"type": "image_url", "image_url": {"url": f"data:{media_type};base64,{img_b64}"}},
                        ],
                    })
                else:
                    # Truncate observations so the context window is not exhausted.
                    messages.append({
                        "role": "user",
                        "content": f"Observation: {result[:6000]}",
                    })
            else:
                # Neither a Final Answer nor a parseable Action: nudge the model.
                print(f"  [{iteration}] No tool call and no Final Answer — prompting model to conclude.")
                messages.append({"role": "assistant", "content": raw_output})
                messages.append({
                    "role": "user",
                    "content": (
                        "You haven't provided a Final Answer yet. "
                        "Please conclude with:\nFinal Answer: [answer]"
                    ),
                })

        print(f"  [MAX ITERATIONS] Reached iteration limit for task {task_id}.")
        return "Unable to determine answer."
444
+
445
+
446
+ # --- Gradio App ---
447
+
448
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
449
  """
450
+ Fetches all questions, runs the GAIAAgent on them, submits all answers,
451
  and displays the results.
452
  """
453
+ space_id = os.getenv("SPACE_ID")
 
454
 
455
  if profile:
456
+ username = profile.username
457
  print(f"User logged in: {username}")
458
  else:
459
  print("User not logged in.")
 
463
  questions_url = f"{api_url}/questions"
464
  submit_url = f"{api_url}/submit"
465
 
466
+ # 1. Instantiate Agent
467
  try:
468
+ agent = GAIAAgent()
469
  except Exception as e:
470
  print(f"Error instantiating agent: {e}")
471
  return f"Error initializing agent: {e}", None
472
+
473
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
474
  print(agent_code)
475
 
 
480
  response.raise_for_status()
481
  questions_data = response.json()
482
  if not questions_data:
483
+ return "Fetched questions list is empty or invalid format.", None
 
484
  print(f"Fetched {len(questions_data)} questions.")
485
  except requests.exceptions.RequestException as e:
 
486
  return f"Error fetching questions: {e}", None
 
 
 
 
487
  except Exception as e:
 
488
  return f"An unexpected error occurred fetching questions: {e}", None
489
 
490
+ # 3. Run Agent
491
  results_log = []
492
  answers_payload = []
493
  print(f"Running agent on {len(questions_data)} questions...")
494
+
495
  for item in questions_data:
496
  task_id = item.get("task_id")
497
  question_text = item.get("question")
 
499
  print(f"Skipping item with missing task_id or question: {item}")
500
  continue
501
  try:
502
+ submitted_answer = agent(question_text, task_id=task_id)
503
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
504
+ results_log.append({
505
+ "Task ID": task_id,
506
+ "Question": question_text,
507
+ "Submitted Answer": submitted_answer,
508
+ })
509
  except Exception as e:
510
+ print(f"Error running agent on task {task_id}: {e}")
511
+ results_log.append({
512
+ "Task ID": task_id,
513
+ "Question": question_text,
514
+ "Submitted Answer": f"AGENT ERROR: {e}",
515
+ })
516
 
517
  if not answers_payload:
 
518
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
519
 
520
+ # 4. Submit
521
+ submission_data = {
522
+ "username": username.strip(),
523
+ "agent_code": agent_code,
524
+ "answers": answers_payload,
525
+ }
526
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
527
  try:
528
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
536
  f"Message: {result_data.get('message', 'No message received.')}"
537
  )
538
  print("Submission successful.")
539
+ return final_status, pd.DataFrame(results_log)
 
540
  except requests.exceptions.HTTPError as e:
541
  error_detail = f"Server responded with status {e.response.status_code}."
542
  try:
543
  error_json = e.response.json()
544
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
545
+ except Exception:
546
  error_detail += f" Response: {e.response.text[:500]}"
547
  status_message = f"Submission Failed: {error_detail}"
548
  print(status_message)
549
+ return status_message, pd.DataFrame(results_log)
 
550
  except requests.exceptions.Timeout:
551
  status_message = "Submission Failed: The request timed out."
552
  print(status_message)
553
+ return status_message, pd.DataFrame(results_log)
 
554
  except requests.exceptions.RequestException as e:
555
  status_message = f"Submission Failed: Network error - {e}"
556
  print(status_message)
557
+ return status_message, pd.DataFrame(results_log)
 
558
  except Exception as e:
559
  status_message = f"An unexpected error occurred during submission: {e}"
560
  print(status_message)
561
+ return status_message, pd.DataFrame(results_log)
 
562
 
563
 
564
+ # --- Build Gradio Interface ---
565
  with gr.Blocks() as demo:
566
+ gr.Markdown("# GAIA Agent Evaluation Runner")
567
  gr.Markdown(
568
+ f"""
569
  **Instructions:**
570
 
571
+ 1. Log in to your Hugging Face account using the button below.
572
+ 2. Click **Run Evaluation & Submit All Answers** to fetch questions, run the agent, submit answers, and see the score.
 
573
 
574
  ---
575
+ **Notes:**
576
+ - The agent uses models via HF InferenceClient (provider=auto) with a ReAct loop: web search, Wikipedia, Python interpreter, and file download tools.
577
+ - Targets ≥30% on GAIA level-1 questions.
578
+ - Submission can take several minutes while the agent processes each question.
579
  """
580
  )
581
 
 
584
  run_button = gr.Button("Run Evaluation & Submit All Answers")
585
 
586
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
587
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
588
 
589
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
590
 
591
  if __name__ == "__main__":
592
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
 
593
  space_host_startup = os.getenv("SPACE_HOST")
594
+ space_id_startup = os.getenv("SPACE_ID")
595
 
596
  if space_host_startup:
597
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
599
  else:
600
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
601
 
602
+ if space_id_startup:
603
  print(f"✅ SPACE_ID found: {space_id_startup}")
604
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
605
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
606
  else:
607
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
608
 
609
+ print("-" * (60 + len(" App Starting ")) + "\n")
610
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
611
+ demo.launch(debug=True, share=False)
 
requirements.txt CHANGED
@@ -1,2 +1,9 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
1
  gradio
2
+ gradio[oauth]
3
+ requests
4
+ pandas
5
+ huggingface_hub
6
+ ddgs
7
+ beautifulsoup4
8
+ openpyxl
9
+ python-dotenv