Final_Assignment_Template

Sleeping

App Files Files Community

vissutagunawan committed on Jun 29, 2025

Commit

b419a9b

verified ·

1 Parent(s): 81917a3

Update app.py

Browse files

Files changed (1) hide show

app.py +423 -64

app.py CHANGED Viewed

@@ -4,31 +4,336 @@ import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -38,18 +343,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
     # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
@@ -57,61 +366,87 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         if not questions_data:
              print("Fetched questions list is empty.")
              return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
          print(f"Response text: {response.text[:500]}")
          return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
@@ -119,52 +454,71 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
@@ -172,25 +526,30 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import inspect
 import pandas as pd
+# smolagents imports
+from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, tool
+import re
+from typing import Optional, Union, Any
+import json
+import csv
+import io
+import math
+import statistics
+# Additional imports for custom tools
+import base64
+from urllib.parse import urlparse
+import mimetypes
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Custom Tools for GAIA Tasks ---
+@tool
+def visit_webpage(url: str) -> str:
+    """Visits a webpage at the given URL and returns its content as text.
+    Args:
+        url: The URL of the webpage to visit
+    Returns:
+        The content of the webpage as text, or an error message if the request fails
+    """
+    try:
+        import requests
+        from bs4 import BeautifulSoup
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # Remove script and style elements
+        for script in soup(["script", "style"]):
+            script.decompose()
+        # Get text content
+        text = soup.get_text()
+        # Clean up text
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        text = ' '.join(chunk for chunk in chunks if chunk)
+        # Limit text length to avoid token limits
+        if len(text) > 8000:
+            text = text[:8000] + "... [Content truncated]"
+        return text
+    except Exception as e:
+        return f"Error visiting webpage: {str(e)}"
+@tool
+def calculate_math(expression: str) -> str:
+    """Safely evaluates mathematical expressions and performs calculations.
+    Args:
+        expression: A mathematical expression to evaluate (e.g., "2+2", "sqrt(16)", "log(100)")
+    Returns:
+        The result of the calculation or an error message
+    """
+    try:
+        import math
+        import re
+        # Clean the expression
+        expression = expression.strip()
+        # Replace common mathematical functions
+        expression = re.sub(r'\blog\b', 'math.log10', expression)
+        expression = re.sub(r'\bln\b', 'math.log', expression)
+        expression = re.sub(r'\bsqrt\b', 'math.sqrt', expression)
+        expression = re.sub(r'\bsin\b', 'math.sin', expression)
+        expression = re.sub(r'\bcos\b', 'math.cos', expression)
+        expression = re.sub(r'\btan\b', 'math.tan', expression)
+        expression = re.sub(r'\babs\b', 'abs', expression)
+        expression = re.sub(r'\bpi\b', 'math.pi', expression)
+        expression = re.sub(r'\be\b', 'math.e', expression)
+        # Define safe functions for eval
+        safe_dict = {
+            "__builtins__": {},
+            "math": math,
+            "abs": abs,
+            "round": round,
+            "min": min,
+            "max": max,
+            "sum": sum,
+            "len": len,
+            "pow": pow,
+        }
+        result = eval(expression, safe_dict)
+        return str(result)
+    except Exception as e:
+        return f"Error in calculation: {str(e)}"
+@tool
+def analyze_data(data: str, operation: str = "summary") -> str:
+    """Analyzes numerical data and performs statistical operations.
+    Args:
+        data: Comma-separated numerical data or JSON array
+        operation: Type of analysis ("summary", "mean", "median", "std", "count", "sum", "min", "max")
+    Returns:
+        The result of the data analysis
+    """
+    try:
+        import json
+        import statistics
+        # Parse the data
+        if data.startswith('[') and data.endswith(']'):
+            # JSON array format
+            numbers = json.loads(data)
+        else:
+            # Comma-separated format
+            numbers = [float(x.strip()) for x in data.split(',') if x.strip()]
+        if not numbers:
+            return "No valid numerical data provided"
+        if operation == "summary":
+            result = {
+                "count": len(numbers),
+                "sum": sum(numbers),
+                "mean": statistics.mean(numbers),
+                "median": statistics.median(numbers),
+                "min": min(numbers),
+                "max": max(numbers)
+            }
+            if len(numbers) > 1:
+                result["std"] = statistics.stdev(numbers)
+            return json.dumps(result, indent=2)
+        elif operation == "mean":
+            return str(statistics.mean(numbers))
+        elif operation == "median":
+            return str(statistics.median(numbers))
+        elif operation == "std":
+            return str(statistics.stdev(numbers)) if len(numbers) > 1 else "0"
+        elif operation == "count":
+            return str(len(numbers))
+        elif operation == "sum":
+            return str(sum(numbers))
+        elif operation == "min":
+            return str(min(numbers))
+        elif operation == "max":
+            return str(max(numbers))
+        else:
+            return f"Unknown operation: {operation}"
+    except Exception as e:
+        return f"Error in data analysis: {str(e)}"
+@tool
+def extract_numbers(text: str) -> str:
+    """Extracts all numbers from a text string.
+    Args:
+        text: Text containing numbers
+    Returns:
+        Comma-separated list of extracted numbers
+    """
+    try:
+        import re
+        # Pattern to match integers and floats (including negative numbers)
+        pattern = r'-?\d+(?:\.\d+)?'
+        numbers = re.findall(pattern, text)
+        if not numbers:
+            return "No numbers found in the text"
+        return ', '.join(numbers)
+    except Exception as e:
+        return f"Error extracting numbers: {str(e)}"
+@tool
+def count_items(text: str, item_type: str = "words") -> str:
+    """Counts different types of items in text.
+    Args:
+        text: The text to analyze
+        item_type: What to count ("words", "characters", "lines", "sentences")
+    Returns:
+        The count as a string
+    """
+    try:
+        if item_type == "words":
+            words = text.split()
+            return str(len(words))
+        elif item_type == "characters":
+            return str(len(text))
+        elif item_type == "lines":
+            lines = text.split('\n')
+            return str(len(lines))
+        elif item_type == "sentences":
+            import re
+            sentences = re.split(r'[.!?]+', text)
+            sentences = [s.strip() for s in sentences if s.strip()]
+            return str(len(sentences))
+        else:
+            return f"Unknown item type: {item_type}"
+    except Exception as e:
+        return f"Error counting items: {str(e)}"
+# --- Enhanced Agent Definition ---
+class GAIAAgent:
     def __init__(self):
+        print("GAIAAgent initializing with smolagents...")
+        # Initialize the model (using HuggingFace free inference API)
+        try:
+            self.model = HfApiModel()
+            print("✅ Model initialized successfully")
+        except Exception as e:
+            print(f"❌ Error initializing model: {e}")
+            # Fallback to a basic model
+            self.model = HfApiModel()
+        # Initialize tools
+        self.tools = [
+            DuckDuckGoSearchTool(),
+            visit_webpage,
+            calculate_math,
+            analyze_data,
+            extract_numbers,
+            count_items
+        ]
+        # Create the CodeAgent with enhanced capabilities
+        try:
+            self.agent = CodeAgent(
+                tools=self.tools,
+                model=self.model,
+                additional_authorized_imports=[
+                    'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
+                    're', 'urllib.parse', 'base64', 'datetime', 'calendar'
+                ],
+                max_steps=10,  # Allow multiple reasoning steps
+                verbosity_level=1  # Reduce verbosity for cleaner output
+            )
+            print("✅ GAIA Agent initialized successfully with enhanced tools")
+        except Exception as e:
+            print(f"❌ Error initializing agent: {e}")
+            raise e
     def __call__(self, question: str) -> str:
+        """Process a question and return the answer."""
+        try:
+            print(f"🤖 Processing question: {question[:100]}...")
+            # Enhanced prompt with specific instructions for GAIA
+            enhanced_prompt = f"""You are a helpful AI assistant designed to answer questions accurately and concisely.
+IMPORTANT INSTRUCTIONS:
+1. Read the question carefully and understand what is being asked
+2. Use the available tools when you need external information or calculations
+3. For mathematical problems, use the calculate_math tool or write Python code
+4. For web searches, use DuckDuckGoSearchTool and visit_webpage when needed
+5. Break down complex problems into steps
+6. Give ONLY the final answer - no explanations, no "FINAL ANSWER:" prefix
+7. Be precise with numbers and dates
+8. If the answer is a number, return just the number
+9. If the answer is text, return just the text without quotes
+Question: {question}
+Answer:"""
+            # Run the agent
+            result = self.agent.run(enhanced_prompt)
+            # Clean up the result to ensure it's just the answer
+            if isinstance(result, str):
+                # Remove common prefixes and suffixes
+                result = result.strip()
+                # Remove "FINAL ANSWER:" if present
+                result = re.sub(r'^(FINAL\s*ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
+                result = re.sub(r'^(ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
+                result = re.sub(r'^(RESULT\s*:?\s*)', '', result, flags=re.IGNORECASE)
+                # Remove quotes if the entire answer is wrapped in quotes
+                if (result.startswith('"') and result.endswith('"')) or (result.startswith("'") and result.endswith("'")):
+                    result = result[1:-1]
+                result = result.strip()
+                print(f"✅ Agent response: {result}")
+                return result
+            else:
+                print(f"✅ Agent response: {str(result)}")
+                return str(result)
+        except Exception as e:
+            error_msg = f"Error processing question: {str(e)}"
+            print(f"❌ {error_msg}")
+            return error_msg
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the GAIAAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Enhanced Agent
     try:
+        print("🚀 Initializing GAIA Agent with smolagents...")
+        agent = GAIAAgent()
+        print("✅ Enhanced agent ready for GAIA benchmark!")
     except Exception as e:
+        error_msg = f"Error initializing agent: {e}"
+        print(f"❌ {error_msg}")
+        return error_msg, None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code link: {agent_code}")
     # 2. Fetch Questions
+    print(f"📥 Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         if not questions_data:
              print("Fetched questions list is empty.")
              return "Fetched questions list is empty or invalid format.", None
+        print(f"✅ Fetched {len(questions_data)} questions from GAIA benchmark.")
     except requests.exceptions.RequestException as e:
+        print(f"❌ Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+         print(f"❌ Error decoding JSON response from questions endpoint: {e}")
          print(f"Response text: {response.text[:500]}")
          return f"Error decoding server response for questions: {e}", None
     except Exception as e:
+        print(f"❌ An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run Enhanced Agent
     results_log = []
     answers_payload = []
+    print(f"🤖 Running enhanced GAIA agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
+            print(f"⚠️ Skipping item with missing task_id or question: {item}")
             continue
+        print(f"\n📝 Processing question {i}/{len(questions_data)} (ID: {task_id})")
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer
+            })
+            print(f"✅ Answer for {task_id}: {submitted_answer}")
         except Exception as e:
+             error_msg = f"AGENT ERROR: {e}"
+             print(f"❌ Error running agent on task {task_id}: {e}")
+             answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
+             results_log.append({
+                 "Task ID": task_id,
+                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                 "Submitted Answer": error_msg
+             })
     if not answers_payload:
+        print("❌ Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"🚀 Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
+    print(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
+        score = result_data.get('score', 'N/A')
+        correct_count = result_data.get('correct_count', '?')
+        total_attempted = result_data.get('total_attempted', '?')
         final_status = (
+            f"🎉 Submission Successful!\n"
+            f"👤 User: {result_data.get('username')}\n"
+            f"📊 Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
+            f"🎯 Target: >30% for certification\n"
+            f"💬 Message: {result_data.get('message', 'No message received.')}"
         )
+        if isinstance(score, (int, float)) and score >= 30:
+            final_status += f"\n🏆 CONGRATULATIONS! You've achieved the target score of 30%!"
+        elif isinstance(score, (int, float)):
+            final_status += f"\n📈 Keep improving! You need {30-score:.1f}% more to reach the target."
+        print("✅ Submission successful!")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"❌ Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
+        status_message = "❌ Submission Failed: The request timed out."
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
+        status_message = f"❌ Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
+        status_message = f"❌ An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
+with gr.Blocks(title="GAIA Agent Evaluation") as demo:
+    gr.Markdown("# 🤖 Enhanced GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Enhanced Agent for GAIA Benchmark Certification**
+        This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
+        - 🔍 **Web Search**: DuckDuckGoSearchTool for finding information
+        - 🌐 **Web Scraping**: Custom webpage visitor for content extraction
+        - 🧮 **Mathematics**: Advanced calculation capabilities
+        - 📊 **Data Analysis**: Statistical analysis of numerical data
+        - 🔢 **Number Extraction**: Intelligent number parsing from text
+        - 📝 **Text Analysis**: Counting and text processing utilities
+        **Instructions:**
+        1. 🔄 **Clone this space** and customize the agent as needed
+        2. 🔑 **Log in** to your Hugging Face account using the button below
+        3. 🚀 **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
+        4. 🎯 **Target**: Score >30% for course certification
+        **Goal**: Answer GAIA level 1 validation questions with exact match precision.
         ---
+        ⚠️ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
         """
     )
     gr.LoginButton()
+    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")
+    status_output = gr.Textbox(
+        label="📊 Evaluation Status & Results",
+        lines=8,
+        interactive=False,
+        placeholder="Click the button above to start the evaluation..."
+    )
+    results_table = gr.DataFrame(
+        label="📋 Questions and Agent Responses",
+        wrap=True,
+        headers=["Task ID", "Question", "Submitted Answer"]
+    )
     run_button.click(
         fn=run_and_submit_all,
     )
 if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("🤖 ENHANCED GAIA AGENT STARTING UP")
+    print("="*60)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   🌐 Runtime URL: https://{space_host_startup}.hf.space")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   🔗 Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
+    print("="*60)
+    print("🚀 Launching Enhanced GAIA Agent Interface...")
+    print("🎯 Target: >30% score on GAIA benchmark")
+    print("="*60 + "\n")
     demo.launch(debug=True, share=False)