Final_Assignment_Template

Sleeping

App Files Files Community

SantoshKumar1310 committed on Oct 27, 2025

Commit

6bfe482

verified ·

1 Parent(s): 82eec24

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -496

app.py CHANGED Viewed

@@ -2,551 +2,128 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-import re
-from typing import Dict, List, Any, Optional
-import json
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Enhanced GAIA Agent ---
-class GAIAAgent:
-    """
-    Enhanced agent optimized for GAIA Level 1 questions.
-    Targets 30%+ accuracy through multi-tool integration.
-    """
     def __init__(self):
-        print("✅ GAIA Agent initialized with enhanced capabilities.")
-        self.api_url = DEFAULT_API_URL
-    def __call__(self, question: str, task_id: str = None) -> str:
-        """
-        Main entry point - processes a question and returns a precise answer.
-        """
-        print(f"\n{'='*60}")
-        print(f"🧠 Processing Task: {task_id}")
-        print(f"📝 Question: {question[:100]}...")
-        print(f"{'='*60}")
-        try:
-            # Step 1: Classify question type
-            q_type = self._classify_question(question)
-            print(f"📊 Question Type: {q_type}")
-            # Step 2: Route to specialized handler
-            answer = self._route_to_handler(question, q_type, task_id)
-            # Step 3: Clean and format answer
-            final_answer = self._clean_answer(answer, question)
-            print(f"✅ Final Answer: {final_answer}")
-            return final_answer
-        except Exception as e:
-            print(f"❌ Error: {e}")
-            # Return a safe fallback
-            return "Unable to determine answer"
-    def _classify_question(self, question: str) -> str:
-        """Classify question to route to appropriate handler"""
-        q_lower = question.lower()
-        # Math/calculation questions
-        if any(word in q_lower for word in ["calculate", "sum", "total", "multiply", "divide", "average", "mean"]):
-            return "math"
-        # Questions with numbers/operators
-        if any(op in question for op in ["+", "-", "×", "÷", "*", "/"]) and any(c.isdigit() for c in question):
-            return "math"
-        # Counting questions
-        if any(word in q_lower for word in ["how many", "count", "number of"]):
-            return "counting"
-        # Date/time questions
-        if any(word in q_lower for word in ["year", "date", "when", "month", "day"]):
-            return "date"
-        # Location questions
-        if any(word in q_lower for word in ["where", "location", "city", "country", "capital"]):
-            return "location"
-        # Definition/what is questions
-        if q_lower.startswith("what is") or q_lower.startswith("what's"):
-            return "definition"
-        # Who questions
-        if q_lower.startswith("who"):
-            return "person"
-        # File-based questions
-        if any(word in q_lower for word in ["file", "document", "image", "picture", "photo"]):
-            return "file"
-        return "general"
-    def _route_to_handler(self, question: str, q_type: str, task_id: str) -> str:
-        """Route question to appropriate specialized handler"""
-        if q_type == "math":
-            return self._handle_math(question)
-        elif q_type == "counting":
-            return self._handle_counting(question)
-        elif q_type == "date":
-            return self._handle_date(question)
-        elif q_type == "location":
-            return self._handle_location(question)
-        elif q_type == "definition":
-            return self._handle_definition(question)
-        elif q_type == "person":
-            return self._handle_person(question)
-        elif q_type == "file":
-            return self._handle_file(question, task_id)
-        else:
-            return self._handle_general(question)
-    def _handle_math(self, question: str) -> str:
-        """Handle mathematical calculations"""
-        try:
-            # Extract numbers
-            numbers = re.findall(r'-?\d+\.?\d*', question)
-            if not numbers:
-                return "0"
-            nums = [float(n) for n in numbers]
-            q_lower = question.lower()
-            # Detect operation
-            if "sum" in q_lower or "total" in q_lower or "+" in question or "add" in q_lower:
-                result = sum(nums)
-            elif "difference" in q_lower or "-" in question or "subtract" in q_lower:
-                result = nums[0] - sum(nums[1:]) if len(nums) > 1 else nums[0]
-            elif "product" in q_lower or "*" in question or "×" in question or "multiply" in q_lower:
-                result = 1
-                for n in nums:
-                    result *= n
-            elif "divide" in q_lower or "/" in question or "÷" in question:
-                result = nums[0] / nums[1] if len(nums) >= 2 and nums[1] != 0 else nums[0]
-            elif "average" in q_lower or "mean" in q_lower:
-                result = sum(nums) / len(nums)
-            else:
-                # Try to evaluate the expression safely
-                expr = re.sub(r'[^0-9+\-*/().\s]', '', question)
-                result = eval(expr, {"__builtins__": {}}, {})
-            # Format result
-            if result == int(result):
-                return str(int(result))
-            else:
-                return f"{result:.2f}"
-        except Exception as e:
-            print(f"Math error: {e}")
-            return "0"
-    def _handle_counting(self, question: str) -> str:
-        """Handle counting questions"""
-        # Extract the first number found (often the answer)
-        numbers = re.findall(r'\d+', question)
-        return numbers[0] if numbers else "0"
-    def _handle_date(self, question: str) -> str:
-        """Handle date/year questions"""
-        # Look for 4-digit years
-        years = re.findall(r'\b(19|20)\d{2}\b', question)
-        if years:
-            return years[0]
-        # Look for dates
-        dates = re.findall(r'\b\d{1,2}/\d{1,2}/\d{4}\b', question)
-        if dates:
-            return dates[0]
-        return "Unknown"
-    def _handle_location(self, question: str) -> str:
-        """Handle location questions using knowledge base"""
-        q_lower = question.lower()
-        # Common capitals and locations
-        location_kb = {
-            "france": "Paris",
-            "paris": "France",
-            "england": "London",
-            "london": "England",
-            "usa": "Washington D.C.",
-            "united states": "Washington D.C.",
-            "japan": "Tokyo",
-            "tokyo": "Japan",
-            "germany": "Berlin",
-            "berlin": "Germany",
-            "italy": "Rome",
-            "rome": "Italy",
-            "spain": "Madrid",
-            "madrid": "Spain",
-        }
-        for key, value in location_kb.items():
-            if key in q_lower:
-                return value
-        return "Unknown"
-    def _handle_definition(self, question: str) -> str:
-        """Handle 'What is' questions"""
-        # Extract the subject
-        match = re.search(r"what (?:is|was|are) (?:the |an? )?(.+?)(?:\?|$)", question, re.IGNORECASE)
-        if match:
-            subject = match.group(1).strip()
-            return f"{subject}"
-        return "Unknown"
-    def _handle_person(self, question: str) -> str:
-        """Handle 'Who' questions using knowledge base"""
-        q_lower = question.lower()
-        # Famous people knowledge base
-        people_kb = {
-            "romeo and juliet": "William Shakespeare",
-            "hamlet": "William Shakespeare",
-            "mona lisa": "Leonardo da Vinci",
-            "starry night": "Vincent van Gogh",
-            "theory of relativity": "Albert Einstein",
-            "evolution": "Charles Darwin",
-            "telephone": "Alexander Graham Bell",
-            "light bulb": "Thomas Edison",
-            "first president": "George Washington",
-        }
-        for key, value in people_kb.items():
-            if key in q_lower:
-                return value
-        return "Unknown"
-    def _handle_file(self, question: str, task_id: str) -> str:
-        """Handle questions that require file access"""
-        if not task_id:
-            return "No file available"
-        try:
-            # Download the file from API
-            file_url = f"{self.api_url}/files/{task_id}"
-            print(f"📥 Downloading file from: {file_url}")
-            response = requests.get(file_url, timeout=30)
-            if response.status_code == 200:
-                # Process file based on type
-                content_type = response.headers.get('Content-Type', '')
-                if 'text' in content_type or 'json' in content_type:
-                    # Text-based file
-                    content = response.text
-                    return self._analyze_text_file(content, question)
-                elif 'image' in content_type:
-                    # Image file
-                    return "Image analysis not implemented"
-                else:
-                    return "Unknown file type"
-            else:
-                print(f"File download failed: {response.status_code}")
-                return "File not found"
-        except Exception as e:
-            print(f"File handling error: {e}")
-            return "File processing failed"
-    def _analyze_text_file(self, content: str, question: str) -> str:
-        """Analyze text file content to answer question"""
-        q_lower = question.lower()
-        # Counting items in file
-        if "how many" in q_lower:
-            lines = content.strip().split('\n')
-            return str(len(lines))
-        # Finding specific text
-        if "find" in q_lower or "search" in q_lower:
-            # Extract search term
-            match = re.search(r"(?:find|search for) ['\"](.+?)['\"]", question, re.IGNORECASE)
-            if match:
-                term = match.group(1)
-                if term in content:
-                    return "Found"
-                else:
-                    return "Not found"
-        # Return first line as fallback
-        lines = content.strip().split('\n')
-        return lines[0] if lines else "Empty file"
-    def _handle_general(self, question: str) -> str:
-        """Handle general questions with basic reasoning"""
-        # Try to extract any numbers or dates
-        numbers = re.findall(r'\d+', question)
-        if numbers:
-            return numbers[0]
-        # Look for yes/no questions
-        if question.strip().endswith('?') and any(word in question.lower() for word in ['is', 'are', 'was', 'were', 'can', 'could', 'will', 'would']):
-            return "Yes"
-        return "Unable to determine"
-    def _clean_answer(self, answer: str, question: str) -> str:
-        """
-        Clean and format answer according to GAIA requirements.
-        GAIA requires exact matches, so formatting is critical.
-        """
-        # Remove extra whitespace
-        answer = answer.strip()
-        # Remove "The answer is" or similar phrases
-        answer = re.sub(r'^(?:the answer is|it is|result is)[:\s]+', '', answer, flags=re.IGNORECASE)
-        # Remove trailing punctuation (except for decimals)
-        answer = re.sub(r'[.!?,;]+$', '', answer)
-        # Handle comma-separated lists
-        if "comma-separated" in question.lower() or "list" in question.lower():
-            # Ensure proper comma-space formatting
-            answer = re.sub(r'\s*,\s*', ', ', answer)
-        # Handle number formatting
-        if re.match(r'^-?\d+\.?\d*$', answer):
-            # It's a number
-            num = float(answer)
-            # If it's a whole number, format without decimals
-            if num == int(num):
-                answer = str(int(num))
-            else:
-                # Keep minimal decimal places
-                answer = f"{num:.10g}"
-        return answer
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetch all questions, run the agent, submit answers, and show results.
-    """
-    space_id = os.getenv("SPACE_ID")
     if profile:
-        username = profile.username
-        print(f"👤 User logged in: {username}")
     else:
-        print("❌ User not logged in.")
-        return "❌ Please login to Hugging Face first.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # Create Agent
     try:
-        agent = GAIAAgent()
     except Exception as e:
-        return f"❌ Agent initialization failed: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No_Space_ID"
-    print(f"📁 Agent code link: {agent_code}")
-    # Fetch Questions
     try:
-        print("📡 Fetching questions from API...")
-        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            return "⚠️ No questions received from API.", None
-        print(f"✅ Retrieved {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        return f"❌ Error fetching questions: {e}\n\nPlease check if the API is available.", None
-    # Run Agent on all questions
     results_log = []
     answers_payload = []
-    print(f"\n🤖 Running agent on {len(questions_data)} questions...\n")
-    for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
         question_text = item.get("question")
-        if not task_id or not question_text:
             continue
         try:
-            print(f"\n[{i}/{len(questions_data)}] Processing: {task_id}")
-            submitted_answer = agent(question_text, task_id)
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": submitted_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
-                "Your Answer": submitted_answer
-            })
         except Exception as e:
-            error_msg = f"ERROR: {e}"
-            print(f"❌ {error_msg}")
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
-                "Your Answer": error_msg
-            })
     if not answers_payload:
-        return "⚠️ No answers generated.", pd.DataFrame(results_log)
-    results_df = pd.DataFrame(results_log)
-    # Submit Answers
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
     try:
-        print(f"\n📤 Submitting {len(answers_payload)} answers to API...")
-        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
-        score = result_data.get('score', 0)
-        correct = result_data.get('correct_count', 0)
-        total = result_data.get('total_attempted', len(answers_payload))
-        # Determine emoji based on score
-        if score >= 30:
-            emoji = "🎉🏆"
-        elif score >= 20:
-            emoji = "🎯"
-        elif score >= 10:
-            emoji = "📈"
-        else:
-            emoji = "💪"
         final_status = (
-            f"{emoji} Submission Complete!\n\n"
-            f"👤 Username: {result_data.get('username')}\n"
-            f"🏁 Score: {score}% ({correct}/{total} correct)\n"
-            f"📊 Target: 30% for certification\n\n"
-            f"📝 {result_data.get('message', '')}\n\n"
-            f"🔗 Check the leaderboard: https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard"
         )
-        return final_status, results_df
-    except requests.exceptions.RequestException as e:
-        return f"❌ Submission failed: {e}\n\n✅ Generated {len(answers_payload)} answers (see table)", results_df
-# --- Gradio Interface ---
-with gr.Blocks(theme=gr.themes.Soft(), title="GAIA Agent Evaluation") as demo:
-    gr.Markdown(
-        """
-        # 🤖 GAIA Agent Evaluation System
-        ### 🎯 Goal: Achieve 30%+ accuracy on GAIA Level 1 questions
-        This agent evaluates your AI assistant on 20 carefully selected questions from GAIA's validation set.
-        The questions test reasoning, calculation, factual knowledge, and tool usage.
-        ---
-        ### 📋 How to Submit:
-        1. **Clone this Space** to your Hugging Face profile
-        2. **Keep your Space public** (required for leaderboard verification)
-        3. **Login** using the button below
-        4. **Click "Run Evaluation"** and wait for results
-        5. **Check your score** on the [leaderboard](https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard)
-        ---
-        ### 💡 Tips for Improvement:
-        - Study the question types and patterns
-        - Add web search capabilities (DuckDuckGo, Wikipedia)
-        - Implement better answer formatting
-        - Test individual questions using `/random-question` endpoint
-        - Focus on precise, exact-match answers
-        ---
-        ### ⚠️ Important Notes:
-        - Processing takes 2-5 minutes (20 questions)
-        - Answers must be **exact matches** (case-sensitive, format-sensitive)
-        - Keep your Space public for leaderboard verification
-        - The SPACE_ID environment variable is set automatically by HF Spaces
-        """
-    )
-    with gr.Row():
-        gr.LoginButton()
-    gr.Markdown("---")
-    run_button = gr.Button(
-        "🚀 Run Evaluation & Submit All Answers",
-        variant="primary",
-        size="lg"
-    )
-    status_output = gr.Textbox(
-        label="📊 Evaluation Results",
-        lines=12,
-        interactive=False,
-        show_copy_button=True
-    )
-    results_table = gr.DataFrame(
-        label="📝 Questions and Your Answers",
-        wrap=True,
-        interactive=False
-    )
     gr.Markdown(
         """
         ---
-        ### 🔗 Resources:
-        - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
-        - [Leaderboard](https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard)
-        - [Course Materials](https://huggingface.co/learn/cookbook/agents)
-        - [API Documentation](https://agents-course-unit4-scoring.hf.space/docs)
-        ### 🏆 Score Interpretation:
-        - **30%+**: Excellent! You've achieved certification level ✅
-        - **20-29%**: Good progress! Keep improving 📈
-        - **10-19%**: On the right track! Add more tools 🔧
-        - **0-9%**: Keep experimenting! Study the questions 💪
-        Remember: Human performance is ~92%, GPT-4 with plugins is ~15%. You're competing with AI systems!
         """
     )
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    print("🚀 Launching GAIA Agent Evaluation Interface...")
-    demo.launch(debug=True, share=False)

 import gradio as gr
 import requests
 import pandas as pd
 # --- Constants ---
+# ✅ correct backend API base URL
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# 👇 customize this class to make your own agent smarter
+class BasicAgent:
     def __init__(self):
+        print("✅ BasicAgent initialized.")
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question: {question[:50]}...")
+        # For now, it returns a placeholder answer
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning: {fixed_answer}")
+        return fixed_answer
+# --- Evaluation Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Fetches all questions, runs agent, submits answers, shows results."""
+    space_id = os.getenv("SPACE_ID")  # for linking to code repo
     if profile:
+        username = f"{profile.username}"
+        print(f"👤 Logged in as: {username}")
     else:
+        return "Please log in with your Hugging Face account.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # --- Instantiate your agent ---
     try:
+        agent = BasicAgent()
     except Exception as e:
+        return f"Error initializing agent: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "N/A"
+    print(f"🔗 Code link: {agent_code}")
+    # --- Fetch Questions ---
+    print(f"📡 Fetching from {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            return "No questions fetched.", None
+        print(f"✅ {len(questions_data)} questions retrieved.")
+    except Exception as e:
+        return f"Error fetching questions: {e}", None
+    # --- Run Agent ---
     results_log = []
     answers_payload = []
+    print(f"🤖 Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        if not task_id or question_text is None:
             continue
         try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": submitted_answer})
         except Exception as e:
+            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"ERROR: {e}"})
     if not answers_payload:
+        return "No answers produced by the agent.", pd.DataFrame(results_log)
+    # --- Prepare Submission ---
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    print(f"🚀 Submitting {len(answers_payload)} answers...")
+    # --- Submit ---
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
+            f"✅ Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Score: {result_data.get('score', 'N/A')}%\n"
+            f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
         )
+        return final_status, pd.DataFrame(results_log)
+    except Exception as e:
+        return f"Submission failed: {e}", pd.DataFrame(results_log)
+# --- Build Gradio Interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("# 🧠 Basic Agent Evaluation Runner")
     gr.Markdown(
         """
+        ### Instructions
+        1️⃣ Clone this space on your Hugging Face profile.
+        2️⃣ Modify the `BasicAgent` class to add your logic.
+        3️⃣ Log in below, then click **Run Evaluation & Submit All Answers**.
         ---
+        The process might take a few minutes while the agent runs all questions.
+        You can enhance your agent with reasoning, web tools, or retrieval modules.
         """
     )
+    gr.LoginButton()
+    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
+    results_table = gr.DataFrame(label="🧾 Questions and Agent Answers")
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+# --- Run ---
 if __name__ == "__main__":
+    print("\n" + "-" * 40)
+    print("🌐 App Starting")
+    print("-" * 40)
+    demo.launch(debug=True, share=False)