Final_Assignment_Template2

Sleeping

App Files Community

lethaq committed on May 24

Commit

c0961ba

verified ·

1 Parent(s): 8021035

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -379

app.py CHANGED Viewed

@@ -1,444 +1,128 @@
-"""Enhanced Agent Evaluation Runner with improved capabilities"""
 import os
-import re
 import time
 import gradio as gr
 import requests
 import pandas as pd
-import google.generativeai as genai
 from dotenv import load_dotenv
-from urllib.parse import urlparse, parse_qs
-import json
-from agent import Agent
-agent = Agent()
-# Load environment variables
 load_dotenv()
-# Configure Gemini
-genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-# Constants
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-class EnhancedAgent:
-    """An enhanced agent using Google Gemini with improved capabilities."""
-    def __init__(self):
-        print("EnhancedAgent initialized.")
-        # Use gemini-1.5-pro for better performance, fallback to flash
-        try:
-            self.model = genai.GenerativeModel('gemini-2.0-flash')
-        except:
-            self.model = genai.GenerativeModel('gemini-1.5-pro')
-        # Rate limiting
-        self.last_request_time = 0
-        self.min_request_interval = 1.0  # 1 second between requests
-    def _rate_limit(self):
-        """Simple rate limiting to avoid quota issues."""
-        current_time = time.time()
-        time_since_last = current_time - self.last_request_time
-        if time_since_last < self.min_request_interval:
-            time.sleep(self.min_request_interval - time_since_last)
-        self.last_request_time = time.time()
-    def _extract_youtube_info(self, question: str) -> str:
-        """Extract information about YouTube videos mentioned in questions."""
-        youtube_patterns = [
-            r'youtube\.com/watch\?v=([a-zA-Z0-9_-]+)',
-            r'youtu\.be/([a-zA-Z0-9_-]+)'
-        ]
-        for pattern in youtube_patterns:
-            match = re.search(pattern, question)
-            if match:
-                video_id = match.group(1)
-                return f"YouTube video ID: {video_id}. Note: Cannot access video content directly, but can make educated guesses based on context."
-        return ""
-    def _analyze_question_type(self, question: str) -> str:
-        """Analyze the type of question and provide specific guidance."""
-        question_lower = question.lower()
-        # Different question types and their handling strategies
-        if any(word in question_lower for word in ['youtube', 'video', 'watch']):
-            return "VIDEO_ANALYSIS"
-        elif any(word in question_lower for word in ['excel', 'spreadsheet', 'file', 'csv']):
-            return "FILE_ANALYSIS"
-        elif any(word in question_lower for word in ['how many', 'count', 'number of']):
-            return "COUNTING"
-        elif any(word in question_lower for word in ['who', 'what', 'where', 'when']):
-            return "FACTUAL"
-        elif any(word in question_lower for word in ['calculate', 'compute', 'math']):
-            return "CALCULATION"
-        elif any(word in question_lower for word in ['list', 'name', 'identify']):
-            return "LIST"
-        else:
-            return "GENERAL"
-    def _get_enhanced_prompt(self, question: str, question_type: str) -> str:
-        """Generate an enhanced system prompt based on question type."""
-        base_prompt = """You are an expert assistant with broad knowledge across many domains including:
-- Music, entertainment, and media
-- Sports statistics and history
-- Science and mathematics
-- Geography and world facts
-- Technology and computing
-- Literature and culture
-CRITICAL INSTRUCTIONS:
-1. Always provide your best educated guess even if you're not 100% certain
-2. For numerical answers, provide ONLY the number (no commas, currency symbols, or units unless specified)
-3. For names/words, provide the exact spelling
-4. For lists, use comma-separated format
-5. End with: FINAL ANSWER: [your concise answer]
-"""
-        if question_type == "VIDEO_ANALYSIS":
-            base_prompt += """
-For video-related questions:
-- If you cannot access the video content, make educated guesses based on:
-  - Video title/URL context
-  - Common knowledge about the topic
-  - Typical content patterns
-- Provide your best estimate rather than saying "cannot access"
-"""
-        elif question_type == "FILE_ANALYSIS":
-            base_prompt += """
-For file-related questions:
-- If you cannot access files directly, make reasonable assumptions
-- Use general knowledge about typical data in such contexts
-- Provide educated estimates based on the question context
-"""
-        elif question_type == "COUNTING":
-            base_prompt += """
-For counting questions:
-- Provide specific numbers when possible
-- If exact count unknown, provide reasonable estimates
-- Consider historical data and typical ranges
-"""
-        elif question_type == "FACTUAL":
-            base_prompt += """
-For factual questions:
-- Use your knowledge base to provide accurate information
-- If multiple possibilities exist, choose the most likely one
-- Be specific with names, dates, and details
-"""
-        return base_prompt
-    def _make_api_call_with_retry(self, prompt: str, max_retries: int = 3) -> str:
-        """Make API call with retry logic and error handling."""
-        for attempt in range(max_retries):
-            try:
-                self._rate_limit()  # Apply rate limiting
-                # Generate response using Gemini
-                response = self.model.generate_content(
-                    prompt,
-                    generation_config=genai.types.GenerationConfig(
-                        temperature=0.1,  # Lower temperature for more consistent answers
-                        max_output_tokens=1000,
-                    )
-                )
-                if response.text:
-                    return response.text
-                else:
-                    raise Exception("Empty response from API")
-            except Exception as e:
-                error_msg = str(e).lower()
-                if "quota" in error_msg or "429" in error_msg:
-                    if attempt < max_retries - 1:
-                        wait_time = (2 ** attempt) * 5  # Exponential backoff
-                        print(f"Quota exceeded, waiting {wait_time} seconds...")
-                        time.sleep(wait_time)
-                        continue
-                    else:
-                        return "Error: API quota exceeded"
-                elif "safety" in error_msg:
-                    return "Error: Content safety filter triggered"
-                else:
-                    if attempt < max_retries - 1:
-                        time.sleep(2)  # Wait before retry
-                        continue
-                    else:
-                        return f"Error: {str(e)}"
-        return "Error: Max retries exceeded"
-    def __call__(self, question: str) -> str:
-        """Process a question and return an answer."""
-        print(f"Agent processing: {question[:100]}...")
-        # Analyze question type
-        question_type = self._analyze_question_type(question)
-        print(f"Question type identified: {question_type}")
-        # Extract additional context
-        youtube_info = self._extract_youtube_info(question)
-        # Build enhanced prompt
-        system_prompt = self._get_enhanced_prompt(question, question_type)
-        # Add context if available
-        context = ""
-        if youtube_info:
-            context += f"\nContext: {youtube_info}\n"
-        # Combine everything
-        full_prompt = f"{system_prompt}\n{context}\nQuestion: {question}\n\nProvide your best answer:"
-        # Make API call with retry
-        response = self._make_api_call_with_retry(full_prompt)
-        # Extract final answer
-        return self._extract_final_answer(response, question_type)
-    def _extract_final_answer(self, response: str, question_type: str) -> str:
-        """Extract the final answer from the response."""
-        if response.startswith("Error:"):
-            return response
-        # Look for FINAL ANSWER: pattern
-        final_answer_match = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
-        if final_answer_match:
-            answer = final_answer_match.group(1).strip()
-            return self._clean_answer(answer, question_type)
-        # Fallback: extract from end of response
-        lines = response.strip().split('\n')
-        for line in reversed(lines):
-            line = line.strip()
-            if line and len(line) < 200:  # Reasonable answer length
-                return self._clean_answer(line, question_type)
-        # Last resort: return first part of response
-        return self._clean_answer(response[:100], question_type)
-    def _clean_answer(self, answer: str, question_type: str) -> str:
-        """Clean and format the final answer."""
-        answer = answer.strip()
-        # Remove common prefixes
-        prefixes_to_remove = [
-            "the answer is", "answer:", "final answer:",
-            "result:", "solution:", "therefore",
-            "in conclusion", "to summarize"
-        ]
-        for prefix in prefixes_to_remove:
-            if answer.lower().startswith(prefix):
-                answer = answer[len(prefix):].strip()
-        # Clean punctuation from the end
-        answer = answer.rstrip('.,;:!')
-        # For counting questions, ensure we return just the number
-        if question_type == "COUNTING":
-            number_match = re.search(r'\b(\d+(?:,\d{3})*(?:\.\d+)?)\b', answer)
-            if number_match:
-                return number_match.group(1).replace(',', '')
-        return answer
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    agent = Agent()
     """
-    Fetches all questions, runs the EnhancedAgent on them, submits all answers,
     and displays the results.
     """
-    # Check if user is logged in
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    # Get space info
-    space_id = os.getenv("SPACE_ID")
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1. Initialize Agent
-    try:
-        agent = EnhancedAgent()
-    except Exception as e:
-        print(f"Error initializing agent: {e}")
-        return f"Error initializing agent: {e}", None
-    # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
         if not questions_data:
             return "No questions received from server.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
-     submitted_answer = agent(question_text)
-    # 3. Process Questions
-    results_log = []
     answers_payload = []
-    print(f"Processing {len(questions_data)} questions...")
-    for i, item in enumerate(questions_data):
-        task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"Skipping invalid item: {item}")
             continue
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
-            # Get answer from agent
             submitted_answer = agent(question_text)
-            # Store results
             answers_payload.append({
-                "task_id": task_id,
                 "submitted_answer": submitted_answer
             })
             results_log.append({
                 "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                 "Submitted Answer": submitted_answer
             })
-            # Small delay between questions to avoid rate limiting
             time.sleep(0.5)
         except Exception as e:
-            error_msg = f"ERROR: {str(e)}"
-            print(f"Error processing task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_msg
             })
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Submit Results
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
-    print(f"Submitting {len(answers_payload)} answers...")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        # Format success message
-        final_status = (
             f"✅ Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No additional message.')}"
         )
-        print("Submission successful!")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
     except Exception as e:
-        error_msg = f"❌ Submission Failed: {str(e)}"
-        print(error_msg)
-        results_df = pd.DataFrame(results_log)
-        return error_msg, results_df
-# Build Gradio Interface
-with gr.Blocks(title="Enhanced Agent Evaluation") as demo:
-    gr.Markdown("# Enhanced Agent Evaluation Runner")
     gr.Markdown("""
     **Instructions:**
-    1. Make sure you have set up your `GOOGLE_API_KEY` in the environment variables
-    2. Log in to your Hugging Face account using the button below
-    3. Click 'Run Evaluation & Submit All Answers' to start the evaluation
-    **Enhanced Features:**
-    - Improved question analysis and categorization
-    - Better handling of different question types
-    - Rate limiting to avoid API quota issues
-    - Retry logic for failed requests
-    - Enhanced prompting for better accuracy
     """)
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(
-        label="Status / Results",
-        lines=6,
-        interactive=False
-    )
-    results_table = gr.DataFrame(
-        label="Questions and Answers",
-        wrap=True
-    )
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    print("=" * 50)
-    print("🚀 Starting Enhanced Agent Evaluation Runner")
-    print("=" * 50)
-    # Check environment variables
-    if not os.getenv("GOOGLE_API_KEY"):
-        print("⚠️  WARNING: GOOGLE_API_KEY not found in environment variables!")
-        print("   Please set your Google API key to use Gemini.")
-    else:
-        print("✅ GOOGLE_API_KEY found")
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
-    if space_host:
-        print(f"✅ Running on Hugging Face Space")
-        print(f"   URL: https://{space_host}.hf.space")
-    if space_id:
-        print(f"✅ Space ID: {space_id}")
-    print("=" * 50)
     demo.launch(debug=True, share=False)

+```python
+"""Enhanced Agent Evaluation Runner with simplified Agent integration"""
 import os
 import time
 import gradio as gr
 import requests
 import pandas as pd
 from dotenv import load_dotenv
+from agent import Agent  # 引入你自己写的简易 agent.py
+# 加载 .env 中的 GOOGLE_API_KEY（agent.py 会使用）
 load_dotenv()
+# 常量
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the Agent on them, submits all answers,
     and displays the results.
     """
+    # 登录检查
+    if not profile:
         return "Please Login to Hugging Face with the button.", None
+    username = profile.username
+    # 初始化你的简易 Agent
+    agent = Agent()
+    # 组装提交相关 URL
+    space_id    = os.getenv("SPACE_ID")
+    agent_code  = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
+    questions_url = f"{DEFAULT_API_URL}/questions"
+    submit_url    = f"{DEFAULT_API_URL}/submit"
+    # 1. 拉取题目
     try:
+        resp = requests.get(questions_url, timeout=20)
+        resp.raise_for_status()
+        questions_data = resp.json()
         if not questions_data:
             return "No questions received from server.", None
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    # 2. 遍历题目并调用 Agent 获取答案
+    results_log     = []
     answers_payload = []
+    for item in questions_data:
+        task_id      = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
+            # 调用你在 agent.py 中定义的 Agent
             submitted_answer = agent(question_text)
             answers_payload.append({
+                "task_id": task_id,
                 "submitted_answer": submitted_answer
             })
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text,
                 "Submitted Answer": submitted_answer
             })
+            # 避免 API 速率限制
             time.sleep(0.5)
         except Exception as e:
+            err = f"ERROR: {e}"
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": err
             })
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 3. 提交答案
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
     try:
+        post = requests.post(submit_url, json=submission_data, timeout=60)
+        post.raise_for_status()
+        data = post.json()
+        status = (
             f"✅ Submission Successful!\n"
+            f"User: {data.get('username')}\n"
+            f"Score: {data.get('score','N/A')}% "
+            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
+            f"Message: {data.get('message','No additional message.')}"
         )
+        return status, pd.DataFrame(results_log)
     except Exception as e:
+        return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)
+# --- Gradio 界面 ---
+with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo:
+    gr.Markdown("# Simplified GAIA Agent Evaluation Runner")
     gr.Markdown("""
     **Instructions:**
+    1. Set your `GOOGLE_API_KEY` in the environment variables.
+    2. Log in to your Hugging Face account using the button below.
+    3. Click **Run Evaluation & Submit All Answers** to start.
+    This runner uses:
+    - A custom `agent.py` for answering GAIA questions.
+    - Gradio for UI.
+    - HTTP requests to fetch & submit answers.
     """)
     gr.LoginButton()
+    run_btn     = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
+    status_out  = gr.Textbox(label="Status / Results", lines=6, interactive=False)
+    table_out   = gr.DataFrame(label="Questions and Answers", wrap=True)
+    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)
+```