Final_Assignment_Template

Sleeping

App Files Files Community

vissutagunawan committed on Jun 29, 2025

Commit

4691df3

verified ·

1 Parent(s): 21886e6

Update app.py

Browse files

Files changed (1) hide show

app.py +415 -47

app.py CHANGED Viewed

@@ -204,35 +204,126 @@ def extract_numbers(text: str) -> str:
         return f"Error extracting numbers: {str(e)}"
 @tool
-def count_items(text: str, item_type: str = "words") -> str:
-    """Counts different types of items in text.
     Args:
-        text: The text to analyze
-        item_type: What to count ("words", "characters", "lines", "sentences")
     Returns:
-        The count as a string
     """
     try:
-        if item_type == "words":
-            words = text.split()
-            return str(len(words))
-        elif item_type == "characters":
-            return str(len(text))
-        elif item_type == "lines":
-            lines = text.split('\n')
-            return str(len(lines))
-        elif item_type == "sentences":
-            import re
-            sentences = re.split(r'[.!?]+', text)
-            sentences = [s.strip() for s in sentences if s.strip()]
-            return str(len(sentences))
         else:
-            return f"Unknown item type: {item_type}"
     except Exception as e:
-        return f"Error counting items: {str(e)}"
 def setup_authentication():
     """Setup HuggingFace authentication for the app."""
@@ -300,7 +391,9 @@ class GAIAAgent:
             calculate_math,
             analyze_data,
             extract_numbers,
-            count_items
         ]
         # Create the CodeAgent with enhanced capabilities
@@ -311,12 +404,13 @@ class GAIAAgent:
                 add_base_tools=True,  # Adds DuckDuckGoSearchTool and other base tools
                 additional_authorized_imports=[
                     'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
-                    're', 'urllib.parse', 'base64', 'datetime', 'calendar'
                 ],
-                max_steps=10,  # Allow multiple reasoning steps
                 verbosity_level=1  # Reduce verbosity for cleaner output
             )
-            print("✅ GAIA Agent initialized successfully with enhanced tools and base toolkit")
         except Exception as e:
             print(f"❌ Error initializing agent: {e}")
             raise e
@@ -326,49 +420,323 @@ class GAIAAgent:
         try:
             print(f"🤖 Processing question: {question[:100]}...")
-            # Enhanced prompt with specific instructions for GAIA
-            enhanced_prompt = f"""You are a helpful AI assistant designed to answer questions accurately and concisely.
-IMPORTANT INSTRUCTIONS:
-1. Read the question carefully and understand what is being asked
-2. Use the available tools when you need external information or calculations
-3. For mathematical problems, use the calculate_math tool or write Python code
-4. For web searches, use DuckDuckGoSearchTool and visit_webpage when needed
-5. Break down complex problems into steps
-6. Give ONLY the final answer - no explanations, no "FINAL ANSWER:" prefix
-7. Be precise with numbers and dates
-8. If the answer is a number, return just the number
-9. If the answer is text, return just the text without quotes
 Question: {question}
-Answer:"""
-            # Run the agent with error handling for quota issues
             try:
                 result = self.agent.run(enhanced_prompt)
             except Exception as api_error:
-                if "402" in str(api_error) or "Payment Required" in str(api_error) or "exceeded" in str(api_error):
-                    print(f"⚠️ API quota exceeded, trying simpler approach...")
-                    # Fallback: try to answer with basic tools only
-                    result = f"Unable to process due to API limits: {str(api_error)}"
                 else:
                     raise api_error
-            # Clean up the result to ensure it's just the answer
             if isinstance(result, str):
-                # Remove common prefixes and suffixes
                 result = result.strip()
-                # Remove "FINAL ANSWER:" if present
                 result = re.sub(r'^(FINAL\s*ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
                 result = re.sub(r'^(ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
                 result = re.sub(r'^(RESULT\s*:?\s*)', '', result, flags=re.IGNORECASE)
-                # Remove quotes if the entire answer is wrapped in quotes
                 if (result.startswith('"') and result.endswith('"')) or (result.startswith("'") and result.endswith("'")):
                     result = result[1:-1]
                 result = result.strip()
                 print(f"✅ Agent response: {result}")

         return f"Error extracting numbers: {str(e)}"
 @tool
def process_file_content(file_url: str) -> str:
    """Downloads and processes content from a file URL, supporting various formats.

    Text-like responses (any ``text/*`` type, or a content type mentioning CSV)
    are returned verbatim, JSON responses are pretty-printed, and anything else
    is summarised by size and content type instead of being decoded.

    Args:
        file_url: URL to a file (PDF, CSV, TXT, etc.)

    Returns:
        The processed content of the file as text, or a message starting with "Error processing file:" when the download fails
    """
    try:
        # Every dependency is imported here so the tool body is self-contained;
        # the original relied on a module-level ``json`` that this function
        # never imported itself.
        import json

        import requests

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(file_url, headers=headers, timeout=30)
        response.raise_for_status()

        # Dispatch on the declared content type (lower-cased for matching).
        content_type = response.headers.get('content-type', '').lower()

        if 'text/' in content_type or 'csv' in content_type:
            return response.text

        if 'json' in content_type:
            try:
                return json.dumps(response.json(), indent=2)
            except ValueError:
                # The server labelled the payload as JSON but it does not
                # parse; the raw text is more useful to the caller than an
                # error message.
                return response.text

        # For binary files, return info about the file
        return f"Binary file detected. Size: {len(response.content)} bytes. Content-Type: {content_type}"
    except Exception as e:
        return f"Error processing file: {str(e)}"
+@tool
def solve_equation(equation: str) -> str:
    """Solves mathematical equations and expressions symbolically.

    Input containing a single "=" (or "==") is treated as an equation and
    solved; anything else is treated as an expression and simplified. "^" is
    accepted as exponentiation. Equations are solved for x when x appears in
    them, otherwise for the alphabetically first symbol.

    Args:
        equation: Mathematical equation to solve (e.g., "x^2 + 2*x - 3 = 0")

    Returns:
        The solution to the equation, or a message starting with "Error solving equation:" on failure
    """
    try:
        import re

        import sympy as sp
        from sympy.parsing.sympy_parser import (
            convert_xor,
            parse_expr,
            standard_transformations,
        )

        # convert_xor makes "^" mean power; plain eval() treated it as XOR.
        transformations = standard_transformations + (convert_xor,)

        def parse(text):
            # NOTE(security): parse_expr still evaluates the text as Python
            # under the hood, so this must not be exposed to untrusted input
            # outside the agent sandbox.
            return parse_expr(text.strip(), transformations=transformations)

        # Split on a lone "=" or "==" but leave "<=", ">=" and "!=" intact.
        # The original rewrote "=" to "==" and eval'd the result, which
        # produced a Python bool (structural equality) instead of an equation.
        sides = re.split(r'(?<![<>!=])==?(?!=)', equation)
        if len(sides) > 2:
            raise ValueError("expected at most one '=' sign")

        if len(sides) == 1:
            # It's an expression to simplify
            return str(sp.simplify(parse(equation)))

        # It's an equation to solve: move everything to one side.
        lhs, rhs = (parse(side) for side in sides)
        residual = lhs - rhs

        x = sp.Symbol('x')
        unknowns = sorted(residual.free_symbols, key=lambda sym: sym.name)
        # Keep the historical default of solving for x; fall back to another
        # symbol only when x does not occur in the equation at all.
        target = x if (x in unknowns or not unknowns) else unknowns[0]
        return str(sp.solve(residual, target))
    except Exception as e:
        return f"Error solving equation: {str(e)}"
+@tool
def parse_structured_data(data: str, format_type: str = "auto") -> str:
    """Parses and analyzes structured data (CSV, JSON, etc.).

    With "auto", data starting with "{" or "[" is treated as JSON and data
    containing both a comma and a newline is treated as CSV. JSON is returned
    pretty-printed; CSV is summarised by shape, columns, first rows and, when
    numeric columns exist, descriptive statistics.

    Args:
        data: The structured data as a string
        format_type: Format type ("csv", "json", "auto"), matched case-insensitively

    Returns:
        Analysis of the structured data, or a message starting with "Error parsing data:" on failure
    """
    try:
        import json
        from io import StringIO

        # Normalise so "CSV" / " json " behave like their lowercase forms.
        kind = str(format_type).strip().lower()

        if kind == "auto":
            # Auto-detect format
            sample = data.strip()
            if sample.startswith(('{', '[')):
                kind = "json"
            elif ',' in sample and '\n' in sample:
                kind = "csv"

        if kind == "json":
            return json.dumps(json.loads(data), indent=2)

        if kind == "csv":
            # pandas is only needed for CSV, so JSON parsing keeps working
            # even where pandas is unavailable.
            import pandas as pd

            df = pd.read_csv(StringIO(data))
            result = f"DataFrame shape: {df.shape}\n"
            result += f"Columns: {list(df.columns)}\n"
            result += f"First 5 rows:\n{df.head().to_string()}\n"
            # Test for the presence of numeric columns directly; the previous
            # ``.columns.any()`` depended on the truthiness of column labels.
            if not df.select_dtypes(include=['number']).columns.empty:
                result += f"Numerical summary:\n{df.describe().to_string()}"
            return result

        # Report the caller's own spelling of the format.
        return f"Unsupported format: {format_type}"
    except Exception as e:
        return f"Error parsing data: {str(e)}"
 def setup_authentication():
     """Setup HuggingFace authentication for the app."""
             calculate_math,
             analyze_data,
             extract_numbers,
+            process_file_content,
+            solve_equation,
+            parse_structured_data
         ]
         # Create the CodeAgent with enhanced capabilities
                 add_base_tools=True,  # Adds DuckDuckGoSearchTool and other base tools
                 additional_authorized_imports=[
                     'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
+                    're', 'urllib.parse', 'base64', 'datetime', 'calendar',
+                    'pandas', 'numpy', 'sympy', 'scipy'
                 ],
+                max_steps=15,  # Increased for complex multi-step reasoning
                 verbosity_level=1  # Reduce verbosity for cleaner output
             )
+            print("✅ GAIA Agent initialized successfully with PRO model and enhanced tools")
         except Exception as e:
             print(f"❌ Error initializing agent: {e}")
             raise e
         try:
             print(f"🤖 Processing question: {question[:100]}...")
+            # Enhanced GAIA-optimized prompt
+            enhanced_prompt = f"""You are an expert AI assistant designed to excel at the GAIA benchmark. You must answer questions with perfect accuracy using a systematic approach.
+CRITICAL INSTRUCTIONS FOR GAIA SUCCESS:
+1. ANALYZE THE QUESTION: Read carefully and identify what type of question this is:
+   - Mathematical calculation or equation
+   - Information retrieval from web/files
+   - Data analysis or statistics
+   - Multi-step reasoning problem
+   - Factual lookup
+2. CHOOSE YOUR APPROACH:
+   - For math: Use calculate_math tool or solve_equation for complex equations
+   - For web info: Use DuckDuckGoSearchTool then visit_webpage for details
+   - For files: Use process_file_content to download and analyze
+   - For data: Use analyze_data or parse_structured_data
+   - For numbers in text: Use extract_numbers first
+3. BE SYSTEMATIC:
+   - Break complex questions into steps
+   - Use multiple tools if needed
+   - Verify your reasoning
+   - Double-check calculations
+4. ANSWER FORMAT:
+   - Give ONLY the final answer
+   - No explanations, no "FINAL ANSWER:" prefix
+   - For numbers: just the number (e.g., "42", not "42.0")
+   - For text: just the text without quotes
+   - Be precise with units, dates, and formatting
+5. ACCURACY IS PARAMOUNT:
+   - GAIA requires exact matches
+   - Round numbers appropriately
+   - Use proper case and spelling
+   - Include units when relevant
 Question: {question}
+Think step by step, use the appropriate tools, and provide only the final answer:"""
+            # Run the agent with enhanced error handling
             try:
                 result = self.agent.run(enhanced_prompt)
             except Exception as api_error:
+                if "402" in str(api_error) or "Payment Required" in str(api_error):
+                    print(f"⚠️ API quota issue (you have Pro, this shouldn't happen): {api_error}")
+                    result = f"API Error: {str(api_error)}"
                 else:
                     raise api_error
+            # Enhanced answer cleaning for GAIA precision
             if isinstance(result, str):
                 result = result.strip()
+                # Remove any explanatory text before the answer
+                lines = result.split('\n')
+                for i, line in enumerate(lines):
+                    line = line.strip()
+                    if line and not line.startswith(('Step', 'First', 'Next', 'Then', 'Finally', 'Therefore', 'So,', 'Thus')):
+                        result = line
+                        break
+                # Remove common prefixes
                 result = re.sub(r'^(FINAL\s*ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
                 result = re.sub(r'^(ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
                 result = re.sub(r'^(RESULT\s*:?\s*)', '', result, flags=re.IGNORECASE)
+                result = re.sub(r'^(THE\s*ANSWER\s*IS\s*:?\s*)', '', result, flags=re.IGNORECASE)
+                # Remove quotes if the entire answer is wrapped
                 if (result.startswith('"') and result.endswith('"')) or (result.startswith("'") and result.endswith("'")):
                     result = result[1:-1]
+                # Clean up decimal numbers (e.g., "42.0" -> "42")
+                if re.match(r'^\d+\.0+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per processed question, or None when the run stops before any
        question is processed (no login, agent init failure, fetch failure).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Enhanced Agent
    try:
        print("🚀 Initializing GAIA Agent with smolagents...")
        agent = GAIAAgent()
        print("✅ Enhanced agent ready for GAIA benchmark!")
    except Exception as e:
        error_msg = f"Error initializing agent: {e}"
        print(f"❌ {error_msg}")
        return error_msg, None

    # In the case of an app running as a hugging Face space, this link points toward your codebase
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"📥 Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"✅ Fetched {len(questions_data)} questions from GAIA benchmark.")
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: JSONDecodeError is a subclass of it,
        # so in the previous order this handler could never run.
        print(f"❌ Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"❌ An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Enhanced Agent
    results_log = []
    answers_payload = []
    print(f"🤖 Running enhanced GAIA agent on {len(questions_data)} questions...")
    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"⚠️ Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n📝 Processing question {i}/{len(questions_data)} (ID: {task_id})")
        try:
            submitted_answer = agent(question_text)
            print(f"✅ Answer for {task_id}: {submitted_answer}")
        except Exception as e:
            # A failing question is still submitted, with the error text as
            # its answer, so one bad task does not abort the whole run.
            print(f"❌ Error running agent on task {task_id}: {e}")
            submitted_answer = f"AGENT ERROR: {e}"

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
            "Submitted Answer": submitted_answer
        })

    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("❌ Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"🚀 Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        score = result_data.get('score', 'N/A')
        correct_count = result_data.get('correct_count', '?')
        total_attempted = result_data.get('total_attempted', '?')

        final_status = (
            f"🎉 Submission Successful!\n"
            f"👤 User: {result_data.get('username')}\n"
            f"📊 Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
            f"🎯 Target: >30% for certification\n"
            f"💬 Message: {result_data.get('message', 'No message received.')}"
        )

        # Only numeric scores can be compared against the 30% target.
        if isinstance(score, (int, float)) and score >= 30:
            final_status += f"\n🏆 CONGRATULATIONS! You've achieved the target score of 30%!"
        elif isinstance(score, (int, float)):
            final_status += f"\n📈 Keep improving! You need {30-score:.1f}% more to reach the target."

        print("✅ Submission successful!")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"❌ Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "❌ Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"❌ Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"❌ An unexpected error occurred during submission: {e}"

    # Every failure path reports its message alongside the per-question log.
    print(status_message)
    return status_message, results_df
# --- Build Gradio Interface using Blocks ---
# Module-level UI definition: a login button, a run button, and two outputs
# (status text and a results table) that are filled by run_and_submit_all.
with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    gr.Markdown("# 🤖 Enhanced GAIA Agent Evaluation Runner")
    # The tool list mirrors the tools registered on the agent: the removed
    # text-counting tool is no longer advertised, and the file, equation and
    # structured-data tools are listed instead.
    gr.Markdown(
        """
        **Enhanced Agent for GAIA Benchmark Certification**

        This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:

        - 🔍 **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
        - 🐍 **Python Interpreter**: Code execution capabilities (from base toolkit)
        - 🌐 **Web Scraping**: Custom webpage visitor for content extraction
        - 🧮 **Mathematics**: Advanced calculation capabilities
        - 📊 **Data Analysis**: Statistical analysis of numerical data
        - 🔢 **Number Extraction**: Intelligent number parsing from text
        - 📁 **File Processing**: Downloads files from URLs and returns their text or JSON content
        - ➗ **Equation Solving**: Symbolic solving and simplification of equations
        - 📑 **Structured Data**: Parsing and summarising CSV and JSON data
        - 🤖 **LLM Model**: Llama-3.1-8B-Instruct for advanced reasoning

        **Instructions:**

        1. 🔄 **Clone this space** and customize the agent as needed
        2. 🔑 **Log in** to your Hugging Face account using the button below
        3. 🚀 **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
        4. 🎯 **Target**: Score >30% for course certification

        **Goal**: Answer GAIA level 1 validation questions with exact match precision.

        ---

        ⚠️ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")

    status_output = gr.Textbox(
        label="📊 Evaluation Status & Results",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation..."
    )

    results_table = gr.DataFrame(
        label="📋 Questions and Agent Responses",
        wrap=True,
        headers=["Task ID", "Question", "Submitted Answer"]
    )

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
+if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("🤖 ENHANCED GAIA AGENT STARTING UP")
+    print("="*60)
+    # Setup authentication
+    print("🔐 Setting up HuggingFace authentication...")
+    auth_success = setup_authentication()
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   🌐 Runtime URL: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+        if not auth_success:
+            print("💡 For local testing, you may need to run:")
+            print("   from huggingface_hub import notebook_login")
+            print("   notebook_login()")
+    if space_id_startup:
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   🔗 Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
+    print("="*60)
+    print("🚀 Launching Enhanced GAIA Agent Interface...")
+    print("🎯 Target: >30% score on GAIA benchmark")
+    print("="*60 + "\n")
+    demo.launch(debug=True, share=False), result):
+                    result = str(int(float(result)))
                 result = result.strip()
                 print(f"✅ Agent response: {result}")