Answeragent

Runtime error

App Files Files Community

Nitinguleria committed on May 31, 2025

Commit

79ad221

verified ·

1 Parent(s): 46188de

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -363

app.py CHANGED Viewed

@@ -1,284 +1,47 @@
 import os
 import gradio as gr
 import requests
 import pandas as pd
-import sympy
-import re
-from duckduckgo_search import DDGS
-from langgraph.graph import StateGraph, END
-from typing import TypedDict, Literal
-# Default API URL - you may need to update this
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Enhanced Tools for GAIA Benchmark ---
-def wikipedia_search_tool(input: str) -> str:
-    """Enhanced search tool with better result processing"""
-    try:
-        ddgs = DDGS()
-        results = ddgs.text(input, max_results=5)
-        if results:
-            # Combine multiple results for better coverage
-            combined_info = []
-            for i, result in enumerate(results[:3]):
-                body = result.get("body", "")
-                if body and len(body) > 10:
-                    combined_info.append(f"Source {i+1}: {body}")
-            if combined_info:
-                return "\n\n".join(combined_info)
-        return "No relevant information found."
-    except Exception as e:
-        return f"Search Error: {e}"
-def math_solver_tool(input: str) -> str:
-    """Enhanced math solver with better parsing"""
-    try:
-        # Clean and preprocess the input
-        cleaned_input = input.replace("^", "**").replace("÷", "/")
-        # Try to extract mathematical expressions
-        math_patterns = [
-            r'[\d\+\-\*/\^\(\)\.\s]+',
-            r'[a-zA-Z\d\+\-\*/\^\(\)\.\s]+=.*',
-        ]
-        for pattern in math_patterns:
-            matches = re.findall(pattern, cleaned_input)
-            if matches:
-                try:
-                    expr = sympy.sympify(matches[0])
-                    result = expr.evalf()
-                    return str(result)
-                except:
-                    continue
-        # Direct sympy attempt
-        expr = sympy.sympify(cleaned_input)
-        result = expr.evalf()
-        return str(result)
-    except Exception as e:
-        # Try basic eval as fallback (with safety checks)
-        try:
-            # Only allow safe mathematical operations
-            safe_chars = set('0123456789+-*/.() ')
-            if all(c in safe_chars for c in input.replace(' ', '')):
-                result = eval(input)
-                return str(result)
-        except:
-            pass
-        return f"Could not solve mathematical expression: {e}"
-def code_execution_tool(input: str) -> str:
-    """Enhanced code execution with better safety and Python support"""
-    try:
-        # Create a safe execution environment
-        safe_globals = {
-            '__builtins__': {
-                'len': len, 'str': str, 'int': int, 'float': float,
-                'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
-                'sum': sum, 'max': max, 'min': min, 'abs': abs,
-                'round': round, 'range': range, 'enumerate': enumerate,
-                'zip': zip, 'sorted': sorted, 'reversed': reversed,
-                'print': print
-            },
-            'math': __import__('math'),
-            're': __import__('re'),
-        }
-        local_vars = {}
-        # Try to execute the code
-        if 'return ' in input or 'print(' in input:
-            exec(input, safe_globals, local_vars)
-            # Look for printed output or return values
-            if 'result' in local_vars:
-                return str(local_vars['result'])
-            return "Code executed successfully"
-        else:
-            # Try to evaluate as expression
-            result = eval(input, safe_globals, local_vars)
-            return str(result)
-    except Exception as e:
-        return f"Code execution error: {e}"
-def general_reasoning_tool(input: str) -> str:
-    """Tool for general reasoning and analysis"""
-    # This is a placeholder for more advanced reasoning
-    # In a real implementation, you might use an LLM here
-    # Simple keyword-based analysis
-    if any(word in input.lower() for word in ['compare', 'difference', 'similar', 'contrast']):
-        return f"Analysis: This appears to be a comparison question. Key factors to consider: {input[:200]}..."
-    elif any(word in input.lower() for word in ['cause', 'reason', 'why', 'because']):
-        return f"Reasoning: This is asking about causation. Consider multiple factors that might contribute to: {input[:200]}..."
-    else:
-        return f"General analysis: {input[:300]}..."
-# --- State definition ---
-class AgentState(TypedDict):
-    question: str
-    response: str
-    tool_used: str
-# --- Enhanced Routing logic for GAIA ---
-def route_question(state: AgentState) -> Literal["math", "code", "search", "reasoning"]:
-    """Enhanced routing for GAIA benchmark questions"""
-    q = state["question"].lower()
-    # Math-related keywords
-    math_keywords = [
-        "solve", "calculate", "evaluate", "compute", "sum", "multiply",
-        "divide", "percentage", "%", "=", "equation", "formula", "average",
-        "total", "cost", "price", "number", "how many", "how much"
-    ]
-    # Code-related keywords
-    code_keywords = [
-        "python", "code", "function", "return", "algorithm", "program",
-        "script", "execute", "run", "implementation"
-    ]
-    # Search-related keywords
-    search_keywords = [
-        "what", "who", "when", "where", "which", "capital", "country",
-        "invented", "created", "founded", "established", "located", "known for"
-    ]
-    # Check for mathematical expressions or numbers
-    if (any(k in q for k in math_keywords) or
-        re.search(r'\d+[\+\-\*/\^]\d+', q) or
-        re.search(r'\$\d+', q) or
-        '%' in q):
-        return "math"
-    elif any(k in q for k in code_keywords):
-        return "code"
-    elif any(k in q for k in search_keywords):
-        return "search"
-    else:
-        return "reasoning"
-# --- Node functions ---
-def math_node(state: AgentState) -> AgentState:
-    response = math_solver_tool(state["question"])
-    return {
-        "question": state["question"],
-        "response": response,
-        "tool_used": "math"
-    }
-def code_node(state: AgentState) -> AgentState:
-    response = code_execution_tool(state["question"])
-    return {
-        "question": state["question"],
-        "response": response,
-        "tool_used": "code"
-    }
-def search_node(state: AgentState) -> AgentState:
-    response = wikipedia_search_tool(state["question"])
-    return {
-        "question": state["question"],
-        "response": response,
-        "tool_used": "search"
-    }
-def reasoning_node(state: AgentState) -> AgentState:
-    response = general_reasoning_tool(state["question"])
-    return {
-        "question": state["question"],
-        "response": response,
-        "tool_used": "reasoning"
-    }
-# --- LangGraph setup with corrected API ---
-def create_agent_graph():
-    """Create the agent graph using the correct LangGraph API"""
-    # Create the state graph
-    workflow = StateGraph(AgentState)
-    # Add all the nodes
-    workflow.add_node("math", math_node)
-    workflow.add_node("code", code_node)
-    workflow.add_node("search", search_node)
-    workflow.add_node("reasoning", reasoning_node)
-    # Add conditional edges from entry point
-    workflow.add_conditional_edges(
-        "__start__",
-        route_question,
-        {
-            "math": "math",
-            "code": "code",
-            "search": "search",
-            "reasoning": "reasoning"
-        }
-    )
-    # All nodes end the workflow
-    workflow.add_edge("math", END)
-    workflow.add_edge("code", END)
-    workflow.add_edge("search", END)
-    workflow.add_edge("reasoning", END)
-    return workflow.compile()
-# Create the compiled graph
-app_graph = create_agent_graph()
-# --- Enhanced Agent wrapper ---
 class BasicAgent:
     def __init__(self):
-        self.graph = app_graph
-        print("Enhanced LangGraph Agent initialized for GAIA benchmark.")
     def __call__(self, question: str) -> str:
-        """Process a question and return an answer"""
-        try:
-            state = {
-                "question": question,
-                "response": "",
-                "tool_used": ""
-            }
-            result = self.graph.invoke(state)
-            # Post-process the response for better formatting
-            response = result.get("response", "No response generated")
-            tool_used = result.get("tool_used", "unknown")
-            # For math problems, try to extract just the numerical answer
-            if tool_used == "math" and response:
-                # Try to extract the final number
-                numbers = re.findall(r'-?\d+\.?\d*', response)
-                if numbers:
-                    return numbers[-1]  # Return the last number found
-            return str(response)
-        except Exception as e:
-            print(f"Error in agent processing: {e}")
-            return f"Error: Could not process the question - {e}"
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")
     if profile:
-        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -288,15 +51,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"
-    print(f"Agent code location: {agent_code}")
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
@@ -305,78 +68,56 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run Agent on all questions
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": submitted_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer
-            })
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            error_answer = f"AGENT ERROR: {e}"
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": error_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_answer
-            })
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
-    print(f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'...")
-    # 5. Submit answers
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
-            f"User: {result_data.get('username', username)}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
@@ -384,96 +125,85 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
-        status_message = f"Submission error: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Gradio Interface ---
-with gr.Blocks(title="GAIA Benchmark Agent") as demo:
-    gr.Markdown("# Enhanced GAIA Benchmark Agent")
     gr.Markdown(
         """
-        **Enhanced Agent for GAIA Benchmark - Targeting 60% Accuracy**
-        **Features:**
-        - Enhanced mathematical problem solving with symbolic computation
-        - Improved search capabilities with multiple source aggregation
-        - Safe code execution environment
-        - Smart question routing (math/code/search/reasoning)
-        - Better answer formatting and extraction
         **Instructions:**
-        1. Log in to your Hugging Face account using the button below
-        2. Click 'Run Evaluation & Submit All Answers' to start the benchmark
-        3. The agent will process all questions and submit answers automatically
-        **Note:** Processing may take several minutes depending on the number of questions.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(
-        label="Status & Results",
-        lines=8,
-        interactive=False,
-        placeholder="Click the button above to start the evaluation..."
-    )
-    results_table = gr.DataFrame(
-        label="Questions and Agent Responses",
-        wrap=True,
-        interactive=False
-    )
     run_button.click(
         fn=run_and_submit_all,
-        inputs=[],
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "="*50)
-    print("🚀 GAIA Benchmark Agent Starting")
-    print("="*50)
-    # Environment info
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
-    if space_host:
-        print(f"✅ SPACE_HOST: {space_host}")
-        print(f"   Runtime URL: https://{space_host}.hf.space")
     else:
-        print("ℹ️  Running locally (SPACE_HOST not found)")
-    if space_id:
-        print(f"✅ SPACE_ID: {space_id}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
     else:
-        print("ℹ️  SPACE_ID not found")
-    print("="*50 + "\n")
-    print("🎯 Target: 60% accuracy on GAIA benchmark")
-    print("🔧 Enhanced tools: Math, Code, Search, Reasoning")
-    print("\nLaunching Gradio interface...")
     demo.launch(debug=True, share=False)

+""" Basic Agent Evaluation Runner"""
 import os
+import inspect
 import gradio as gr
 import requests
 import pandas as pd
+from langchain_core.messages import HumanMessage
+from agent import build_graph
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
+    """A langgraph agent."""
     def __init__(self):
+        print("BasicAgent initialized.")
+        self.graph = build_graph()
     def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        messages = [HumanMessage(content=question)]
+        result = self.graph.invoke({"messages": messages})
+        answer = result['messages'][-1].content
+        return answer  # the [14:] slice is no longer needed
+def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
+        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
     except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)