Final_Assignment_Template

Sleeping

App Files Files Community

pmeyhoefer committed on May 2, 2025

Commit

fcc0bb0

verified ·

1 Parent(s): 15fa167

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -92

app.py CHANGED Viewed

@@ -31,9 +31,15 @@ except Exception as e:
     logger.error(f"Failed to instantiate DuckDuckGoSearchTool: {e}. Web search will not work.")
     search_tool_instance = None
 @tool
 def web_search(query: str) -> str:
-    """Performs a web search using DuckDuckGo."""
     logger.info(f"Executing web_search with query: '{query[:100]}...'")
     if search_tool_instance is None:
         return "Search Error: Tool not initialized."
@@ -46,50 +52,60 @@ def web_search(query: str) -> str:
         logger.exception(f"web_search failed for query: {query}")
         return f"Search Error: {e}"
 @tool
 def wikipedia_lookup(page_title: str) -> str:
-    """Fetches the summary introduction text of an English Wikipedia page."""
     page_safe = page_title.replace(" ", "_")
     logger.info(f"Executing wikipedia_lookup for page: '{page_title}' (URL: {page_safe})")
     try:
         url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
-        headers = {'User-Agent': f'GAIAgent/1.1 ({os.getenv("SPACE_ID", "unknown")})'}
         r = requests.get(url, headers=headers, timeout=15)
         r.raise_for_status()
         data = r.json()
         extract = data.get("extract", "")
         if extract:
             return extract
         else:
             page_type = data.get("type", "standard")
             title = data.get("title", page_title)
             if page_type == "disambiguation":
                 return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
             else:
                  return f"Wikipedia Error: Page '{title}' found but has no summary."
     except requests.exceptions.HTTPError as e:
         if e.response.status_code == 404:
             return f"Wikipedia Error: Page '{page_safe}' not found."
         else:
             return f"Wikipedia Error: HTTP {e.response.status_code} for page '{page_safe}'."
     except Exception as e:
         logger.exception(f"wikipedia_lookup failed for page: {page_safe}")
         return f"Wikipedia Error: Unexpected error: {e}"
 # --- The ReACT Prompt (ensure this is the *only* main prompt definition) ---
-# Define the *exact* instructions for the LLM, listing the *actual* tool function names.
-REACT_INSTRUCTION_PROMPT = """You are a helpful assistant that answers questions using the provided tools.
 Available Tools:
-- web_search(query: str): Use this for searching the web.
-- wikipedia_lookup(page_title: str): Use this to get information from a specific English Wikipedia page (e.g., 'Berlin', 'Python_(programming_language)').
 Follow these steps:
-1. Thought: Plan which tool to use.
 2. Action: Call ONE tool (e.g., web_search(query="...") or wikipedia_lookup(page_title="...")).
 3. Observation: Record the result.
-4. Thought: Analyze the result. If answer found, prepare it. If not, plan next step.
-5. Repeat Action/Observation/Thought until answer is found or determined impossible.
 6. Thought: Summarize findings based ONLY on observations.
 7. Final Answer: Provide the answer starting exactly with "FINAL ANSWER: " using the required format (number, short string, or comma-separated list).
@@ -117,9 +133,8 @@ except Exception as e:
 logger.info("Initializing CodeAgent...")
 try:
-    # Pass the list of actual tool functions
     agent = CodeAgent(
-        tools=[web_search, wikipedia_lookup],
         model=llm_model
     )
     logger.info("CodeAgent initialized OK.")
@@ -131,17 +146,16 @@ except Exception as e:
 def run_agent_on_question(question: str) -> str:
     """Runs the agent with the CORRECT prompt."""
     question = question.strip()
-    if not question:
-        return "AGENT_ERROR: Question cannot be empty."
     # *** CRITICAL: Construct the prompt HERE using the correct variable ***
     full_prompt = REACT_INSTRUCTION_PROMPT.strip() + "\n\nQUESTION: " + question
     logger.info(f"--- Running Agent for Question: '{question}' ---")
-    # Add debug log to show the start of the prompt being used
-    logger.info(f"DEBUG: Using prompt starting with: {full_prompt[:300]}...") # Log beginning of prompt
     try:
-        raw_result = agent.run(full_prompt) # Pass the correctly constructed prompt
         logger.info(f"Agent run completed. Output length: {len(raw_result)}")
         return raw_result
     except Exception as e:
@@ -149,35 +163,15 @@ def run_agent_on_question(question: str) -> str:
         return f"AGENT_ERROR: Exception during run: {e}\n{traceback.format_exc()}"
 # --- Gradio Interface & Submission Logic ---
-# FIX: Define evaluate_and_submit WITHOUT the hf_profile argument initially
-# We will get the profile *inside* the function if needed.
 def evaluate_and_submit():
     """Gradio action: Fetches questions, runs agent, submits results."""
     logger.info("🚀 Starting evaluation run...")
-    # Get profile info *inside* the function - this avoids the TypeError
-    # Note: This requires the user to be logged in via the button *before* clicking Run.
-    try:
-        # This method of getting profile might need adjustment depending on Gradio version/context
-        # Placeholder: Assuming we can get username some other way if direct profile access fails.
-        # For now, let's hardcode or retrieve differently if `gr.OAuthProfile()` isn't available here.
-        # Let's proceed without username for now if OAuthProfile is problematic.
-        # A better approach might involve JavaScript interaction or different Gradio auth flow.
-        username = os.getenv("HF_USERNAME", "unknown_user") # Fallback to env var or default
-        if username == "unknown_user":
-             logger.warning("Could not determine Hugging Face username reliably. Using fallback.")
-        # Alternative: Could try reading from OAuth info if available in request context (advanced)
-    except Exception as auth_e:
-         logger.error(f"Could not get user profile: {auth_e}. Using fallback username.")
-         username = "unknown_user_error"
     logger.info(f"Running as user (best effort): {username}")
     # 1. Fetch Questions
-    questions = []
     try:
         resp = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
         resp.raise_for_status()
@@ -186,91 +180,58 @@ def evaluate_and_submit():
         logger.info(f"✅ Fetched {len(questions)} questions.")
     except Exception as e:
         logger.exception("Failed to fetch questions")
-        return f"❌ Error fetching questions: {e}", pd.DataFrame() # Return empty DF on fetch error
-    if not questions:
-        return "ℹ️ No questions fetched.", pd.DataFrame()
     # 2. Run Agent & Collect Results
     results_log = []
     answers_payload = []
     for i, item in enumerate(questions):
-        task_id = item.get("task_id")
-        question_text = item.get("question")
         if not task_id or not question_text: continue
         logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
         raw_agent_output = run_agent_on_question(question_text)
-        final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker." # Default
-        marker = "FINAL ANSWER:"
-        if marker in raw_agent_output:
-            final_answer = raw_agent_output.split(marker, 1)[1].strip()
-        elif "AGENT_ERROR:" in raw_agent_output:
-            final_answer = raw_agent_output # Submit the error
-        results_log.append({
-            "Task ID": task_id, "Question": question_text,
-            "Submitted Answer": final_answer, "Full Output": raw_agent_output
-        })
         answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
     results_df = pd.DataFrame(results_log)
-    if not answers_payload:
-        return "⚠️ Agent ran but produced no answers.", results_df
     # 3. Submit Answers
     logger.info(f"Submitting {len(answers_payload)} answers...")
-    space_id = os.getenv("SPACE_ID", "SPACE_ID_NOT_SET")
-    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if "NOT_SET" not in space_id else "URL_NA"
     submit_data = {"username": username, "agent_code": agent_code_url, "answers": answers_payload}
     try:
         response = requests.post(f"{SUBMISSION_URL}/submit", json=submit_data, timeout=90)
-        response.raise_for_status()
-        result = response.json()
         logger.info(f"✅ Submission successful! Response: {result}")
-        score = result.get('score', 'N/A')
-        score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
-        status = (f"✅ Success! Score: {score_str} "
-                  f"({result.get('correct_count','?')}/{result.get('total_attempted','?')}). "
-                  f"Msg: {result.get('message','')}")
         return status, results_df
     except Exception as e:
         logger.exception("Submission failed")
         err_msg = f"❌ Submission Failed: {e}"
-        if hasattr(e, 'response') and e.response is not None:
-            err_msg += f" | Response: {e.response.text[:300]}"
         return err_msg, results_df
 # --- Build Gradio App ---
 logger.info("Setting up Gradio interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 Agent Evaluation Runner 🚀")
-    gr.Markdown("Ensure `GITHUB_TOKEN` secret is set. Click Run to start.")
-    # Removed LoginButton to simplify and avoid TypeError for now
-    # gr.LoginButton()
     run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
     status_textbox = gr.Textbox(label="📊 Status", lines=4, interactive=False)
-    results_df_display = gr.DataFrame(
-        label="📋 Detailed Log",
-        headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
-        wrap=True, column_widths=["10%", "25%", "20%", "45%"]
-    )
-    # Connect button click to the function WITHOUT inputs arg for now
-    run_button.click(
-        fn=evaluate_and_submit,
-        inputs=None, # No direct inputs from UI components
-        outputs=[status_textbox, results_df_display]
-    )
 logger.info("Gradio interface setup complete.")
 # --- Launch ---
 if __name__ == "__main__":
     logger.info("Launching Gradio application...")
-    demo.launch(debug=True, share=False) # share=False is fine for HF Spaces internally
     logger.info("Gradio application launched.")

     logger.error(f"Failed to instantiate DuckDuckGoSearchTool: {e}. Web search will not work.")
     search_tool_instance = None
+# *** FIX: Added Args description to docstrings ***
 @tool
 def web_search(query: str) -> str:
+    """
+    Performs a web search using DuckDuckGo. Use this for general questions or current info.
+    Args:
+        query (str): The search query string.
+    """
     logger.info(f"Executing web_search with query: '{query[:100]}...'")
     if search_tool_instance is None:
         return "Search Error: Tool not initialized."
         logger.exception(f"web_search failed for query: {query}")
         return f"Search Error: {e}"
+# *** FIX: Added Args description to docstrings ***
 @tool
 def wikipedia_lookup(page_title: str) -> str:
     """
     Fetches the summary intro text of an English Wikipedia page. Use exact titles.
     Args:
         page_title (str): The exact title of the Wikipedia page (e.g., 'Albert Einstein').
     """
     # NOTE(review): the docstring above is kept verbatim on purpose; the @tool
     # decorator presumably reads it to build the tool description — confirm
     # before rewording it.
     #
     # Returns the page's "extract" text on success, otherwise a string that
     # starts with "Wikipedia Error:". This function never raises: every
     # failure is converted into such an error string.
     from urllib.parse import quote  # local import keeps the block self-contained

     # Surrounding whitespace would otherwise turn into stray underscores.
     page_safe = page_title.strip().replace(" ", "_")
     if not page_safe:
         # Avoid a pointless request against ".../page/summary/".
         logger.warning("wikipedia_lookup called with an empty page title.")
         return "Wikipedia Error: Empty page title."
     logger.info(f"Executing wikipedia_lookup for page: '{page_title}' (URL: {page_safe})")
     try:
         # Percent-encode the title as a single path segment. safe="" also
         # encodes "/" so titles such as "AC/DC" are not split into extra path
         # segments, and "?" / "#" are not read as query or fragment markers.
         encoded_title = quote(page_safe, safe="")
         url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
         headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
         r = requests.get(url, headers=headers, timeout=15)
         r.raise_for_status()
         data = r.json()
         extract = data.get("extract", "")
         if extract:
             logger.info(f"Wikipedia found summary ({len(extract)} chars) for '{page_title}'.")
             return extract
         # No extract: report why, using the title the API resolved if present.
         page_type = data.get("type", "standard")
         title = data.get("title", page_title)
         if page_type == "disambiguation":
             logger.warning(f"Wikipedia page '{title}' is disambiguation.")
             return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
         logger.warning(f"Wikipedia page '{title}' found but has no summary.")
         return f"Wikipedia Error: Page '{title}' found but has no summary."
     except requests.exceptions.HTTPError as e:
         # Raised by raise_for_status(); 404 gets its own "not found" message.
         if e.response.status_code == 404:
             logger.warning(f"Wikipedia page not found: {page_safe}")
             return f"Wikipedia Error: Page '{page_safe}' not found."
         logger.error(f"Wikipedia HTTP error {e.response.status_code} for {page_safe}")
         return f"Wikipedia Error: HTTP {e.response.status_code} for page '{page_safe}'."
     except Exception as e:
         # Catch-all (timeouts, connection errors, bad JSON, ...): log the
         # traceback and return an error string instead of raising.
         logger.exception(f"wikipedia_lookup failed for page: {page_safe}")
         return f"Wikipedia Error: Unexpected error: {e}"
 # --- The ReACT Prompt (ensure this is the *only* main prompt definition) ---
+REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
 Available Tools:
+- web_search(query: str): Searches the web. Use for general info or current events.
+- wikipedia_lookup(page_title: str): Looks up a specific English Wikipedia page. Use exact titles (e.g., 'Berlin').
 Follow these steps:
+1. Thought: Plan which tool to use and why.
 2. Action: Call ONE tool (e.g., web_search(query="...") or wikipedia_lookup(page_title="...")).
 3. Observation: Record the result.
+4. Thought: Analyze result. If answered, prepare final answer. If not, plan next step.
+5. Repeat Action/Observation/Thought until answered or determined impossible.
 6. Thought: Summarize findings based ONLY on observations.
 7. Final Answer: Provide the answer starting exactly with "FINAL ANSWER: " using the required format (number, short string, or comma-separated list).
 logger.info("Initializing CodeAgent...")
 try:
     agent = CodeAgent(
+        tools=[web_search, wikipedia_lookup], # Pass the functions decorated with @tool
         model=llm_model
     )
     logger.info("CodeAgent initialized OK.")
 def run_agent_on_question(question: str) -> str:
     """Runs the agent with the CORRECT prompt."""
     question = question.strip()
+    if not question: return "AGENT_ERROR: Question cannot be empty."
     # *** CRITICAL: Construct the prompt HERE using the correct variable ***
     full_prompt = REACT_INSTRUCTION_PROMPT.strip() + "\n\nQUESTION: " + question
     logger.info(f"--- Running Agent for Question: '{question}' ---")
+    # *** Add more prominent logging to verify the prompt ***
+    logger.info(f"CRITICAL_DEBUG: Using prompt beginning:\n{full_prompt[:400]}\n...") # Log first 400 chars
     try:
+        raw_result = agent.run(full_prompt)
         logger.info(f"Agent run completed. Output length: {len(raw_result)}")
         return raw_result
     except Exception as e:
         return f"AGENT_ERROR: Exception during run: {e}\n{traceback.format_exc()}"
 # --- Gradio Interface & Submission Logic ---
+# Using the version without direct profile input to avoid potential TypeErrors
 def evaluate_and_submit():
     """Gradio action: Fetches questions, runs agent, submits results."""
     logger.info("🚀 Starting evaluation run...")
+    username = os.getenv("HF_USERNAME", "unknown_user") # Fallback username
+    if username == "unknown_user": logger.warning("Could not get HF username reliably.")
     logger.info(f"Running as user (best effort): {username}")
     # 1. Fetch Questions
     try:
         resp = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
         resp.raise_for_status()
         logger.info(f"✅ Fetched {len(questions)} questions.")
     except Exception as e:
         logger.exception("Failed to fetch questions")
+        return f"❌ Error fetching questions: {e}", pd.DataFrame()
+    if not questions: return "ℹ️ No questions fetched.", pd.DataFrame()
     # 2. Run Agent & Collect Results
     results_log = []
     answers_payload = []
     for i, item in enumerate(questions):
+        task_id = item.get("task_id"); question_text = item.get("question")
         if not task_id or not question_text: continue
         logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
         raw_agent_output = run_agent_on_question(question_text)
+        final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker."
+        marker = "FINAL ANSWER:";
+        if marker in raw_agent_output: final_answer = raw_agent_output.split(marker, 1)[1].strip()
+        elif "AGENT_ERROR:" in raw_agent_output: final_answer = raw_agent_output
+        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer, "Full Output": raw_agent_output})
         answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
     results_df = pd.DataFrame(results_log)
+    if not answers_payload: return "⚠️ Agent ran but produced no answers.", results_df
     # 3. Submit Answers
     logger.info(f"Submitting {len(answers_payload)} answers...")
+    space_id = os.getenv("SPACE_ID", "NA"); agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"
     submit_data = {"username": username, "agent_code": agent_code_url, "answers": answers_payload}
     try:
         response = requests.post(f"{SUBMISSION_URL}/submit", json=submit_data, timeout=90)
+        response.raise_for_status(); result = response.json()
         logger.info(f"✅ Submission successful! Response: {result}")
+        score = result.get('score', 'N/A'); score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
+        status = (f"✅ Success! Score: {score_str} ({result.get('correct_count','?')}/{result.get('total_attempted','?')}). Msg: {result.get('message','')}")
         return status, results_df
     except Exception as e:
         logger.exception("Submission failed")
         err_msg = f"❌ Submission Failed: {e}"
+        if hasattr(e, 'response') and e.response is not None: err_msg += f" | Response: {e.response.text[:300]}"
         return err_msg, results_df
 # --- Build Gradio App ---
 logger.info("Setting up Gradio interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Agent Evaluation Runner 🚀\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
     run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
     status_textbox = gr.Textbox(label="📊 Status", lines=4, interactive=False)
+    results_df_display = gr.DataFrame(label="📋 Detailed Log", headers=["Task ID", "Question", "Submitted Answer", "Full Output"], wrap=True, column_widths=["10%", "25%", "20%", "45%"])
+    run_button.click(fn=evaluate_and_submit, inputs=None, outputs=[status_textbox, results_df_display])
 logger.info("Gradio interface setup complete.")
 # --- Launch ---
 if __name__ == "__main__":
     logger.info("Launching Gradio application...")
+    # Setting share=False as recommended for HF Spaces, debug=True for detailed Gradio logs
+    demo.launch(debug=True, share=False)
     logger.info("Gradio application launched.")