Final_Assignment_Template

Sleeping

App Files Files Community

pmeyhoefer committed on May 3, 2025

Commit

d7730f0

verified ·

1 Parent(s): a0349ea

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -97

app.py CHANGED Viewed

@@ -4,13 +4,14 @@ import traceback
 import gradio as gr
 import requests
 import pandas as pd
-from openai import OpenAI
 from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
 from smolagents.models import OpenAIServerModel
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
 SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
 GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
 if not GITHUB_TOKEN:
@@ -18,71 +19,53 @@ if not GITHUB_TOKEN:
 GITHUB_ENDPOINT = "https://models.github.ai/inference"
 MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
 try:
     search_tool_instance = DuckDuckGoSearchTool()
     logger.info("DuckDuckGoSearchTool initialized successfully.")
 except Exception as e:
-    logger.error(f"Failed to instantiate DuckDuckGoSearchTool: {e}. Web search will not work.")
     search_tool_instance = None
 @tool
 def web_search(query: str) -> str:
-    """
-    Performs a web search using DuckDuckGo. Use this for general questions or current info.
-    Args:
-        query (str): The search query string.
-    """
-    logger.info(f"Executing web_search with query: '{query[:100]}...'")
     if search_tool_instance is None:
         return "Search Error: Tool not initialized."
     try:
         result = search_tool_instance(query=query)
-        logger.info(f"web_search returned {len(result)} chars.")
-        max_len = 3000
-        return result[:max_len] + "... (truncated)" if len(result) > max_len else result
     except Exception as e:
-        logger.exception(f"web_search failed for query: {query}")
         return f"Search Error: {e}"
 @tool
 def wikipedia_lookup(page_title: str) -> str:
-    """
-    Fetches the summary intro text of an English Wikipedia page. Use exact titles.
-    Args:
-        page_title (str): The exact title of the Wikipedia page (e.g., 'Albert Einstein').
-    """
     page_safe = page_title.replace(" ", "_")
-    logger.info(f"Executing wikipedia_lookup for page: '{page_title}' (URL: {page_safe})")
     try:
         url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
         headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
         r = requests.get(url, headers=headers, timeout=15)
         r.raise_for_status()
         data = r.json()
-        extract = data.get("extract", "")
-        if extract:
-            logger.info(f"Wikipedia found summary ({len(extract)} chars) for '{page_title}'.")
             return extract
-        else:
-            page_type = data.get("type", "standard")
-            title = data.get("title", page_title)
-            if page_type == "disambiguation":
-                logger.warning(f"Wikipedia page '{title}' is disambiguation.")
-                return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
-            else:
-                 logger.warning(f"Wikipedia page '{title}' found but has no summary.")
-                 return f"Wikipedia Error: Page '{title}' found but has no summary."
     except requests.exceptions.HTTPError as e:
-        if e.response.status_code == 404:
-            logger.warning(f"Wikipedia page not found: {page_safe}")
-            return f"Wikipedia Error: Page '{page_safe}' not found."
-        else:
-            logger.error(f"Wikipedia HTTP error {e.response.status_code} for {page_safe}")
-            return f"Wikipedia Error: HTTP {e.response.status_code} for page '{page_safe}'."
     except Exception as e:
-        logger.exception(f"wikipedia_lookup failed for page: {page_safe}")
-        return f"Wikipedia Error: Unexpected error: {e}"
 REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
 Available Tools:
 - web_search(query: str): Searches the web. Use for general info or current events.
@@ -102,108 +85,123 @@ Formatting Rules for FINAL ANSWER:
 Let's begin!
 """
-logger.info(f"Initializing LLM connection: {MODEL_ID} @ {GITHUB_ENDPOINT}")
 try:
-    logger.info("Attempting to configure OpenAIServerModel with 'api_base' (and no request_timeout)...")
     llm_model = OpenAIServerModel(
         model_id=MODEL_ID,
         api_key=GITHUB_TOKEN,
         api_base=GITHUB_ENDPOINT
-        # Removed request_timeout=60
     )
-    logger.info("LLM connection configured using 'api_base'.")
-except Exception as e:
-    logger.exception("CRITICAL: Failed to configure OpenAIServerModel (tried with api_base)")
-    raise RuntimeError(f"Could not configure SmolAgents model using api_base: {e}") from e
-logger.info("Initializing CodeAgent...")
-try:
     agent = CodeAgent(
         tools=[web_search, wikipedia_lookup],
         model=llm_model
     )
-    logger.info("CodeAgent initialized OK.")
 except Exception as e:
-    logger.exception("CRITICAL: Failed to initialize CodeAgent")
-    raise RuntimeError(f"Could not initialize CodeAgent: {e}") from e
 def run_agent_on_question(question: str) -> str:
     question = question.strip()
-    if not question: return "AGENT_ERROR: Question cannot be empty."
-    full_prompt = REACT_INSTRUCTION_PROMPT.strip() + "\n\nQUESTION: " + question
-    logger.info(f"--- Running Agent for Question: '{question}' ---")
-    logger.info(f"CRITICAL_DEBUG: Using prompt beginning:\n{full_prompt[:400]}\n...")
     try:
-        raw_result = agent.run(full_prompt)
-        logger.info(f"Agent run completed. Output length: {len(raw_result)}")
-        return raw_result
     except Exception as e:
-        logger.exception(f"Agent run failed for question '{question}'")
-        return f"AGENT_ERROR: Exception during run: {e}\n{traceback.format_exc()}"
 def evaluate_and_submit():
-    logger.info("🚀 Starting evaluation run...")
     username = os.getenv("HF_USERNAME", "unknown_user")
-    if username == "unknown_user": logger.warning("Could not get HF username reliably.")
-    logger.info(f"Running as user (best effort): {username}")
     try:
-        resp = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
-        resp.raise_for_status()
-        questions = resp.json()
-        if not isinstance(questions, list): raise ValueError("Invalid format")
-        logger.info(f"✅ Fetched {len(questions)} questions.")
     except Exception as e:
         logger.exception("Failed to fetch questions")
         return f"❌ Error fetching questions: {e}", pd.DataFrame()
-    if not questions: return "ℹ️ No questions fetched.", pd.DataFrame()
     results_log = []
     answers_payload = []
     for i, item in enumerate(questions):
-        task_id = item.get("task_id"); question_text = item.get("question")
-        if not task_id or not question_text: continue
-        logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
-        raw_agent_output = run_agent_on_question(question_text)
-        final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker."
-        marker = "FINAL ANSWER:";
-        if marker in raw_agent_output: final_answer = raw_agent_output.split(marker, 1)[1].strip()
-        elif "AGENT_ERROR:" in raw_agent_output: final_answer = raw_agent_output
-        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer, "Full Output": raw_agent_output})
         answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
     results_df = pd.DataFrame(results_log)
-    if not answers_payload: return "⚠️ Agent ran but produced no answers.", results_df
     logger.info(f"Submitting {len(answers_payload)} answers...")
-    space_id = os.getenv("SPACE_ID", "NA"); agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"
-    submit_data = {"username": username, "agent_code": agent_code_url, "answers": answers_payload}
     try:
-        response = requests.post(f"{SUBMISSION_URL}/submit", json=submit_data, timeout=90)
-        response.raise_for_status(); result = response.json()
-        logger.info(f"✅ Submission successful! Response: {result}")
-        score = result.get('score', 'N/A'); score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
-        status = (f"✅ Success! Score: {score_str} ({result.get('correct_count','?')}/{result.get('total_attempted','?')}). Msg: {result.get('message','')}")
-        return status, results_df
     except Exception as e:
-        logger.exception("Submission failed")
         err_msg = f"❌ Submission Failed: {e}"
-        if hasattr(e, 'response') and e.response is not None: err_msg += f" | Response: {e.response.text[:300]}"
         return err_msg, results_df
-logger.info("Setting up Gradio interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🚀 Agent Evaluation Runner 🚀\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
     run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
-    status_textbox = gr.Textbox(label="📊 Status", lines=4, interactive=False)
-    results_df_display = gr.DataFrame(label="📋 Detailed Log", headers=["Task ID", "Question", "Submitted Answer", "Full Output"], wrap=True, column_widths=["10%", "25%", "20%", "45%"])
-    run_button.click(fn=evaluate_and_submit, inputs=None, outputs=[status_textbox, results_df_display])
-logger.info("Gradio interface setup complete.")
 if __name__ == "__main__":
     logger.info("Launching Gradio application...")
     demo.launch(debug=True, share=False)
-    logger.info("Gradio application launched.")

 import gradio as gr
 import requests
 import pandas as pd
 from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
 from smolagents.models import OpenAIServerModel
+# Setup logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
+# Constants
 SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
 GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
 if not GITHUB_TOKEN:
 GITHUB_ENDPOINT = "https://models.github.ai/inference"
 MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
+# Initialize search tool
 try:
     search_tool_instance = DuckDuckGoSearchTool()
     logger.info("DuckDuckGoSearchTool initialized successfully.")
 except Exception as e:
+    logger.error(f"Failed to initialize DuckDuckGoSearchTool: {e}")
     search_tool_instance = None
 @tool
 def web_search(query: str) -> str:
     """Perform a web search using DuckDuckGo.

     Returns at most 3000 characters of the search output; longer output is
     cut and suffixed with "... (truncated)". Failures are reported as a
     "Search Error: ..." string instead of being raised, so the agent can
     read the error and react to it.

     Args:
         query: The search query string.
     """
     # The Args section above is required: the @tool decorator builds the tool
     # schema from the docstring and needs a description for every argument.
     logger.info(f"Searching: '{query[:50]}...'")
     if search_tool_instance is None:
         return "Search Error: Tool not initialized."
     try:
         result = search_tool_instance(query=query)
         max_len = 3000
         if len(result) > max_len:
             return result[:max_len] + "... (truncated)"
         return result
     except Exception as e:
         logger.exception("Search failed")
         return f"Search Error: {e}"
 @tool
 def wikipedia_lookup(page_title: str) -> str:
     """Fetch the summary intro text of an English Wikipedia page.

     Queries the Wikipedia REST summary endpoint and returns the page's
     "extract" field. Every failure (missing page, disambiguation page,
     HTTP or network error) is reported as a "Wikipedia Error: ..." string
     rather than raised.

     Args:
         page_title: The exact title of the Wikipedia page (e.g., 'Albert Einstein').
     """
     # Function-scope import keeps this block self-contained.
     from urllib.parse import quote

     # Spaces become underscores; every other character that is unsafe in a
     # URL path segment (including "/", "?" and "#") is percent-encoded so the
     # title cannot alter the request path or query string.
     page_safe = quote(page_title.replace(" ", "_"), safe="")
     logger.info(f"Wikipedia lookup: '{page_title}'")
     try:
         url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
         headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
         r = requests.get(url, headers=headers, timeout=15)
         r.raise_for_status()
         data = r.json()
         if extract := data.get("extract", ""):
             return extract
         title = data.get("title", page_title)
         if data.get("type") == "disambiguation":
             return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
         return f"Wikipedia Error: Page '{title}' found but has no summary."
     except requests.exceptions.HTTPError as e:
         # Compare against None explicitly: a requests Response is falsy for
         # 4xx/5xx statuses, so a plain truthiness test would be wrong here.
         status_code = e.response.status_code if e.response is not None else "unknown"
         reason = "Page not found" if status_code == 404 else f"HTTP {status_code}"
         return f"Wikipedia Error: {reason} for '{page_title}'."
     except Exception as e:
         logger.exception("Wikipedia lookup failed")
         return f"Wikipedia Error: {e}"
+# Agent prompt
 REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
 Available Tools:
 - web_search(query: str): Searches the web. Use for general info or current events.
 Let's begin!
 """
+# Initialize LLM and agent
+logger.info(f"Initializing LLM and agent: {MODEL_ID}")
 try:
     llm_model = OpenAIServerModel(
         model_id=MODEL_ID,
         api_key=GITHUB_TOKEN,
         api_base=GITHUB_ENDPOINT
     )
     agent = CodeAgent(
         tools=[web_search, wikipedia_lookup],
         model=llm_model
     )
+    logger.info("Agent initialization complete")
 except Exception as e:
+    logger.exception("CRITICAL: Agent initialization failed")
+    raise RuntimeError(f"Agent initialization failed: {e}") from e
 def run_agent_on_question(question: str) -> str:
     """Run the agent on one question and return its raw output as text.

     The question is appended to REACT_INSTRUCTION_PROMPT and passed to
     agent.run. Errors never propagate: an empty question or a failing run
     yields a string starting with "AGENT_ERROR:".

     Args:
         question: The question text; surrounding whitespace is ignored.

     Returns:
         The agent's output converted to str, or an "AGENT_ERROR: ..." message
         (including the traceback when the run raised).
     """
     question = question.strip()
     if not question:
         return "AGENT_ERROR: Empty question"
     logger.info(f"Running agent on: '{question}'")
     prompt = f"{REACT_INSTRUCTION_PROMPT.strip()}\n\nQUESTION: {question}"
     try:
         # Coerce to str so the declared return type holds: callers run
         # substring checks on the result, which would raise TypeError on a
         # non-string value such as a number.
         return str(agent.run(prompt))
     except Exception as e:
         logger.exception("Agent run failed")
         return f"AGENT_ERROR: {e}\n{traceback.format_exc()}"
 def evaluate_and_submit():
     """Fetch all questions, run the agent on each, and submit the answers.

     Returns:
         A (status_message, results_dataframe) tuple. The dataframe has the
         columns "Task ID", "Question", "Submitted Answer" and "Full Output";
         it is empty when no question could be fetched.
     """
     logger.info("🚀 Starting evaluation...")
     username = os.getenv("HF_USERNAME", "unknown_user")
     # Fetch questions
     try:
         questions_response = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
         # Fail on 4xx/5xx instead of trying to interpret an error body as data.
         questions_response.raise_for_status()
         questions = questions_response.json()
         if not isinstance(questions, list):
             raise ValueError("Invalid response format")
         logger.info(f"✅ Fetched {len(questions)} questions")
     except Exception as e:
         logger.exception("Failed to fetch questions")
         return f"❌ Error fetching questions: {e}", pd.DataFrame()
     if not questions:
         return "ℹ️ No questions received", pd.DataFrame()
     # Process questions
     results_log = []
     answers_payload = []
     for i, item in enumerate(questions):
         if not isinstance(item, dict):
             # Skip malformed entries rather than aborting the whole run.
             logger.warning(f"Skipping malformed question entry at index {i}")
             continue
         task_id, question_text = item.get("task_id"), item.get("question")
         if not task_id or not question_text:
             continue
         logger.info(f"Processing Q{i+1}/{len(questions)}: ID={task_id}")
         raw_output = run_agent_on_question(question_text)
         # Extract final answer: text after the marker, else the agent's own
         # error message, else a generic error.
         if "FINAL ANSWER:" in raw_output:
             final_answer = raw_output.split("FINAL ANSWER:", 1)[1].strip()
         elif "AGENT_ERROR:" in raw_output:
             final_answer = raw_output
         else:
             final_answer = "AGENT_ERROR: No final answer found"
         results_log.append({
             "Task ID": task_id,
             "Question": question_text,
             "Submitted Answer": final_answer,
             "Full Output": raw_output
         })
         answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
     results_df = pd.DataFrame(results_log)
     if not answers_payload:
         return "⚠️ No answers generated", results_df
     # Submit answers
     logger.info(f"Submitting {len(answers_payload)} answers...")
     space_id = os.getenv("SPACE_ID", "NA")
     agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"
     try:
         submit_response = requests.post(
             f"{SUBMISSION_URL}/submit",
             json={"username": username, "agent_code": agent_code_url, "answers": answers_payload},
             timeout=90
         )
         # Without this, a rejected submission with a JSON error body would be
         # reported below as a success with score "N/A".
         submit_response.raise_for_status()
         result = submit_response.json()
         logger.info(f"✅ Submission successful! Response: {result}")
         score = result.get('score', 'N/A')
         score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
         return (f"✅ Success! Score: {score_str} "
                 f"({result.get('correct_count','?')}/{result.get('total_attempted','?')}). "
                 f"Msg: {result.get('message','')}"), results_df
     except Exception as e:
         logger.exception("Submission failed")
         err_msg = f"❌ Submission Failed: {e}"
         # Test against None, not truthiness: a requests Response is falsy for
         # 4xx/5xx, which is exactly when the body is worth showing.
         failed_response = getattr(e, "response", None)
         if failed_response is not None:
             err_msg += f" | Response: {failed_response.text[:300]}"
         return err_msg, results_df
+# Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🚀 Agent Evaluation Runner 🚀\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
     run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
+    status_box = gr.Textbox(label="📊 Status", lines=4, interactive=False)
+    results_display = gr.DataFrame(
+        label="📋 Detailed Log",
+        headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
+        wrap=True,
+        column_widths=["10%", "25%", "20%", "45%"]
+    )
+    run_button.click(fn=evaluate_and_submit, outputs=[status_box, results_display])
 if __name__ == "__main__":
     logger.info("Launching Gradio application...")
     demo.launch(debug=True, share=False)