Final_Assignment_Template

Sleeping

App Files Community

Snaseem2026 committed on Jan 6

Commit

83c2752

verified ·

1 Parent(s): 4b28196

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -301

app.py CHANGED Viewed

@@ -1,322 +1,150 @@
-from smolagents import CodeAgent, HfApiModel, tool, DuckDuckGoSearchTool
-import requests
 import gradio as gr
-# Tools Definition
-search_tool = DuckDuckGoSearchTool()
-@tool
-def calculator(expression: str) -> str:
-    """Evaluates mathematical expressions safely.
-    Args:
-        expression: A mathematical expression like '2+2', '15*23', or '100/4'
-    Returns:
-        The calculated result as a string
     """
-    try:
-        result = eval(expression, {"__builtins__": {}}, {})
-        return f"{result}"
-    except Exception as e:
-        return f"Error calculating: {str(e)}"
-@tool
-def get_question_file(task_id: str) -> str:
-    """Downloads and reads a file associated with a GAIA question.
-    Args:
-        task_id: The task ID from the question
-    Returns:
-        The file content or error message
     """
-    try:
-        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-        response = requests.get(url, timeout=30)
-        if response.status_code == 200:
-            content = response.text[:1000]
-            return f"File content (first 1000 chars):\n{content}"
-        else:
-            return f"Could not fetch file. Status code: {response.status_code}"
     except Exception as e:
-        return f"Error fetching file: {str(e)}"
-@tool
-def final_answer(answer: str) -> str:
-    """Returns the final answer to the question.
-    IMPORTANT: Use this ONLY ONCE when you have the exact answer.
-    The answer should be precise, concise, and exactly formatted.
-    Args:
-        answer: The exact answer with no extra text or explanation
-    Returns:
-        The answer
-    """
-    return answer.strip()
-def clean_answer(raw_answer: str) -> str:
-    """Cleans the agent's response to extract the exact answer."""
-    if not raw_answer:
-        return ""
-    answer = str(raw_answer).strip()
-    prefixes_to_remove = [
-        "the answer is",
-        "the result is",
-        "final answer:",
-        "answer:",
-        "final_answer:",
-        "result:",
-        "output:",
-    ]
-    answer_lower = answer.lower()
-    for prefix in prefixes_to_remove:
-        if answer_lower.startswith(prefix):
-            answer = answer[len(prefix):].strip()
-            break
-    answer = answer.strip('"\'')
-    if answer.endswith('.') and not answer[-2].isdigit():
-        answer = answer[:-1]
-    return answer
-model = HfApiModel(
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
-    max_tokens=4096,
-    temperature=0.1,
-)
-system_prompt = """You are a precise AI assistant solving GAIA benchmark questions.
-CRITICAL RULES:
-1. Give EXACT answers ONLY - no explanations, no preamble
-2. Format matters: check if answer should be a number, name, date, etc.
-3. For numbers: give just the number (e.g., "42" not "The answer is 42")
-4. For names: use proper capitalization as commonly written
-5. For lists: follow exact format requested (comma-separated, etc.)
-6. Use tools efficiently - web_search for facts, calculator for math
-7. When you have the final answer, use the final_answer tool ONCE
-8. Double-check your answer before using final_answer tool
-EXAMPLES OF CORRECT ANSWERS:
-- Question: "What is 15% of 200?" Answer: "30"
-- Question: "Who founded Microsoft?" Answer: "Bill Gates"
-- Question: "What year was Python released?" Answer: "1991"
-Remember: EXACT MATCH scoring. Close doesn't count!"""
-agent = CodeAgent(
-    model=model,
-    tools=[search_tool, calculator, get_question_file, final_answer],
-    max_steps=12,
-    verbosity_level=2,
-    additional_authorized_imports=["requests", "json"],
-)
-def process_single_question(question_data, progress_callback=None):
-    """Process a single GAIA question"""
-    task_id = question_data['task_id']
-    question_text = question_data['Question']
-    has_file = 'file_name' in question_data and question_data['file_name']
-    prompt = f"""{system_prompt}
-Question: {question_text}
-{f"NOTE: This question has an attached file. Use get_question_file('{task_id}') to access it." if has_file else ""}
-Instructions:
-1. Analyze the question carefully
-2. Use tools as needed (web_search, calculator, get_question_file)
-3. When you have the exact answer, use final_answer(your_answer)
-4. Remember: ONLY the answer, nothing else!
-Now solve this question."""
-    if progress_callback:
-        progress_callback(f"Processing: {question_text[:100]}...")
     try:
-        result = agent.run(prompt)
-        cleaned = clean_answer(str(result))
-        return {
-            "task_id": task_id,
-            "submitted_answer": cleaned,
-            "raw_answer": str(result),
-            "question": question_text[:100]
-        }
     except Exception as e:
-        print(f"Error on task {task_id}: {e}")
-        return {
-            "task_id": task_id,
-            "submitted_answer": "Error",
-            "error": str(e),
-            "question": question_text[:100]
-        }
-def run_full_evaluation(username, progress=gr.Progress()):
-    """Fetches all questions, runs agent on each, and submits to the API"""
-    if not username or username.strip() == "":
-        return {"error": "Please provide your Hugging Face username"}
     try:
-        progress(0, desc="Fetching questions from API...")
-        response = requests.get(
-            "https://agents-course-unit4-scoring.hf.space/questions",
-            timeout=30
-        )
-        questions = response.json()
-        total_questions = len(questions)
-        progress(0.1, desc=f"Got {total_questions} questions. Starting evaluation...")
-        all_answers = []
-        results_log = []
-        for idx, question in enumerate(questions):
-            progress((idx + 1) / total_questions,
-                    desc=f"Processing question {idx + 1}/{total_questions}")
-            result = process_single_question(question)
-            all_answers.append({
-                "task_id": result["task_id"],
-                "submitted_answer": result["submitted_answer"]
-            })
-            results_log.append(result)
-            print(f"\nQuestion {idx + 1}: {result['question']}")
-            print(f"Answer: {result['submitted_answer']}\n")
-        progress(0.95, desc="Submitting answers to scoring API...")
-        submission_data = {
-            "username": username.strip(),
-            "agent_code": "https://huggingface.co/spaces/Snaseem2026/Final_Assignment_Template/tree/main",
-            "answers": all_answers
         }
-        submit_response = requests.post(
-            "https://agents-course-unit4-scoring.hf.space/submit",
-            json=submission_data,
-            timeout=60
-        )
-        if submit_response.status_code == 200:
-            result_data = submit_response.json()
-            progress(1.0, desc="Submission complete!")
-            return {
-                "status": "Success!",
-                "score": result_data.get("score", "N/A"),
-                "total_questions": total_questions,
-                "submission_details": result_data,
-                "sample_answers": results_log[:5]
-            }
-        else:
-            return {
-                "status": "Submission failed",
-                "error": submit_response.text,
-                "sample_answers": results_log[:5]
-            }
-    except Exception as e:
-        return {
-            "status": "Error",
-            "error": str(e)
-        }
-def test_single_question(progress=gr.Progress()):
-    """Test the agent on one random question"""
-    try:
-        progress(0.3, desc="Fetching random question...")
-        response = requests.get(
-            "https://agents-course-unit4-scoring.hf.space/random-question",
-            timeout=30
-        )
-        question = response.json()
-        progress(0.5, desc="Running agent...")
-        result = process_single_question(question)
-        progress(1.0, desc="Complete!")
-        return {
-            "question": question['Question'],
-            "task_id": result['task_id'],
-            "agent_answer": result['submitted_answer'],
-            "raw_output": result.get('raw_answer', 'N/A')
-        }
     except Exception as e:
-        return {"error": str(e)}
-with gr.Blocks(title="GAIA Agent Evaluator") as demo:
-    gr.Markdown("""
-    # GAIA Benchmark Agent - Final Assignment
-    This agent solves GAIA Level 1 questions using reasoning, web search, and calculation tools.
-    **Target Score:** 30% or higher (6/20 questions) to pass
-    ### How to use:
-    1. **Test Mode**: Click "Test on Random Question" to see how your agent performs
-    2. **Full Evaluation**: Enter your HF username and run full evaluation on all 20 questions
-    3. **Submit**: Results automatically submitted to the leaderboard
-    """)
-    with gr.Tab("Test Mode"):
-        gr.Markdown("### Test your agent on a single random question")
-        test_button = gr.Button("Test on Random Question", variant="primary")
-        test_output = gr.JSON(label="Test Results")
-        test_button.click(fn=test_single_question, outputs=test_output)
-    with gr.Tab("Full Evaluation"):
-        gr.Markdown("### Run complete evaluation and submit to leaderboard")
-        username_input = gr.Textbox(
-            label="Your Hugging Face Username",
-            placeholder="e.g., Snaseem2026",
-            info="Required for leaderboard submission"
-        )
-        submit_button = gr.Button("Run Full Evaluation & Submit", variant="primary", size="lg")
-        gr.Markdown("""
-        This will take 10-20 minutes to process all 20 questions.
-        """)
-        results_output = gr.JSON(label="Evaluation Results")
-        submit_button.click(
-            fn=run_full_evaluation,
-            inputs=username_input,
-            outputs=results_output
-        )
-    with gr.Tab("About"):
-        gr.Markdown("""
-        ### Tools Available:
-        - Web Search (DuckDuckGo): For finding current information
-        - Calculator: For mathematical calculations
-        - File Reader: For questions with attachments
-        - Final Answer: Returns the exact answer
-        ### Tips for Better Scores:
-        1. Answers must be EXACT MATCH (case-sensitive)
-        2. No extra text - just the answer
-        3. Format matters (numbers vs words vs dates)
-        4. Test on random questions first before full evaluation
-        """)
-demo.launch()

+import os
 import gradio as gr
+import requests
+import inspect
+import pandas as pd
+from smolagents import CodeAgent, HfApiModel
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring. hf.space"
+# --- Agent Definition ---
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
+        # Initialize the model and agent here
+        try:
+            model = HfApiModel()
+            self.agent = CodeAgent(tools=[], model=model, max_steps=4)
+        except Exception as e:
+            print(f"Error initializing agent: {e}")
+            self.agent = None
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        if self.agent is None:
+            return "Agent failed to initialize properly."
+        try:
+            # Run the agent with the question
+            answer = self.agent. run(question)
+            print(f"Agent returning answer: {str(answer)[:100]}...")
+            return str(answer)
+        except Exception as e:
+            print(f"Error running agent: {e}")
+            return f"Error processing question: {str(e)}"
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the BasicAgent on them, submits all answers, and displays the results.
     """
+    space_id = os.getenv("SPACE_ID")
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
+    try:
+        agent = BasicAgent()
     except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code URL: {agent_code}")
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=30)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[: 500]}")
+        return f"Error decoding server response for questions:  {e}", None
     except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item. get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "answer": answer})
+            results_log.append({"task_id": task_id, "question": question_text[: 50], "answer": str(answer)[:100]})
+        except Exception as e:
+            print(f"Error processing task {task_id}: {e}")
+            answers_payload.append({"task_id": task_id, "answer": f"Error: {str(e)}"})
+    # 4. Submit answers
+    print(f"Submitting {len(answers_payload)} answers...")
     try:
+        payload = {
+            "username": username,
+            "agent_code": agent_code,
+            "answers": answers_payload
         }
+        response = requests.post(submit_url, json=payload, timeout=30)
+        response.raise_for_status()
+        result = response.json()
+        print(f"Submission successful:  {result}")
+        # Create results dataframe
+        df = pd.DataFrame(results_log)
+        return f"✅ Submission successful! Score: {result. get('score', 'N/A')}", df
+    except requests.exceptions.RequestException as e:
+        print(f"Error submitting answers: {e}")
+        return f"Error submitting answers: {e}", pd.DataFrame(results_log)
     except Exception as e:
+        print(f"Unexpected error during submission: {e}")
+        return f"Unexpected error during submission:  {e}", pd.DataFrame(results_log)
+# --- Gradio Interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 Agent Assignment Submission")
+    gr.Markdown("Click the button below to run your agent on all questions and submit your answers.")
+    with gr.Row():
+        submit_btn = gr.Button("🚀 Run & Submit All", variant="primary")
+    status_output = gr.Textbox(label="Status", lines=3)
+    results_output = gr. Dataframe(label="Results")
+    submit_btn. click(
+        fn=run_and_submit_all,
+        inputs=[],
+        outputs=[status_output, results_output]
+    )
+if __name__ == "__main__":
+    demo.launch()