import os
import gradio as gr
import requests
import pandas as pd
# Import smolagents components
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Robust Agent Definition ---
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with web tools.

    An instance is called with a question string and always returns a string:
    either the agent's result converted with ``str()`` or an ``"Error: ..."``
    message when the question is unusable or the agent run raises.
    """

    # Model used when the caller does not pick one.
    DEFAULT_MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"

    # Appended to every question so the returned answer stays short.
    _ANSWER_HINT = (
        "\n\nFinal Answer Requirement: Provide ONLY the specific answer "
        "requested (number, name, or list) with no extra text."
    )

    def __init__(self, model_id: str = DEFAULT_MODEL_ID):
        """Build the model, the tool list and the underlying ``CodeAgent``.

        Args:
            model_id: Identifier passed to ``HfApiModel``. Defaults to
                ``DEFAULT_MODEL_ID`` so ``BasicAgent()`` keeps working.
        """
        print("Initializing High-Performance CodeAgent...")
        self.model = HfApiModel(model_id=model_id)
        # Web search plus page fetching are the tools handed to the agent.
        self.tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
        ]
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            # Also asks smolagents to register its default base tools.
            add_base_tools=True,
        )

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the result as a string.

        Args:
            question: The task text. Must be a non-empty string.

        Returns:
            ``str(result)`` of the agent run, or ``"Error: <reason>"`` when
            the question is missing/blank or the agent raises. This method
            does not propagate exceptions from the agent run.
        """
        # Validate before slicing: a None question used to raise TypeError
        # here, outside the error handling below.
        if not isinstance(question, str) or not question.strip():
            reason = "question must be a non-empty string"
            print(f"Agent Error: {reason}")
            return f"Error: {reason}"

        print(f"Agent received question: {question[:100]}...")
        prompt = f"{question}{self._ANSWER_HINT}"
        try:
            result = self.agent.run(prompt)
        except Exception as e:
            # Failures are reported as the returned text rather than raised.
            print(f"Agent Error: {e}")
            return f"Error: {e}"
        return str(result)
def _collect_answers(agent, questions_data):
    """Run ``agent`` on each well-formed question and gather the outcomes.

    Args:
        agent: Callable taking the question text and returning the answer.
        questions_data: Iterable of items expected to be dicts carrying
            ``"task_id"`` and ``"question"`` keys.

    Returns:
        ``(answers_payload, results_log)``: the answers to submit and one
        display row per attempted question (including failed attempts).
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping malformed question item: {item!r}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if task_id is None or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is shown in the table but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, run the agent on each, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame of
        per-question rows once the agent has run, and ``None`` when the run
        stops earlier (not logged in, agent init failure, fetch failure, or
        an empty/invalid question list).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # The link is still sent, but it will not point at a real Space.
        print("Warning: SPACE_ID is not set; the submitted agent_code link will be invalid.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 2. Fetch the questions.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    # 3. Run the agent.
    answers_payload, results_log = _collect_answers(agent, questions_data)
    if not answers_payload:
        # Nothing worth POSTing; still show whatever was attempted.
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare and submit.
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% \n"
            f"Message: {result_data.get('message')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio Interface ---
# Page layout: login button, run button, status box and results table.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Professional Agent Evaluation Runner")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
    results_table = gr.DataFrame(label="Live Results Trace", wrap=True)
    # No `inputs` are wired: the handler's only parameter is the
    # gr.OAuthProfile, which Gradio is expected to supply from the login
    # session based on the type annotation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch()