import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
# Constants
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# URLs used by run_and_submit_all are built by appending to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Basic Agent
class BasicAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The agent talks to the ``gpt-4o-mini`` model through
    ``OpenAIServerModel`` and is given a DuckDuckGo search tool.
    Instances are callable: ``agent(question)`` returns the answer text.
    """

    def __init__(self):
        """Create the model, the search tool and the wrapped agent.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY not set. Add it in HF → Settings → Secrets."
            )

        model = OpenAIServerModel(
            model_id="gpt-4o-mini",
            api_key=api_key,
        )
        search_tool = DuckDuckGoSearchTool()
        self.agent = CodeAgent(
            model=model,
            tools=[search_tool],
            max_steps=3,  # cap agent steps so one question cannot loop forever
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        The wrapped agent's final answer is not necessarily a ``str`` (it can
        be, for example, a number), so the result is converted explicitly to
        honour the annotated return type and keep submitted answers textual.
        """
        return str(self.agent.run(question))
# Run + Submit
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every evaluation question with the agent and submit the answers.

    Args:
        profile: The logged-in Hugging Face user, or ``None`` when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is ``None`` when
        the run stops before any question is processed (no login, agent
        construction failure, or failure to download the questions);
        otherwise it is a ``pandas.DataFrame`` with one row per answered
        question.
    """
    space_id = os.getenv("SPACE_ID")

    # Guard: nothing can be submitted without a logged-in user.
    if not profile:
        return "Please login first.", None
    username = profile.username

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions_data = fetched.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Collect (task id, question, answer) triples; entries missing either the
    # id or the question text are skipped.
    answered = []
    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if task_id and question_text:
            try:
                reply = agent(question_text)
            except Exception as e:
                # A failing question is recorded as an error answer instead
                # of aborting the whole run.
                reply = f"AGENT ERROR: {e}"
            answered.append((task_id, question_text, reply))

    payload = [
        {"task_id": tid, "submitted_answer": ans} for tid, _, ans in answered
    ]
    table_rows = [
        {"Task ID": tid, "Question": text, "Submitted Answer": ans}
        for tid, text, ans in answered
    ]

    if not payload:
        return "No answers generated.", pd.DataFrame(table_rows)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": payload,
    }

    try:
        posted = requests.post(submit_url, json=submission_data, timeout=60)
        posted.raise_for_status()
        result = posted.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/"
            f"{result.get('total_attempted', '?')})\n"
            f"Message: {result.get('message', '')}"
        )
        return final_status, pd.DataFrame(table_rows)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(table_rows)
# Gradio UI
# Page layout: title, instructions, login button, run button and two outputs.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🤖 Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
**Instructions**
1. Login with your Hugging Face account.
2. Click **Run Evaluation & Submit All Answers**.
3. Wait for the agent to finish.
**Requirements**
- Uses **OpenAI (gpt-4o-mini)**
- Requires `OPENAI_API_KEY` in HF Space Secrets
- Agent is **step-limited** (max 3 steps)
"""
    )

    gr.LoginButton()
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Outputs filled by run_and_submit_all: a status message and a results table.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        interactive=False,
        lines=6,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)