# Hugging Face file-view residue (not part of the program), kept as comments:
#   wlchee's picture
#   Update app.py
#   fd7ec87 verified
#   raw
#   history blame
#   4.37 kB
import os
import gradio as gr
import requests
import pandas as pd
from datetime import datetime
from smolagents import Tool, ToolCallingAgent
from smolagents.models import InferenceClientModel
# Constants
# Base URL of the scoring service. run_and_submit_all() falls back to this
# value when the API_URL environment variable is not set, and appends
# "/questions" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Custom Tools ---
class CalculatorTool(Tool):
    """Tool that evaluates a plain arithmetic expression and returns it as text.

    Accepted syntax: numeric literals, parentheses, unary ``+``/``-`` and the
    binary operators ``+ - * / // % **``.

    SECURITY: the expression text originates from remote questions and from
    model-generated tool calls, so it is parsed with :mod:`ast` and only the
    whitelisted arithmetic nodes are executed.  This deliberately replaces the
    earlier ``eval(expression)`` call, which would have run arbitrary Python.
    """

    name = "calculator"
    description = "Performs mathematical calculations"

    # Attribute names documented for smolagents ``Tool`` subclasses: ``inputs``
    # describes the arguments of ``forward`` and ``output_type`` its result.
    inputs = {
        "expression": {
            "type": "string",
            "description": "Math expression to evaluate (e.g. '2+2')",
        }
    }
    output_type = "string"

    # Original schema attributes, kept so existing readers keep working.
    input_schema = {
        "expression": {
            "type": "string",
            "description": "Math expression to evaluate (e.g. '2+2')",
        }
    }
    output_schema = {
        "result": {
            "type": "string",
            "description": "The result of the expression",
        }
    }

    @staticmethod
    def _evaluate(expression: str):
        """Return the numeric value of ``expression``.

        Raises:
            SyntaxError: if the text is not a valid Python expression.
            ValueError: if it uses anything other than whitelisted arithmetic.
            ArithmeticError: for errors such as division by zero.
        """
        # Imported locally so the class does not depend on module-level imports.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def walk(node):
            # ``type(...) in`` (not isinstance) so that booleans are rejected.
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](walk(node.left), walk(node.right))
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](walk(node.operand))
            raise ValueError("unsupported expression")

        # strip(): unlike eval(), ast.parse() rejects leading whitespace.
        # filename="<string>" keeps syntax-error messages identical to eval()'s.
        tree = ast.parse(expression.strip(), filename="<string>", mode="eval")
        return walk(tree.body)

    def forward(self, expression: str) -> str:
        """Entry point used by smolagents; returns the result (or error) text."""
        return self.use(expression)["result"]

    def use(self, expression: str) -> dict:
        """Evaluate ``expression`` and return ``{"result": <text>}``.

        Never raises: any failure is reported as ``"Error: <message>"`` in the
        ``result`` field.
        """
        try:
            return {"result": str(self._evaluate(expression))}
        except Exception as e:
            return {"result": f"Error: {e}"}
class TimeTool(Tool):
    """Tool that reports the current UTC time as a formatted string."""

    name = "current_time"
    description = "Gets current UTC time"

    # Attribute names documented for smolagents ``Tool`` subclasses; this tool
    # takes no arguments and returns a string.
    inputs = {}
    output_type = "string"

    # Original schema attributes, kept so existing readers keep working.
    input_schema = {}
    output_schema = {
        "time": {
            "type": "string",
            "description": "Current time in UTC",
        }
    }

    def forward(self) -> str:
        """Entry point used by smolagents; returns the formatted timestamp."""
        return self.use()["time"]

    def use(self) -> dict:
        """Return ``{"time": "YYYY-MM-DD HH:MM:SS UTC"}`` for the current instant."""
        # Local import: the module only imports ``datetime`` itself.
        from datetime import timezone

        # Aware "now" replaces the deprecated naive datetime.utcnow(); the
        # formatted text is the same.
        now = datetime.now(timezone.utc)
        return {"time": now.strftime("%Y-%m-%d %H:%M:%S UTC")}
# --- Agent ---
class LocalAgent:
    """Answers a question with a local tool when possible, else with the LLM agent.

    Routing performed by ``__call__``:

    1. If the question, minus a leading "calculate"/"what is" and a trailing
       "?", is purely an arithmetic expression, the calculator answers it.
    2. If the question explicitly asks for the current time, the clock
       answers it.
    3. Everything else (including arithmetic the calculator could not
       evaluate) is passed to the ``ToolCallingAgent``.
    """

    def __init__(self):
        # Keep named references so __call__ reuses these instances instead of
        # building new tools for every question.
        self.calculator = CalculatorTool()
        self.clock = TimeTool()
        self.tools = [self.calculator, self.clock]
        # NOTE(review): ``api_base`` is passed through unchanged from the
        # original code; confirm the installed InferenceClientModel accepts it.
        self.agent = ToolCallingAgent(
            tools=self.tools,
            model=InferenceClientModel(
                model_id="HuggingFaceH4/zephyr-7b-beta",
                api_base="https://api-inference.huggingface.co/models",
            ),
        )

    def __call__(self, question: str) -> str:
        """Return the answer to ``question`` as a string.

        Never raises for agent failures: they are reported as
        ``"Error: <message>"``.
        """
        # Local import: the module does not import ``re`` at file level.
        import re

        # Isolate a candidate expression, e.g. "What is 2+2?" -> "2+2".
        stripped = question.strip().rstrip("?").strip()
        expression = re.sub(
            r"^(?:calculate|what is)\b[\s:]*", "", stripped, flags=re.IGNORECASE
        )
        looks_arithmetic = (
            re.fullmatch(r"[\d\s.+\-*/%()]+", expression) is not None
            and any(ch.isdigit() for ch in expression)
        )
        if looks_arithmetic:
            answer = self.calculator.use(expression)["result"]
            # A failed evaluation is not an answer; let the agent try instead.
            if not answer.startswith("Error:"):
                return answer

        # Match whole phrases so words such as "sometimes" or "times" do not
        # trigger the clock.
        asks_for_time = re.search(
            r"\bwhat(?:'s| is) the (?:current )?time\b"
            r"|\bwhat time is it\b"
            r"|\bcurrent (?:utc )?time\b",
            question.lower(),
        )
        if asks_for_time:
            return self.clock.use()["time"]

        try:
            return str(self.agent.run(question))
        except Exception as e:
            return f"Error: {e}"
# --- Evaluation Function ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each one with ``LocalAgent`` and submit them.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, table)`` tuple.  ``table`` is ``None`` when the
        user is not logged in; otherwise it is a ``pandas.DataFrame`` with one
        row per processed question (possibly empty or partial when the run
        failed part-way).
    """
    if not profile:
        return "Please login first.", None

    space_id = os.getenv("SPACE_ID", "local-test")
    api_url = os.getenv("API_URL", DEFAULT_API_URL)

    # Defined before the try block so that a late failure (e.g. during
    # submission) can still show what was answered so far.
    logs = []
    try:
        agent = LocalAgent()

        response = requests.get(f"{api_url}/questions", timeout=15)
        # Surface HTTP errors directly instead of failing later while trying
        # to decode an error page as JSON.
        response.raise_for_status()
        questions = response.json()
        if not isinstance(questions, list) or not questions:
            return "Evaluation failed: no questions received.", pd.DataFrame([])

        answers = []
        for q in questions:
            task_id = q.get("task_id") if isinstance(q, dict) else None
            text = q.get("question") if isinstance(q, dict) else None
            if task_id is None or text is None:
                # One malformed entry must not abort the whole run.
                logs.append({
                    "Task ID": task_id,
                    "Question": text,
                    "Answer": "Error: malformed question entry",
                })
                continue

            try:
                ans = agent(text)
            except Exception as e:
                # Failed questions are logged but not submitted.
                logs.append({
                    "Task ID": task_id,
                    "Question": text,
                    "Answer": f"Error: {e}",
                })
                continue

            answers.append({"task_id": task_id, "submitted_answer": ans})
            logs.append({"Task ID": task_id, "Question": text, "Answer": ans})

        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}",
            "answers": answers,
        }
        submit_response = requests.post(
            f"{api_url}/submit", json=submission, timeout=60
        )
        submit_response.raise_for_status()
        result = submit_response.json()

        return (
            f"✅ Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
            pd.DataFrame(logs),
        )
    except Exception as e:
        return f"Evaluation failed: {e}", pd.DataFrame(logs)
# --- Gradio UI ---
# Build the single-page UI: login button, run button, status text and log table.
with gr.Blocks(title="Agent Evaluation") as app:
    gr.Markdown("## 🤖 Agent Evaluation with smolagents")
    gr.Markdown("Login, then click 'Run Evaluation' to test your agent.")

    gr.LoginButton()  # Login button visible

    run_btn = gr.Button("🚀 Run Evaluation")
    output = gr.Textbox(label="Evaluation Result")
    results_table = gr.DataFrame(label="Answer Log")

    # No ``inputs``: Gradio injects the OAuth profile itself because
    # run_and_submit_all annotates its parameter as ``gr.OAuthProfile | None``.
    # ``gr.OAuthProfile`` is not a UI component, so it must not be instantiated
    # here or listed as an input.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[output, results_table],
    )

if __name__ == "__main__":
    app.launch()