wlchee's picture
Update app.py
c3e97df verified
raw
history blame
4.97 kB
import os
import gradio as gr
import requests
import pandas as pd
from datetime import datetime
from smolagents import Tool, ToolCallingAgent
from smolagents.models import InferenceClientModel
# --- Constants ---
# Base URL of the scoring service. run_and_submit_all() falls back to this value
# when the API_URL environment variable is not set, and appends "/questions" and
# "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Custom Tools with Proper Input/Output Specifications ---
class CalculatorTool(Tool):
    """Tool that evaluates a mathematical expression supplied as text.

    ``inputs``, ``output_type`` and ``forward`` are the attributes the
    smolagents ``Tool`` base class validates and calls when an agent invokes
    the tool. ``input_schema``, ``output_schema`` and ``use`` are kept
    unchanged so that code calling the tool directly keeps working.
    """

    name = "calculator"
    description = "Performs mathematical calculations"

    # Interface consumed by smolagents (argument name -> type/description).
    inputs = {
        "expression": {
            "type": "string",
            "description": "Mathematical expression to evaluate (e.g., '2+2')"
        }
    }
    output_type = "string"

    # Legacy schema attributes, retained for backward compatibility.
    input_schema = inputs
    output_schema = {
        "result": {
            "type": "string",
            "description": "The calculated result of the expression"
        }
    }

    def forward(self, expression: str) -> str:
        """Evaluate *expression* and return the result (or error text) as a string."""
        return self.use(expression)["result"]

    def use(self, expression: str) -> dict:
        """Evaluate *expression* and wrap the outcome in a ``{"result": ...}`` dict.

        Any exception raised during evaluation is caught and reported as
        ``"Error: <message>"`` in the ``result`` field instead of propagating.
        """
        try:
            # SECURITY: eval() executes arbitrary Python. Callers in this file
            # pass text derived from questions fetched over HTTP, so this must
            # be replaced by a restricted arithmetic evaluator before the tool
            # is exposed to input that is not fully trusted.
            return {"result": str(eval(expression))}
        except Exception as e:
            return {"result": f"Error: {str(e)}"}
class TimeTool(Tool):
    """Tool that reports the current time in UTC.

    ``inputs``, ``output_type`` and ``forward`` are the attributes the
    smolagents ``Tool`` base class validates and calls when an agent invokes
    the tool. ``input_schema``, ``output_schema`` and ``use`` are kept
    unchanged so that code calling the tool directly keeps working.
    """

    name = "current_time"
    description = "Gets current UTC time"

    # Interface consumed by smolagents; the tool takes no arguments.
    inputs = {}
    output_type = "string"

    # Legacy schema attributes, retained for backward compatibility.
    input_schema = {}  # No inputs needed
    output_schema = {
        "time": {
            "type": "string",
            "description": "Current time in UTC (YYYY-MM-DD HH:MM:SS)"
        }
    }

    def forward(self) -> str:
        """Return the current UTC time as a formatted string."""
        return self.use()["time"]

    def use(self) -> dict:
        """Return ``{"time": "<YYYY-MM-DD HH:MM:SS> UTC"}`` for the current instant."""
        # Imported locally because the module only imports `datetime` itself.
        from datetime import timezone

        # datetime.utcnow() is deprecated and returns a naive value; an aware
        # UTC datetime formats to exactly the same text.
        now_utc = datetime.now(timezone.utc)
        return {"time": now_utc.strftime("%Y-%m-%d %H:%M:%S UTC")}
# --- Enhanced Agent ---
class LocalAgent:
def __init__(self):
print("Initializing agent with smolagents...")
self.tools = [CalculatorTool(), TimeTool()]
self.agent = ToolCallingAgent(
tools=self.tools,
model=InferenceClientModel(
model_id="HuggingFaceH4/zephyr-7b-beta",
api_base="https://api-inference.huggingface.co/models"
)
)
def __call__(self, question: str) -> str:
print(f"Processing: {question[:100]}...")
question_lower = question.lower()
# Direct tool usage for simple queries
if any(word in question_lower for word in ["calculate", "what is", "how much is", "+", "-", "*", "/"]):
result = CalculatorTool().use(question.replace("?", ""))
return result["result"]
if any(word in question_lower for word in ["time", "current time"]):
result = TimeTool().use()
return result["time"]
# Use full agent for complex questions
try:
response = self.agent.run(question)
return str(response)
except Exception as e:
print(f"Agent error: {e}")
return "I couldn't process this question."
# --- Evaluation Runner ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the question set, answer each question, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in. Its ``username`` is sent with the submission.

    Returns:
        A ``(status_message, table)`` tuple. ``table`` is ``None`` when no
        user is logged in; otherwise it is a DataFrame with one row per
        processed question ("Task ID", "Question", "Answer"), which may be
        empty or partial if the run failed part-way.

    Any exception raised while creating the agent, talking to the scoring API
    or processing the responses is caught and reported in the status message.
    """
    if not profile:
        return "Please login first", None

    space_id = os.getenv("SPACE_ID", "local-test")
    api_url = os.getenv("API_URL", DEFAULT_API_URL)

    # Created before the try block so the failure path can always report
    # whatever was collected so far.
    results = []
    answers = []
    try:
        agent = LocalAgent()
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            # Nothing to answer: do not post an empty submission.
            return "Evaluation failed: no questions received", pd.DataFrame(results)

        for q in questions:
            task_id = q.get("task_id")
            question_text = q.get("question")
            if task_id is None or question_text is None:
                # Malformed item: skip it instead of aborting the whole run.
                continue
            try:
                answer = agent(question_text)
            except Exception as e:
                # Log the failure in the table; failed questions are not submitted.
                results.append({
                    "Task ID": task_id,
                    "Question": question_text,
                    "Answer": f"Error: {e}"
                })
                continue
            answers.append({
                "task_id": task_id,
                "submitted_answer": answer
            })
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer
            })

        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}",
            "answers": answers
        }
        response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
        response.raise_for_status()
        result = response.json()
        return (
            f"Success! Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
            pd.DataFrame(results)
        )
    except Exception as e:
        return f"Evaluation failed: {str(e)}", pd.DataFrame(results)
# --- Gradio Interface ---
# Build the page: a heading, a login button, a button that starts the
# evaluation, a textbox for the status message and a table for the
# per-question log.
with gr.Blocks(title="Agent Evaluation Runner") as app:
    gr.Markdown("""
## Advanced Agent Evaluation
Uses smolagents with proper tool schemas
""")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation")
    output = gr.Textbox(label="Results")
    results_table = gr.DataFrame(label="Question Log")
    # run_and_submit_all returns (status message, DataFrame), mapped in order
    # onto the two output components.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[], # No UI components are passed in. NOTE(review): `profile` is presumably injected by Gradio from the gr.OAuthProfile annotation — confirm.
        outputs=[output, results_table]
    )
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()