Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / app.py

wlchee

Update app.py

c3e97df verified 12 months ago

raw

history blame

4.97 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	from datetime import datetime
	from smolagents import Tool, ToolCallingAgent
	from smolagents.models import InferenceClientModel

	# --- Constants ---
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# --- Custom Tools with Proper Input/Output Specifications ---
	class CalculatorTool(Tool):
	    """Tool that evaluates a mathematical expression supplied as a string.

	    The tool is exposed to smolagents through ``inputs`` / ``output_type`` /
	    ``forward``. The older ``input_schema`` / ``output_schema`` attributes and
	    the dict-returning ``use`` method are kept so existing direct callers keep
	    working.
	    """

	    name = "calculator"
	    description = "Performs mathematical calculations"
	    # One entry per ``forward`` argument.
	    inputs = {
	        "expression": {
	            "type": "string",
	            "description": "Mathematical expression to evaluate (e.g., '2+2')",
	        }
	    }
	    output_type = "string"

	    # Legacy attribute names, kept for backward compatibility.
	    input_schema = inputs
	    output_schema = {
	        "result": {
	            "type": "string",
	            "description": "The calculated result of the expression",
	        }
	    }

	    def forward(self, expression: str) -> str:
	        """Evaluate ``expression`` and return the result as a string.

	        Args:
	            expression: Text of the expression to evaluate.

	        Returns:
	            ``str()`` of the evaluated value, or ``"Error: <message>"`` when
	            evaluation raises.
	        """
	        try:
	            # SECURITY(review): eval() runs arbitrary Python. Callers in this
	            # file feed it text that originates from a remote questions API;
	            # replace with a restricted arithmetic evaluator before exposing
	            # this tool to untrusted input.
	            return str(eval(expression))
	        except Exception as e:
	            return f"Error: {str(e)}"

	    def use(self, expression: str) -> dict:
	        """Evaluate ``expression`` and wrap the outcome as ``{"result": <str>}``."""
	        return {"result": self.forward(expression)}

	class TimeTool(Tool):
	    """Tool that reports the current time in UTC.

	    The tool is exposed to smolagents through ``inputs`` / ``output_type`` /
	    ``forward``. The older ``input_schema`` / ``output_schema`` attributes and
	    the dict-returning ``use`` method are kept so existing direct callers keep
	    working.
	    """

	    name = "current_time"
	    description = "Gets current UTC time"
	    inputs = {}  # No inputs needed
	    output_type = "string"

	    # Legacy attribute names, kept for backward compatibility.
	    input_schema = {}  # No inputs needed
	    output_schema = {
	        "time": {
	            "type": "string",
	            "description": "Current time in UTC (YYYY-MM-DD HH:MM:SS UTC)",
	        }
	    }

	    def forward(self) -> str:
	        """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS UTC``."""
	        # Local import: the file's import block only brings in `datetime`.
	        from datetime import timezone

	        # An aware "now" replaces the deprecated, naive datetime.utcnow();
	        # the formatted text is unchanged.
	        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

	    def use(self) -> dict:
	        """Return the current UTC time wrapped as ``{"time": <str>}``."""
	        return {"time": self.forward()}

	# --- Enhanced Agent ---
	class LocalAgent:
	    """Answer questions with two local fast paths and a smolagents fallback.

	    Arithmetic questions go to the calculator tool, questions asking for the
	    current time go to the time tool, and everything else is handed to a
	    ``ToolCallingAgent``. Instances are callable: ``agent(question) -> str``.
	    """

	    # Phrases that signal the user wants something computed.
	    _CALC_KEYWORDS = ("calculate", "what is", "how much is")
	    # Word-bounded so that e.g. "sometimes" or "how many times" do not match.
	    _TIME_PATTERN = r"\b(?:current time|what time is it|what(?: is|'s) the time)\b"

	    def __init__(self):
	        """Create the tools and the tool-calling agent that wraps them."""
	        print("Initializing agent with smolagents...")
	        self._calculator = CalculatorTool()
	        self._clock = TimeTool()
	        self.tools = [self._calculator, self._clock]
	        self.agent = ToolCallingAgent(
	            tools=self.tools,
	            model=InferenceClientModel(
	                model_id="HuggingFaceH4/zephyr-7b-beta",
	                # NOTE(review): confirm `api_base` is a keyword this model
	                # class accepts in the installed smolagents version.
	                api_base="https://api-inference.huggingface.co/models",
	            ),
	        )

	    @staticmethod
	    def _extract_expression(question: str) -> str:
	        """Return the first arithmetic expression found in ``question``, or ""."""
	        import re  # local import: `re` is not in the file's import block

	        # Runs made only of digits, whitespace, dots, parentheses and operators.
	        for run in re.findall(r"[\d\s.()+\-*/]+", question):
	            # Drop surrounding blanks and a sentence-ending period.
	            candidate = run.strip().rstrip(". ")
	            # Require "<digit> <operator> <operand>" so lone numbers, hyphens
	            # and slashes in ordinary prose are ignored.
	            if re.search(r"\d\s*[+\-*/]\s*[\d(.]", candidate):
	                return candidate
	        return ""

	    def __call__(self, question: str) -> str:
	        """Answer ``question``.

	        Args:
	            question: The question text.

	        Returns:
	            The calculator result, the current UTC time, the agent's reply
	            converted with ``str()``, or a fixed apology string when the agent
	            raises.
	        """
	        import re  # local import: `re` is not in the file's import block

	        print(f"Processing: {question[:100]}...")
	        question_lower = question.lower()

	        # Fast path 1: arithmetic. Only the extracted expression is evaluated,
	        # never the surrounding natural-language text.
	        expression = self._extract_expression(question)
	        if expression:
	            asks_for_math = any(k in question_lower for k in self._CALC_KEYWORDS)
	            is_bare_expression = (
	                question.replace("?", "").strip().rstrip(". ") == expression
	            )
	            if asks_for_math or is_bare_expression:
	                outcome = self._calculator.use(expression)["result"]
	                # A calculator failure is not an answer; let the agent try.
	                if not outcome.startswith("Error:"):
	                    return outcome

	        # Fast path 2: explicit requests for the current time.
	        if re.search(self._TIME_PATTERN, question_lower):
	            return self._clock.use()["time"]

	        # Use full agent for complex questions
	        try:
	            response = self.agent.run(question)
	            return str(response)
	        except Exception as e:
	            print(f"Agent error: {e}")
	            return "I couldn't process this question."

	# --- Evaluation Runner ---
	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch the questions, answer each one, and submit the answers for scoring.

	    Args:
	        profile: The logged-in user's profile, or ``None`` when nobody is
	            logged in.

	    Returns:
	        A ``(status_message, table)`` pair. ``table`` is ``None`` when the user
	        is not logged in, otherwise a ``pandas.DataFrame`` with one row per
	        processed question (columns "Task ID", "Question", "Answer").
	    """
	    if not profile:
	        return "Please login first", None

	    space_id = os.getenv("SPACE_ID", "local-test")
	    api_url = os.getenv("API_URL", DEFAULT_API_URL)

	    # Created before the try block so the error path can always report
	    # whatever was collected so far.
	    results = []
	    answers = []

	    try:
	        agent = LocalAgent()
	        response = requests.get(f"{api_url}/questions", timeout=15)
	        response.raise_for_status()
	        questions = response.json()

	        for q in questions:
	            task_id = q.get("task_id")
	            question_text = q.get("question")

	            # A malformed entry is logged and skipped rather than aborting
	            # the whole run.
	            if task_id is None or question_text is None:
	                results.append({
	                    "Task ID": task_id,
	                    "Question": question_text,
	                    "Answer": "Error: malformed question entry",
	                })
	                continue

	            try:
	                answer = agent(question_text)
	            except Exception as e:
	                # Failed questions appear in the log but are not submitted.
	                results.append({
	                    "Task ID": task_id,
	                    "Question": question_text,
	                    "Answer": f"Error: {e}",
	                })
	                continue

	            answers.append({
	                "task_id": task_id,
	                "submitted_answer": answer,
	            })
	            results.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Answer": answer,
	            })

	        # Nothing to score: skip the POST instead of submitting an empty list.
	        if not answers:
	            return (
	                "Evaluation failed: no answers were produced to submit",
	                pd.DataFrame(results),
	            )

	        submission = {
	            "username": profile.username,
	            "agent_code": f"https://huggingface.co/spaces/{space_id}",
	            "answers": answers,
	        }

	        response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
	        response.raise_for_status()
	        result = response.json()

	        return (
	            f"Success! Score: {result.get('score', 'N/A')}%\n"
	            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
	            pd.DataFrame(results),
	        )

	    except Exception as e:
	        # Top-level boundary for the UI callback: report the failure as text
	        # together with any rows gathered before it happened.
	        return f"Evaluation failed: {str(e)}", pd.DataFrame(results)

	# --- Gradio Interface ---
	# Build the UI: a heading, a login button, a trigger button and two output widgets.
	with gr.Blocks(title="Agent Evaluation Runner") as app:
	gr.Markdown("""
	## Advanced Agent Evaluation
	Uses smolagents with proper tool schemas
	""")

	gr.LoginButton()
	run_btn = gr.Button("Run Evaluation")
	output = gr.Textbox(label="Results")
	results_table = gr.DataFrame(label="Question Log")

	# Clicking the button calls run_and_submit_all; its two return values fill
	# the textbox and the table, in that order.
	run_btn.click(
	fn=run_and_submit_all,
	inputs=[], # No UI components are passed. NOTE(review): `profile` is presumably injected by Gradio from the gr.OAuthProfile annotation - confirm.
	outputs=[output, results_table]
	)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	app.launch()