Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| from datetime import datetime | |
| from smolagents import Tool, ToolCallingAgent | |
| from smolagents.models import InferenceClientModel | |
# Constants
# Base URL of the scoring service. run_and_submit_all() appends "/questions"
# and "/submit" to it, and the API_URL environment variable overrides it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
| # --- Custom Tools --- | |
class CalculatorTool(Tool):
    """Tool that evaluates plain arithmetic expressions.

    The smolagents ``Tool`` base class reads the ``inputs`` / ``output_type``
    attributes and calls ``forward``; those are provided here so the agent can
    actually invoke the tool. The older ``input_schema`` / ``output_schema``
    attributes and the ``use`` method are kept so existing callers that expect
    a ``{"result": ...}`` dict keep working.

    SECURITY NOTE: earlier versions passed the expression to ``eval()``.
    Expressions can come from a remote question service, so evaluation is now
    limited to numeric literals combined with ``+ - * / // % **`` and unary
    signs. Names, calls, attribute access and everything else are rejected.
    """

    name = "calculator"
    description = "Performs mathematical calculations"

    # Attributes consumed by the smolagents Tool base class.
    inputs = {
        "expression": {
            "type": "string",
            "description": "Math expression to evaluate (e.g. '2+2')",
        }
    }
    output_type = "string"

    # Legacy attributes, retained for backward compatibility.
    input_schema = {
        "expression": {
            "type": "string",
            "description": "Math expression to evaluate (e.g. '2+2')",
        }
    }
    output_schema = {
        "result": {
            "type": "string",
            "description": "The result of the expression",
        }
    }

    # Upper bound on ``**`` exponents so a hostile expression such as
    # ``9**9**9`` cannot tie up the process.
    _MAX_EXPONENT = 1000

    def forward(self, expression: str) -> str:
        """Evaluate ``expression`` and return the result as a string.

        Any failure (syntax error, unsupported construct, division by zero,
        ...) is reported as an ``"Error: ..."`` string instead of raising.
        """
        try:
            return str(self._evaluate(expression))
        except Exception as e:
            return f"Error: {e}"

    def use(self, expression: str) -> dict:
        """Legacy entry point: return ``{"result": <forward(expression)>}``."""
        return {"result": self.forward(expression)}

    @classmethod
    def _evaluate(cls, expression: str):
        """Evaluate an arithmetic expression without using ``eval``.

        Raises:
            SyntaxError: if ``expression`` is not a valid Python expression.
            ValueError: if it contains anything other than numeric literals
                and the supported arithmetic operators.
        """
        # Local imports keep this block independent of the file's import list.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def walk(node):
            if isinstance(node, ast.Expression):
                return walk(node.body)
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is a subclass of int; reject it explicitly.
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    return value
                raise ValueError("only numeric literals are allowed")
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = walk(node.left)
                right = walk(node.right)
                if isinstance(node.op, ast.Pow) and abs(right) > cls._MAX_EXPONENT:
                    raise ValueError("exponent too large")
                return binary_ops[type(node.op)](left, right)
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](walk(node.operand))
            raise ValueError("unsupported expression")

        return walk(ast.parse(expression.strip(), mode="eval"))
class TimeTool(Tool):
    """Tool that reports the current UTC time.

    ``inputs`` / ``output_type`` / ``forward`` are what the smolagents ``Tool``
    base class reads and calls. ``input_schema`` / ``output_schema`` / ``use``
    are kept so existing callers that expect a ``{"time": ...}`` dict keep
    working.
    """

    name = "current_time"
    description = "Gets current UTC time"

    # Attributes consumed by the smolagents Tool base class.
    inputs = {}
    output_type = "string"

    # Legacy attributes, retained for backward compatibility.
    input_schema = {}
    output_schema = {
        "time": {
            "type": "string",
            "description": "Current time in UTC",
        }
    }

    def forward(self) -> str:
        """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS UTC``."""
        # Local import keeps this block independent of the file's import list.
        from datetime import timezone

        # datetime.utcnow() is deprecated; an aware "now" formats identically.
        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    def use(self) -> dict:
        """Legacy entry point: return ``{"time": <forward()>}``."""
        return {"time": self.forward()}
| # --- Agent --- | |
| class LocalAgent: | |
| def __init__(self): | |
| self.tools = [CalculatorTool(), TimeTool()] | |
| self.agent = ToolCallingAgent( | |
| tools=self.tools, | |
| model=InferenceClientModel( | |
| model_id="HuggingFaceH4/zephyr-7b-beta", | |
| api_base="https://api-inference.huggingface.co/models" | |
| ) | |
| ) | |
| def __call__(self, question: str) -> str: | |
| question_lower = question.lower() | |
| if any(op in question_lower for op in ["calculate", "+", "-", "*", "/", "what is"]): | |
| return CalculatorTool().use(question.replace("?", ""))["result"] | |
| if "time" in question_lower: | |
| return TimeTool().use()["time"] | |
| try: | |
| return str(self.agent.run(question)) | |
| except Exception as e: | |
| return f"Error: {e}" | |
| # --- Evaluation Function --- | |
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer them with ``LocalAgent`` and submit the answers.

    Args:
        profile: The logged-in Hugging Face user, or ``None`` when nobody is
            logged in.

    Returns:
        A ``(status_message, log_table)`` tuple. ``log_table`` is ``None`` when
        the user is not logged in and a ``pandas.DataFrame`` otherwise (empty
        when the run failed before any question was processed).
    """
    if not profile:
        return "Please login first.", None

    space_id = os.getenv("SPACE_ID", "local-test")
    api_url = os.getenv("API_URL", DEFAULT_API_URL)

    try:
        agent = LocalAgent()

        # Fail on HTTP errors instead of trying to parse an error page as JSON.
        questions_response = requests.get(f"{api_url}/questions", timeout=15)
        questions_response.raise_for_status()
        questions = questions_response.json()
        if not questions:
            return "Evaluation failed: no questions received.", pd.DataFrame([])

        answers = []
        logs = []
        for q in questions:
            task_id = q.get("task_id")
            question_text = q.get("question")
            # Skip malformed entries rather than aborting the whole run.
            if not task_id or question_text is None:
                continue

            try:
                ans = agent(question_text)
            except Exception as e:
                # Record the failure in the log, but do not submit an answer.
                logs.append({
                    "Task ID": task_id,
                    "Question": question_text,
                    "Answer": f"Error: {e}",
                })
                continue

            answers.append({
                "task_id": task_id,
                "submitted_answer": ans,
            })
            logs.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": ans,
            })

        if not answers:
            return (
                "Evaluation failed: the agent produced no answers to submit.",
                pd.DataFrame(logs),
            )

        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}",
            "answers": answers,
        }
        submit_response = requests.post(
            f"{api_url}/submit", json=submission, timeout=60
        )
        submit_response.raise_for_status()
        result = submit_response.json()

        return (
            f"✅ Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
            pd.DataFrame(logs),
        )
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        return f"Evaluation failed: {e}", pd.DataFrame([])
| # --- Gradio UI --- | |
with gr.Blocks(title="Agent Evaluation") as app:
    gr.Markdown("## 🤖 Agent Evaluation with smolagents")
    gr.Markdown("Login, then click 'Run Evaluation' to test your agent.")

    gr.LoginButton()  # Login button visible

    run_btn = gr.Button("🚀 Run Evaluation")
    output = gr.Textbox(label="Evaluation Result")
    results_table = gr.DataFrame(label="Answer Log")

    # gr.OAuthProfile is not a component and must not be listed in `inputs`.
    # Gradio injects the logged-in user's profile (or None) into the handler
    # based on its `gr.OAuthProfile | None` parameter annotation.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[output, results_table],
    )


if __name__ == "__main__":
    app.launch()