wlchee committed on
Commit
fd7ec87
·
verified ·
1 Parent(s): 6b13ec1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime
6
  from smolagents import Tool, ToolCallingAgent
7
  from smolagents.models import InferenceClientModel
8
 
9
- # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Custom Tools ---
@@ -16,13 +16,13 @@ class CalculatorTool(Tool):
16
  input_schema = {
17
  "expression": {
18
  "type": "string",
19
- "description": "Mathematical expression to evaluate (e.g., '2+2')"
20
  }
21
  }
22
  output_schema = {
23
  "result": {
24
  "type": "string",
25
- "description": "The calculated result of the expression"
26
  }
27
  }
28
 
@@ -30,26 +30,26 @@ class CalculatorTool(Tool):
30
  try:
31
  return {"result": str(eval(expression))}
32
  except Exception as e:
33
- return {"result": f"Error: {str(e)}"}
 
34
 
35
  class TimeTool(Tool):
36
  name = "current_time"
37
  description = "Gets current UTC time"
38
- input_schema = {} # No input
39
  output_schema = {
40
  "time": {
41
  "type": "string",
42
- "description": "Current UTC time (YYYY-MM-DD HH:MM:SS)"
43
  }
44
  }
45
 
46
  def use(self) -> dict:
47
  return {"time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")}
48
 
49
- # --- Agent Setup ---
50
  class LocalAgent:
51
  def __init__(self):
52
- print("Initializing agent...")
53
  self.tools = [CalculatorTool(), TimeTool()]
54
  self.agent = ToolCallingAgent(
55
  tools=self.tools,
@@ -61,17 +61,16 @@ class LocalAgent:
61
 
62
  def __call__(self, question: str) -> str:
63
  question_lower = question.lower()
64
- if any(op in question_lower for op in ["calculate", "what is", "+", "-", "*", "/"]):
65
  return CalculatorTool().use(question.replace("?", ""))["result"]
66
  if "time" in question_lower:
67
  return TimeTool().use()["time"]
68
-
69
  try:
70
  return str(self.agent.run(question))
71
  except Exception as e:
72
  return f"Error: {e}"
73
 
74
- # --- Evaluation Logic ---
75
  def run_and_submit_all(profile: gr.OAuthProfile | None):
76
  if not profile:
77
  return "Please login first.", None
@@ -84,22 +83,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
84
  questions = requests.get(f"{api_url}/questions", timeout=15).json()
85
 
86
  answers = []
87
- results = []
88
 
89
  for q in questions:
90
  try:
91
- answer = agent(q["question"])
92
  answers.append({
93
  "task_id": q["task_id"],
94
- "submitted_answer": answer
95
  })
96
- results.append({
97
  "Task ID": q["task_id"],
98
  "Question": q["question"],
99
- "Answer": answer
100
  })
101
  except Exception as e:
102
- results.append({
103
  "Task ID": q["task_id"],
104
  "Question": q["question"],
105
  "Answer": f"Error: {e}"
@@ -116,29 +115,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
116
  return (
117
  f"✅ Score: {result.get('score', 'N/A')}%\n"
118
  f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
119
- pd.DataFrame(results)
120
  )
121
 
122
  except Exception as e:
123
- return f"Evaluation failed: {str(e)}", pd.DataFrame([])
124
 
125
  # --- Gradio UI ---
126
- with gr.Blocks(title="Agent Evaluation Runner") as app:
127
  gr.Markdown("## 🤖 Agent Evaluation with smolagents")
128
- gr.Markdown("Login, then run the evaluation to test your agent.")
129
 
130
- gr.LoginButton() # OAuth Login UI
 
131
 
132
  run_btn = gr.Button("🚀 Run Evaluation")
133
- output = gr.Textbox(label="Result Summary")
134
- results_table = gr.DataFrame(label="Answers and Logs")
135
 
136
  run_btn.click(
137
  fn=run_and_submit_all,
138
- inputs=[], # No need to pass profile manually
139
  outputs=[output, results_table]
140
  )
141
 
142
  if __name__ == "__main__":
143
  app.launch()
144
 
 
 
6
  from smolagents import Tool, ToolCallingAgent
7
  from smolagents.models import InferenceClientModel
8
 
9
+ # Constants
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Custom Tools ---
 
16
  input_schema = {
17
  "expression": {
18
  "type": "string",
19
+ "description": "Math expression to evaluate (e.g. '2+2')"
20
  }
21
  }
22
  output_schema = {
23
  "result": {
24
  "type": "string",
25
+ "description": "The result of the expression"
26
  }
27
  }
28
 
 
30
  try:
31
  return {"result": str(eval(expression))}
32
  except Exception as e:
33
+ return {"result": f"Error: {e}"}
34
+
35
 
36
  class TimeTool(Tool):
37
  name = "current_time"
38
  description = "Gets current UTC time"
39
+ input_schema = {}
40
  output_schema = {
41
  "time": {
42
  "type": "string",
43
+ "description": "Current time in UTC"
44
  }
45
  }
46
 
47
  def use(self) -> dict:
48
  return {"time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")}
49
 
50
+ # --- Agent ---
51
  class LocalAgent:
52
  def __init__(self):
 
53
  self.tools = [CalculatorTool(), TimeTool()]
54
  self.agent = ToolCallingAgent(
55
  tools=self.tools,
 
61
 
62
  def __call__(self, question: str) -> str:
63
  question_lower = question.lower()
64
+ if any(op in question_lower for op in ["calculate", "+", "-", "*", "/", "what is"]):
65
  return CalculatorTool().use(question.replace("?", ""))["result"]
66
  if "time" in question_lower:
67
  return TimeTool().use()["time"]
 
68
  try:
69
  return str(self.agent.run(question))
70
  except Exception as e:
71
  return f"Error: {e}"
72
 
73
+ # --- Evaluation Function ---
74
  def run_and_submit_all(profile: gr.OAuthProfile | None):
75
  if not profile:
76
  return "Please login first.", None
 
83
  questions = requests.get(f"{api_url}/questions", timeout=15).json()
84
 
85
  answers = []
86
+ logs = []
87
 
88
  for q in questions:
89
  try:
90
+ ans = agent(q["question"])
91
  answers.append({
92
  "task_id": q["task_id"],
93
+ "submitted_answer": ans
94
  })
95
+ logs.append({
96
  "Task ID": q["task_id"],
97
  "Question": q["question"],
98
+ "Answer": ans
99
  })
100
  except Exception as e:
101
+ logs.append({
102
  "Task ID": q["task_id"],
103
  "Question": q["question"],
104
  "Answer": f"Error: {e}"
 
115
  return (
116
  f"✅ Score: {result.get('score', 'N/A')}%\n"
117
  f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}",
118
+ pd.DataFrame(logs)
119
  )
120
 
121
  except Exception as e:
122
+ return f"Evaluation failed: {e}", pd.DataFrame([])
123
 
124
  # --- Gradio UI ---
125
+ with gr.Blocks(title="Agent Evaluation") as app:
126
  gr.Markdown("## 🤖 Agent Evaluation with smolagents")
127
+ gr.Markdown("Login, then click 'Run Evaluation' to test your agent.")
128
 
129
+ gr.LoginButton() # Login button visible
130
+ profile = gr.OAuthProfile() # Profile input (not visible)
131
 
132
  run_btn = gr.Button("🚀 Run Evaluation")
133
+ output = gr.Textbox(label="Evaluation Result")
134
+ results_table = gr.DataFrame(label="Answer Log")
135
 
136
  run_btn.click(
137
  fn=run_and_submit_all,
138
+ inputs=[profile], # Must include this
139
  outputs=[output, results_table]
140
  )
141
 
142
  if __name__ == "__main__":
143
  app.launch()
144
 
145
+