wahibtim committed on
Commit
ae0d42c
·
verified ·
1 Parent(s): 66b8f74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -93
app.py CHANGED
@@ -3,177 +3,130 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
  from smolagents import CodeAgent, HfApiModel, tool
6
- import inspect
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # ====================== CUSTOM TOOLS ======================
12
- # Add any tools you want here. The more useful ones you add, the better your score.
13
-
14
- # ====================== CUSTOM TOOLS ======================
15
-
16
  @tool
17
  def web_search(query: str) -> str:
18
- """Perform a web search using DuckDuckGo and return the top results.
19
 
20
  Args:
21
  query: The search query to look up on the web.
22
-
23
- Returns:
24
- str: A string containing the title and snippet of the top search results.
25
  """
26
  try:
27
  from duckduckgo_search import DDGS
28
  with DDGS() as ddgs:
29
  results = list(ddgs.text(query, max_results=5))
30
  if not results:
31
- return "No search results found."
32
- formatted = []
33
- for r in results:
34
- formatted.append(f"Title: {r.get('title', 'N/A')}\nSnippet: {r.get('body', r.get('snippet', 'N/A'))}")
35
- return "\n\n".join(formatted)
36
  except Exception as e:
37
- return f"Web search failed: {str(e)}"
38
 
39
 
40
  @tool
41
  def calculate(expression: str) -> str:
42
- """Evaluate a simple mathematical expression.
43
 
44
  Args:
45
- expression: The math expression to calculate (e.g. '2 + 2 * 3', 'sin(3.14)', etc.)
46
-
47
- Returns:
48
- str: The result of the calculation as a string.
49
  """
50
  try:
51
  import math
52
- allowed_names = {"math": math}
53
- return str(eval(expression, {"__builtins__": {}}, allowed_names))
54
  except Exception as e:
55
  return f"Calculation error: {str(e)}"
56
 
57
 
58
- # Add more tools as needed (file handling, image description, code execution, etc.)
59
- # GAIA often requires: search, math, file reading, reasoning over tables/images, etc.
60
-
61
- # ====================== AGENT DEFINITION ======================
62
  class BasicAgent:
63
  def __init__(self):
64
- print("πŸš€ Initializing Smolagents Agent for GAIA benchmark...")
65
-
66
- # Choose your model
67
- # Option 1: Free HF Inference (good enough for many questions)
68
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
69
 
70
- # Option 2: Stronger (if you have an API key) → uncomment and set the env var
71
- # self.model = HfApiModel(
72
- # model_id="gpt-4o-mini", # or "Qwen/Qwen2.5-72B-Instruct"
73
- # provider="openai" if "OPENAI_API_KEY" in os.environ else "hf"
74
- # )
75
-
76
- # Define tools for the agent
77
- tools = [web_search, calculate] # ← Add your custom tools here
78
-
79
  self.agent = CodeAgent(
80
  model=self.model,
81
- tools=tools,
82
- add_base_tools=True, # includes Python interpreter, final_answer, etc.
83
  verbosity_level=1,
84
- max_steps=12, # GAIA questions can need several steps
85
- planning_interval=4 # helps with complex multi-step reasoning
86
  )
87
-
88
  print("βœ… Agent initialized successfully.")
89
 
90
  def __call__(self, question: str) -> str:
91
- print(f"πŸ€– Agent processing question (first 80 chars): {question[:80]}...")
92
-
93
  try:
94
- # Run the agent
95
  result = self.agent.run(question)
96
-
97
- # Smolagents usually returns the final answer nicely
98
  final_answer = str(result).strip()
99
-
100
- print(f"βœ… Agent returned: {final_answer[:200]}{'...' if len(final_answer) > 200 else ''}")
101
  return final_answer
102
-
103
  except Exception as e:
104
  error_msg = f"Agent error: {str(e)}"
105
  print(f"❌ {error_msg}")
106
  return error_msg
107
 
108
 
109
- # ====================== THE REST OF THE CODE (unchanged) ======================
110
- # Keep everything from run_and_submit_all() onward exactly as you had it
111
-
112
  def run_and_submit_all(profile: gr.OAuthProfile | None):
113
- """
114
- Fetches all questions, runs the agent on them, submits all answers,
115
- and displays the results.
116
- """
117
  if not profile:
118
- return "❌ Please Login to Hugging Face with the button.", None
119
 
120
  username = profile.username.strip()
121
- print(f"πŸ‘€ User logged in: {username}")
122
 
123
  api_url = DEFAULT_API_URL
124
  questions_url = f"{api_url}/questions"
125
  submit_url = f"{api_url}/submit"
126
 
127
- # 1. Instantiate Agent
128
  try:
129
  agent = BasicAgent()
130
  except Exception as e:
131
- print(f"Error instantiating agent: {e}")
132
- return f"Error initializing agent: {e}", None
133
 
134
  space_id = os.getenv("SPACE_ID")
135
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE/tree/main"
136
 
137
- # 2. Fetch Questions
138
  try:
139
  response = requests.get(questions_url, timeout=20)
140
  response.raise_for_status()
141
  questions_data = response.json()
142
- if not questions_data:
143
- return "Fetched questions list is empty.", None
144
  print(f"πŸ“₯ Fetched {len(questions_data)} questions.")
145
  except Exception as e:
146
  return f"Error fetching questions: {e}", None
147
 
148
- # 3. Run Agent on all questions
149
  results_log = []
150
  answers_payload = []
151
 
152
- print(f"πŸš€ Running agent on {len(questions_data)} questions... (this may take 5-15 minutes)")
153
-
154
  for item in questions_data:
155
  task_id = item.get("task_id")
156
- question_text = item.get("question")
157
- if not task_id or question_text is None:
158
  continue
159
 
160
  try:
161
- submitted_answer = agent(question_text)
162
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
163
  results_log.append({
164
  "Task ID": task_id,
165
- "Question": question_text[:150] + ("..." if len(question_text) > 150 else ""),
166
- "Submitted Answer": str(submitted_answer)[:300] + ("..." if len(str(submitted_answer)) > 300 else "")
167
  })
168
  except Exception as e:
169
- error_ans = f"AGENT ERROR: {str(e)}"
170
- answers_payload.append({"task_id": task_id, "submitted_answer": error_ans})
171
- results_log.append({"Task ID": task_id, "Question": question_text[:150]+"...", "Submitted Answer": error_ans})
172
 
173
  if not answers_payload:
174
- return "No answers were generated.", pd.DataFrame(results_log)
175
 
176
- # 4. Submit
177
  submission_data = {
178
  "username": username,
179
  "agent_code": agent_code,
@@ -183,17 +136,34 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
183
  try:
184
  response = requests.post(submit_url, json=submission_data, timeout=90)
185
  response.raise_for_status()
186
- result_data = response.json()
187
-
188
- final_status = (
189
- f"βœ… Submission Successful!\n\n"
190
- f"User: {result_data.get('username')}\n"
191
- f"Overall Score: {result_data.get('score', 'N/A')}% "
192
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n\n"
193
- f"Message: {result_data.get('message', 'No message')}"
194
- )
195
 
196
- return final_status, pd.DataFrame(results_log)
197
 
198
  except Exception as e:
199
- return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import requests
4
  import pandas as pd
5
  from smolagents import CodeAgent, HfApiModel, tool
 
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # ====================== TOOLS ======================
 
 
 
 
11
  @tool
def web_search(query: str) -> str:
    """Perform a web search and return top results.

    Args:
        query: The search query to look up on the web.
    """
    # A blank query cannot produce useful hits, so answer without touching
    # the network at all.
    if not query or not query.strip():
        return "Search failed: empty query."
    try:
        # Imported inside the try block so that a missing package is caught
        # below and reported as a tool error string.
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=5))
        if not results:
            return "No results found."

        entries = []
        for hit in results:
            # Coerce missing/None fields to "" so the literal text "None"
            # never leaks into the formatted output.
            title = hit.get("title") or ""
            snippet = hit.get("body") or hit.get("snippet") or ""
            entries.append(f"Title: {title}\nSnippet: {snippet}")
        return "\n\n".join(entries)
    except Exception as e:
        # Failures are returned as text rather than raised.
        return f"Search failed: {str(e)}"
27
 
28
 
29
  @tool
def calculate(expression: str) -> str:
    """Evaluate a mathematical expression safely.

    The expression is parsed with ``ast`` and only numeric literals,
    arithmetic operators, and names from the ``math`` module (plus ``abs``,
    ``round``, ``min`` and ``max``) are evaluated. Every other construct is
    rejected and reported as a calculation error.

    Args:
        expression: The math expression to calculate (e.g. "2 + 2 * 3", "math.sqrt(16)" or "sqrt(16)").
    """
    try:
        # Imports stay local, as in the original tool body.
        import ast
        import math
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        # Public names of the math module, reachable as "math.x" or bare "x".
        math_names = {
            name: value
            for name, value in vars(math).items()
            if not name.startswith("_")
        }
        bare_names = dict(math_names)
        bare_names.update({"abs": abs, "round": round, "min": min, "max": max})

        def evaluate(node):
            """Recursively evaluate a whitelisted AST node."""
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is an int subclass; reject it along with str/bytes/None.
                if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
                    return value
                raise ValueError(f"unsupported constant: {value!r}")
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.Name) and node.id in bare_names:
                return bare_names[node.id]
            # Only "math.<public name>" attribute access is allowed; this is
            # what blocks escapes such as ().__class__.__base__.
            if (
                isinstance(node, ast.Attribute)
                and isinstance(node.value, ast.Name)
                and node.value.id == "math"
                and node.attr in math_names
            ):
                return math_names[node.attr]
            if isinstance(node, ast.Call) and not node.keywords:
                func = evaluate(node.func)
                if callable(func):
                    return func(*[evaluate(arg) for arg in node.args])
            raise ValueError(f"unsupported expression element: {type(node).__name__}")

        # eval() tolerated surrounding whitespace; ast.parse does not, so strip.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))
    except Exception as e:
        return f"Calculation error: {str(e)}"
41
 
42
 
43
+ # ====================== BASIC AGENT ======================
 
 
 
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent``.

    The agent is built with the ``web_search`` and ``calculate`` tools plus
    the library's base tools. Calling the instance runs the agent on one
    question and always returns a string: the stripped answer, or an
    ``"Agent error: ..."`` message if the run raised.
    """

    def __init__(
        self,
        model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
        max_steps: int = 12,
        planning_interval: int = 4,
    ):
        """Create the model and the agent.

        Args:
            model_id: Model identifier passed to ``HfApiModel``.
            max_steps: Value forwarded to ``CodeAgent(max_steps=...)``.
            planning_interval: Value forwarded to
                ``CodeAgent(planning_interval=...)``.
        """
        print("🚀 Initializing Smolagents Agent...")
        # Defaults reproduce the previously hard-coded configuration.
        self.model = HfApiModel(model_id=model_id)

        self.agent = CodeAgent(
            model=self.model,
            tools=[web_search, calculate],
            add_base_tools=True,  # includes final_answer tool etc.
            verbosity_level=1,
            max_steps=max_steps,
            planning_interval=planning_interval,
        )
        print("✅ Agent initialized successfully.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question text handed to ``self.agent.run``.

        Returns:
            The stripped string form of the agent result, or an
            ``"Agent error: ..."`` message when the run raises.
        """
        print(f"🤖 Processing question: {question[:120]}...")
        try:
            result = self.agent.run(question)
            final_answer = str(result).strip()
            print(f"✅ Answer generated ({len(final_answer)} chars)")
            return final_answer
        except Exception as e:
            # Best effort: the error is returned as the answer, not raised.
            error_msg = f"Agent error: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg
71
 
72
 
73
+ # ====================== RUN & SUBMIT ======================
 
 
74
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
75
  if not profile:
76
+ return "❌ Please login with Hugging Face first.", None
77
 
78
  username = profile.username.strip()
79
+ print(f"πŸ‘€ Logged in as: {username}")
80
 
81
  api_url = DEFAULT_API_URL
82
  questions_url = f"{api_url}/questions"
83
  submit_url = f"{api_url}/submit"
84
 
85
+ # Instantiate agent
86
  try:
87
  agent = BasicAgent()
88
  except Exception as e:
89
+ return f"Error creating agent: {e}", None
 
90
 
91
  space_id = os.getenv("SPACE_ID")
92
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE/tree/main"
93
 
94
+ # Fetch questions
95
  try:
96
  response = requests.get(questions_url, timeout=20)
97
  response.raise_for_status()
98
  questions_data = response.json()
 
 
99
  print(f"πŸ“₯ Fetched {len(questions_data)} questions.")
100
  except Exception as e:
101
  return f"Error fetching questions: {e}", None
102
 
103
+ # Run agent on each question
104
  results_log = []
105
  answers_payload = []
106
 
 
 
107
  for item in questions_data:
108
  task_id = item.get("task_id")
109
+ question = item.get("question")
110
+ if not task_id or question is None:
111
  continue
112
 
113
  try:
114
+ answer = agent(question)
115
+ answers_payload.append({"task_id": task_id, "submitted_answer": answer})
116
  results_log.append({
117
  "Task ID": task_id,
118
+ "Question": question[:150] + "..." if len(question) > 150 else question,
119
+ "Submitted Answer": str(answer)[:250] + "..." if len(str(answer)) > 250 else str(answer)
120
  })
121
  except Exception as e:
122
+ err = f"ERROR: {str(e)}"
123
+ answers_payload.append({"task_id": task_id, "submitted_answer": err})
124
+ results_log.append({"Task ID": task_id, "Question": question[:150]+"...", "Submitted Answer": err})
125
 
126
  if not answers_payload:
127
+ return "No answers generated.", pd.DataFrame(results_log)
128
 
129
+ # Submit
130
  submission_data = {
131
  "username": username,
132
  "agent_code": agent_code,
 
136
  try:
137
  response = requests.post(submit_url, json=submission_data, timeout=90)
138
  response.raise_for_status()
139
+ result = response.json()
140
+
141
+ status = f"βœ… Submission Successful!\n\n" \
142
+ f"Score: {result.get('score', 'N/A')}% " \
143
+ f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n\n" \
144
+ f"Message: {result.get('message', 'No message')}"
 
 
 
145
 
146
+ return status, pd.DataFrame(results_log)
147
 
148
  except Exception as e:
149
+ return f"❌ Submission failed: {str(e)}", pd.DataFrame(results_log)
150
+
151
+
152
+ # ====================== GRADIO UI ======================
# Build the Gradio interface: login button, run button, and two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Unit 4 - GAIA Agent Evaluation")
    gr.Markdown("Login → Click the button below. It may take 8–15 minutes to run all questions.")

    gr.LoginButton()
    # "lg" is the documented large size for gr.Button; "large" is not a
    # documented value.
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")

    status_output = gr.Textbox(label="Status / Score", lines=10, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # No explicit inputs are wired. NOTE(review): this relies on Gradio
    # supplying the `profile: gr.OAuthProfile | None` argument from the
    # handler's type hint — confirm against the Gradio OAuth docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    demo.launch(debug=True)