Anil777K committed on
Commit
90aaafc
·
verified ·
1 Parent(s): d3bcb6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -133
app.py CHANGED
@@ -1,55 +1,65 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
9
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
-
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  from smolagents import (
14
  CodeAgent,
15
  DuckDuckGoSearchTool,
16
  InferenceClientModel
17
  )
18
 
 
 
 
 
 
 
 
 
 
19
  class BasicAgent:
20
 
21
  def __init__(self):
22
 
23
- print("Initializing Smart Agent...")
24
 
25
- # Web search tool
26
  search_tool = DuckDuckGoSearchTool()
27
 
28
- # Free Hugging Face model
29
  model = InferenceClientModel(
30
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
31
- )
32
 
33
  # Main Agent
34
  self.agent = CodeAgent(
35
  tools=[search_tool],
36
  model=model,
37
  add_base_tools=True,
38
- max_steps=5
39
  )
40
 
41
  def __call__(self, question: str) -> str:
42
 
43
- print(f"Question: {question}")
44
 
45
  prompt = f"""
46
- Answer the following question.
47
-
48
- IMPORTANT:
49
- - Return ONLY the final answer
50
- - Do NOT explain
51
- - Do NOT write FINAL ANSWER
52
- - Keep the answer short and exact
 
 
 
 
 
 
 
53
 
54
  Question:
55
  {question}
@@ -61,188 +71,200 @@ Question:
61
 
62
  answer = str(response).strip()
63
 
64
- print(f"Agent answer: {answer}")
65
 
66
  return answer
67
 
68
  except Exception as e:
69
 
70
- print(f"Error: {e}")
71
 
72
- return "Error"
73
 
74
- def run_and_submit_all( profile: gr.OAuthProfile | None):
75
- """
76
- Fetches all questions, runs the BasicAgent on them, submits all answers,
77
- and displays the results.
78
- """
79
- # --- Determine HF Space Runtime URL and Repo URL ---
80
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
81
 
82
  if profile:
83
- username= f"{profile.username}"
84
  print(f"User logged in: {username}")
85
  else:
86
- print("User not logged in.")
87
  return "Please Login to Hugging Face with the button.", None
88
 
89
  api_url = DEFAULT_API_URL
 
90
  questions_url = f"{api_url}/questions"
91
  submit_url = f"{api_url}/submit"
92
 
93
- # 1. Instantiate Agent ( modify this part to create your agent)
 
 
94
  try:
95
  agent = BasicAgent()
 
96
  except Exception as e:
97
- print(f"Error instantiating agent: {e}")
98
  return f"Error initializing agent: {e}", None
99
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
 
 
 
100
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
101
  print(agent_code)
102
 
103
- # 2. Fetch Questions
104
- print(f"Fetching questions from: {questions_url}")
 
105
  try:
106
- response = requests.get(questions_url, timeout=15)
 
 
 
 
107
  response.raise_for_status()
 
108
  questions_data = response.json()
109
- if not questions_data:
110
- print("Fetched questions list is empty.")
111
- return "Fetched questions list is empty or invalid format.", None
112
- print(f"Fetched {len(questions_data)} questions.")
113
- except requests.exceptions.RequestException as e:
114
- print(f"Error fetching questions: {e}")
115
- return f"Error fetching questions: {e}", None
116
- except requests.exceptions.JSONDecodeError as e:
117
- print(f"Error decoding JSON response from questions endpoint: {e}")
118
- print(f"Response text: {response.text[:500]}")
119
- return f"Error decoding server response for questions: {e}", None
120
  except Exception as e:
121
- print(f"An unexpected error occurred fetching questions: {e}")
122
- return f"An unexpected error occurred fetching questions: {e}", None
123
 
124
- # 3. Run your Agent
 
 
125
  results_log = []
 
126
  answers_payload = []
127
- print(f"Running agent on {len(questions_data)} questions...")
128
  for item in questions_data:
 
129
  task_id = item.get("task_id")
 
130
  question_text = item.get("question")
 
131
  if not task_id or question_text is None:
132
- print(f"Skipping item with missing task_id or question: {item}")
133
  continue
 
134
  try:
 
135
  submitted_answer = agent(question_text)
136
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
137
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
138
- except Exception as e:
139
- print(f"Error running agent on task {task_id}: {e}")
140
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
141
 
142
- if not answers_payload:
143
- print("Agent did not produce any answers to submit.")
144
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
145
 
146
- # 4. Prepare Submission
147
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
148
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
149
- print(status_update)
 
150
 
151
- # 5. Submit
152
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  try:
154
- response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
 
 
155
  response.raise_for_status()
 
156
  result_data = response.json()
 
157
  final_status = (
158
  f"Submission Successful!\n"
159
  f"User: {result_data.get('username')}\n"
160
  f"Overall Score: {result_data.get('score', 'N/A')}% "
161
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
162
  f"Message: {result_data.get('message', 'No message received.')}"
163
  )
164
- print("Submission successful.")
165
  results_df = pd.DataFrame(results_log)
 
166
  return final_status, results_df
167
- except requests.exceptions.HTTPError as e:
168
- error_detail = f"Server responded with status {e.response.status_code}."
169
- try:
170
- error_json = e.response.json()
171
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
172
- except requests.exceptions.JSONDecodeError:
173
- error_detail += f" Response: {e.response.text[:500]}"
174
- status_message = f"Submission Failed: {error_detail}"
175
- print(status_message)
176
- results_df = pd.DataFrame(results_log)
177
- return status_message, results_df
178
- except requests.exceptions.Timeout:
179
- status_message = "Submission Failed: The request timed out."
180
- print(status_message)
181
- results_df = pd.DataFrame(results_log)
182
- return status_message, results_df
183
- except requests.exceptions.RequestException as e:
184
- status_message = f"Submission Failed: Network error - {e}"
185
- print(status_message)
186
- results_df = pd.DataFrame(results_log)
187
- return status_message, results_df
188
  except Exception as e:
189
- status_message = f"An unexpected error occurred during submission: {e}"
190
- print(status_message)
191
  results_df = pd.DataFrame(results_log)
192
- return status_message, results_df
 
193
 
194
 
195
- # --- Build Gradio Interface using Blocks ---
 
 
196
  with gr.Blocks() as demo:
197
- gr.Markdown("# Basic Agent Evaluation Runner")
198
- gr.Markdown(
199
- """
200
- **Instructions:**
201
 
202
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
203
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
204
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
205
 
206
- ---
207
- **Disclaimers:**
208
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
209
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
210
  """
 
 
211
  )
212
 
213
  gr.LoginButton()
214
 
215
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
 
 
 
 
216
 
217
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
218
- # Removed max_rows=10 from DataFrame constructor
219
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
220
 
221
  run_button.click(
222
  fn=run_and_submit_all,
223
- outputs=[status_output, results_table]
 
 
 
224
  )
225
 
226
- if __name__ == "__main__":
227
- print("\n" + "-"*30 + " App Starting " + "-"*30)
228
- # Check for SPACE_HOST and SPACE_ID at startup for information
229
- space_host_startup = os.getenv("SPACE_HOST")
230
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
231
-
232
- if space_host_startup:
233
- print(f"✅ SPACE_HOST found: {space_host_startup}")
234
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
235
- else:
236
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
237
 
238
- if space_id_startup: # Print repo URLs if SPACE_ID is found
239
- print(f"✅ SPACE_ID found: {space_id_startup}")
240
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
241
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
242
- else:
243
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
244
 
245
- print("-"*(60 + len(" App Starting ")) + "\n")
246
 
247
- print("Launching Gradio Interface for Basic Agent Evaluation...")
248
- demo.launch(debug=True, share=False)
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ import traceback
6
 
 
 
 
 
 
 
7
  from smolagents import (
8
  CodeAgent,
9
  DuckDuckGoSearchTool,
10
  InferenceClientModel
11
  )
12
 
13
+ # -----------------------------
14
+ # Constants
15
+ # -----------------------------
16
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
+
18
+
19
+ # -----------------------------
20
+ # Smart Agent
21
+ # -----------------------------
22
  class BasicAgent:
23
 
24
  def __init__(self):
25
 
26
+ print("Initializing Smart GAIA Agent...")
27
 
28
+ # Web Search Tool
29
  search_tool = DuckDuckGoSearchTool()
30
 
31
+ # Better model
32
  model = InferenceClientModel(
33
+ model_id="Qwen/Qwen2.5-72B-Instruct"
34
+ )
35
 
36
  # Main Agent
37
  self.agent = CodeAgent(
38
  tools=[search_tool],
39
  model=model,
40
  add_base_tools=True,
41
+ max_steps=10
42
  )
43
 
44
  def __call__(self, question: str) -> str:
45
 
46
+ print(f"\nQuestion:\n{question}\n")
47
 
48
  prompt = f"""
49
+ You are solving a GAIA benchmark task.
50
+
51
+ CRITICAL RULES:
52
+ - Return ONLY the exact final answer
53
+ - No explanation
54
+ - No markdown
55
+ - No reasoning
56
+ - No bullet points
57
+ - No labels
58
+ - No "FINAL ANSWER"
59
+ - No extra spaces
60
+ - If answer is a number, return only the number
61
+ - If answer is text, return only the text
62
+ - If answer requires comma-separated values, follow exactly
63
 
64
  Question:
65
  {question}
 
71
 
72
  answer = str(response).strip()
73
 
74
+ print(f"Agent Answer: {answer}")
75
 
76
  return answer
77
 
78
  except Exception as e:
79
 
80
+ traceback.print_exc()
81
 
82
+ return str(e)
83
 
84
+
85
+ # -----------------------------
86
+ # Main Evaluation Function
87
+ # -----------------------------
88
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them with BasicAgent, and submit.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted question, or None when the run
        stops before any question is attempted (not logged in, agent failed
        to initialize, questions could not be fetched or were unusable).
    """
    # Guard clause: the submission payload carries the username, so there is
    # nothing useful to do without a logged-in profile.
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # os.getenv returns None when SPACE_ID is unset; the link below then
    # contains the literal text "None".
    # NOTE(review): presumably SPACE_ID is only set when running as a
    # Hugging Face Space - confirm.
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # -----------------------------
    # Create Agent
    # -----------------------------
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # -----------------------------
    # Space Code URL
    # -----------------------------
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # -----------------------------
    # Fetch Questions
    # -----------------------------
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below needs a non-empty list of dicts. Reject anything else
    # here instead of crashing on `.get` or submitting nothing.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    print(f"Fetched {len(questions_data)} questions")

    # -----------------------------
    # Run Agent
    # -----------------------------
    results_log = []
    answers_payload = []

    for item in questions_data:
        # Skip malformed entries rather than aborting the whole run.
        if not isinstance(item, dict):
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is shown in the results table but is not
            # added to the submission payload.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"ERROR: {e}",
            })
            continue

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    results_df = pd.DataFrame(results_log)

    # Do not POST an empty submission: there is nothing to score.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    # -----------------------------
    # Prepare Submission
    # -----------------------------
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"Submitting {len(answers_payload)} answers...")

    # -----------------------------
    # Submit
    # -----------------------------
    try:
        response = requests.post(
            submit_url,
            json=submission_data,
            timeout=120,
        )
        response.raise_for_status()
        result_data = response.json()
    except requests.exceptions.HTTPError as e:
        # Include the server's response body, which the bare status line
        # in str(e) does not carry.
        body = e.response.text[:500] if e.response is not None else ""
        detail = f" Response: {body}" if body else ""
        return f"Submission Failed: {e}{detail}", results_df
    except Exception as e:
        return f"Submission Failed: {e}", results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/"
        f"{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )

    return final_status, results_df
220
 
221
 
222
+ # -----------------------------
223
+ # Gradio UI
224
+ # -----------------------------
225
with gr.Blocks() as demo:
    # Page heading and a one-line description of what the app does.
    gr.Markdown("# GAIA Smart Agent")
    gr.Markdown(
        """
        Login with Hugging Face and run your AI Agent on GAIA benchmark questions.
        """
    )

    # Hugging Face login button.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs: a status text box and a table of per-question answers.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired; the two outputs receive the
    # (status, results) pair returned by run_and_submit_all.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
  )
258
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
+ # -----------------------------
261
+ # Launch App
262
+ # -----------------------------
263
if __name__ == "__main__":
    # Startup banner printed before the app launches.
    print("\n========== GAIA APP STARTING ==========\n")

    # debug=True and share=False are passed through to Gradio unchanged.
    demo.launch(debug=True, share=False)