Snaseem2026 committed on
Commit
83c2752
·
verified ·
1 Parent(s): 4b28196

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -301
app.py CHANGED
@@ -1,322 +1,150 @@
1
- from smolagents import CodeAgent, HfApiModel, tool, DuckDuckGoSearchTool
2
- import requests
3
  import gradio as gr
4
-
5
# Tools Definition
# Module-level DuckDuckGo search tool instance; it is passed to the CodeAgent
# in its `tools` list further down in this file.
search_tool = DuckDuckGoSearchTool()
7
-
8
@tool
def calculator(expression: str) -> str:
    """Evaluates mathematical expressions safely.

    Only numeric literals, parentheses and the arithmetic operators
    +, -, *, /, //, % and ** are supported.

    Args:
        expression: A mathematical expression like '2+2', '15*23', or '100/4'

    Returns:
        The calculated result as a string
    """
    # Imports are local so the tool body stays self-contained.
    import ast
    import operator

    # SECURITY: the previous implementation called eval() with an empty
    # __builtins__ mapping. That is not a sandbox (attribute access on literals
    # can still reach arbitrary objects), and the expression comes from model
    # output. The expression is now parsed and only whitelisted node types
    # are evaluated.
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; reject it so only real numbers pass.
            if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
                return value
            raise ValueError(f"Unsupported constant: {value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression element: {type(node).__name__}")

    try:
        # strip(): unlike eval(), ast.parse does not tolerate leading whitespace.
        result = evaluate(ast.parse(expression.strip(), mode="eval"))
        return f"{result}"
    except Exception as e:
        # Same error format as before so callers/prompts see a familiar message.
        return f"Error calculating: {str(e)}"
23
-
24
@tool
def get_question_file(task_id: str) -> str:
    """Downloads and reads a file associated with a GAIA question.

    Args:
        task_id: The task ID from the question

    Returns:
        The file content or error message
    """
    try:
        file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        reply = requests.get(file_url, timeout=30)

        # Guard clause: anything but HTTP 200 is reported back as text.
        if reply.status_code != 200:
            return f"Could not fetch file. Status code: {reply.status_code}"

        # Only a 1000-character preview of the body is returned.
        preview = reply.text[:1000]
        return f"File content (first 1000 chars):\n{preview}"
    except Exception as e:
        return f"Error fetching file: {str(e)}"
45
-
46
@tool
def final_answer(answer: str) -> str:
    """Returns the final answer to the question.

    IMPORTANT: Use this ONLY ONCE when you have the exact answer.
    The answer should be precise, concise, and exactly formatted.

    Args:
        answer: The exact answer with no extra text or explanation

    Returns:
        The answer
    """
    # NOTE(review): presumably the @tool decorator exposes the docstring above
    # to the model as the tool description — confirm before rewording it.
    # Only surrounding whitespace is removed; the answer text is otherwise untouched.
    return answer.strip()
60
-
61
def clean_answer(raw_answer: str) -> str:
    """Cleans the agent's response to extract the exact answer.

    Steps, in order: strip surrounding whitespace, remove the first matching
    lead-in phrase (case-insensitive, e.g. "the answer is", "answer:"), strip
    surrounding quote characters, and drop one trailing period unless the
    character right before it is a digit.

    Args:
        raw_answer: Raw agent output. Any falsy value yields "".

    Returns:
        The cleaned answer string.
    """
    if not raw_answer:
        return ""

    answer = str(raw_answer).strip()

    prefixes_to_remove = (
        "the answer is",
        "the result is",
        "final answer:",
        "answer:",
        "final_answer:",
        "result:",
        "output:",
    )

    # Match against a lowercased copy but slice the original to keep its casing.
    answer_lower = answer.lower()
    for prefix in prefixes_to_remove:
        if answer_lower.startswith(prefix):
            answer = answer[len(prefix):].strip()
            break

    answer = answer.strip('"\'')

    # Slice instead of indexing: `answer[-2]` raised IndexError when the whole
    # answer was a single "." (nothing precedes the period). An empty slice is
    # not a digit, so a lone period is simply removed.
    if answer.endswith('.') and not answer[-2:-1].isdigit():
        answer = answer[:-1]

    return answer
90
-
91
# Model used by the agent: a hosted Qwen coder model with a 4096-token output
# budget and a low sampling temperature.
model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    max_tokens=4096,
    temperature=0.1,
)

# Instruction text; process_single_question places it at the top of every prompt.
system_prompt = """You are a precise AI assistant solving GAIA benchmark questions.

CRITICAL RULES:
1. Give EXACT answers ONLY - no explanations, no preamble
2. Format matters: check if answer should be a number, name, date, etc.
3. For numbers: give just the number (e.g., "42" not "The answer is 42")
4. For names: use proper capitalization as commonly written
5. For lists: follow exact format requested (comma-separated, etc.)
6. Use tools efficiently - web_search for facts, calculator for math
7. When you have the final answer, use the final_answer tool ONCE
8. Double-check your answer before using final_answer tool

EXAMPLES OF CORRECT ANSWERS:
- Question: "What is 15% of 200?" Answer: "30"
- Question: "Who founded Microsoft?" Answer: "Bill Gates"
- Question: "What year was Python released?" Answer: "1991"

Remember: EXACT MATCH scoring. Close doesn't count!"""

# The agent combines the model with the four tools defined in this file and is
# limited to 12 steps per run.
agent = CodeAgent(
    model=model,
    tools=[search_tool, calculator, get_question_file, final_answer],
    max_steps=12,
    verbosity_level=2,
    additional_authorized_imports=["requests", "json"],
)
123
-
124
def process_single_question(question_data, progress_callback=None):
    """Process a single GAIA question.

    Builds a prompt from the module-level ``system_prompt`` and the question,
    runs the module-level ``agent`` on it and cleans the result.

    Args:
        question_data: Mapping with a ``task_id``, the question text under
            ``question`` (or the legacy ``Question`` spelling) and optionally
            a ``file_name``.
        progress_callback: Optional callable invoked with a short status string
            before the agent runs.

    Returns:
        A dict with ``task_id``, ``submitted_answer`` and a 100-character
        ``question`` preview, plus ``raw_answer`` on success or ``error`` when
        the agent raised.

    Raises:
        KeyError: If ``task_id`` or the question text is missing.
    """
    task_id = question_data['task_id']

    # Accept both spellings of the question key: the scoring endpoint is read
    # with a lowercase "question" key elsewhere, while this function originally
    # required "Question". Falling back keeps old callers working.
    question_text = question_data.get('question')
    if question_text is None:
        question_text = question_data['Question']

    has_file = bool(question_data.get('file_name'))
    # Built outside the prompt literal to avoid a nested f-string.
    file_note = (
        f"NOTE: This question has an attached file. Use get_question_file('{task_id}') to access it."
        if has_file
        else ""
    )

    prompt = f"""{system_prompt}

Question: {question_text}

{file_note}

Instructions:
1. Analyze the question carefully
2. Use tools as needed (web_search, calculator, get_question_file)
3. When you have the exact answer, use final_answer(your_answer)
4. Remember: ONLY the answer, nothing else!

Now solve this question."""

    if progress_callback:
        progress_callback(f"Processing: {question_text[:100]}...")

    try:
        result = agent.run(prompt)
        cleaned = clean_answer(str(result))

        return {
            "task_id": task_id,
            "submitted_answer": cleaned,
            "raw_answer": str(result),
            "question": question_text[:100]
        }

    except Exception as e:
        # Best effort: a failing question must not abort the whole evaluation.
        print(f"Error on task {task_id}: {e}")
        return {
            "task_id": task_id,
            "submitted_answer": "Error",
            "error": str(e),
            "question": question_text[:100]
        }
167
-
168
def run_full_evaluation(username, progress=gr.Progress()):
    """Fetches all questions, runs agent on each, and submits to the API.

    Args:
        username: Hugging Face username recorded with the submission.
        progress: Gradio progress tracker, called with a fraction and a
            description. The ``gr.Progress()`` default is kept as-is.

    Returns:
        A dict describing the outcome: on success ``status``, ``score``,
        ``total_questions``, ``submission_details`` and ``sample_answers``;
        on a rejected submission ``status``, ``error`` and ``sample_answers``;
        on any exception ``status`` and ``error``; ``error`` alone when the
        username is blank.
    """
    if not username or not username.strip():
        return {"error": "Please provide your Hugging Face username"}

    try:
        progress(0, desc="Fetching questions from API...")
        response = requests.get(
            "https://agents-course-unit4-scoring.hf.space/questions",
            timeout=30
        )
        # Fail fast on an HTTP error: without this check an error body would be
        # parsed as if it were the question list and fail later with a
        # misleading message.
        response.raise_for_status()
        questions = response.json()

        total_questions = len(questions)
        progress(0.1, desc=f"Got {total_questions} questions. Starting evaluation...")

        all_answers = []
        results_log = []

        for number, question in enumerate(questions, start=1):
            progress(number / total_questions,
                     desc=f"Processing question {number}/{total_questions}")

            result = process_single_question(question)
            # Only these two fields go to the scoring API; the full result is
            # kept in results_log for display.
            all_answers.append({
                "task_id": result["task_id"],
                "submitted_answer": result["submitted_answer"]
            })

            results_log.append(result)
            print(f"\nQuestion {number}: {result['question']}")
            print(f"Answer: {result['submitted_answer']}\n")

        progress(0.95, desc="Submitting answers to scoring API...")

        submission_data = {
            "username": username.strip(),
            "agent_code": "https://huggingface.co/spaces/Snaseem2026/Final_Assignment_Template/tree/main",
            "answers": all_answers
        }

        submit_response = requests.post(
            "https://agents-course-unit4-scoring.hf.space/submit",
            json=submission_data,
            timeout=60
        )

        if submit_response.status_code == 200:
            result_data = submit_response.json()
            progress(1.0, desc="Submission complete!")

            return {
                "status": "Success!",
                "score": result_data.get("score", "N/A"),
                "total_questions": total_questions,
                "submission_details": result_data,
                "sample_answers": results_log[:5]
            }
        else:
            return {
                "status": "Submission failed",
                "error": submit_response.text,
                "sample_answers": results_log[:5]
            }

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as data.
        return {
            "status": "Error",
            "error": str(e)
        }
238
-
239
def test_single_question(progress=gr.Progress()):
    """Test the agent on one random question.

    Args:
        progress: Gradio progress tracker, called with a fraction and a
            description.

    Returns:
        A dict with ``question``, ``task_id``, ``agent_answer`` and
        ``raw_output``, or ``{"error": ...}`` when anything raised.
    """
    try:
        progress(0.3, desc="Fetching random question...")
        response = requests.get(
            "https://agents-course-unit4-scoring.hf.space/random-question",
            timeout=30
        )
        # Report HTTP failures directly instead of handing an error body to
        # the agent as if it were a question.
        response.raise_for_status()
        question = response.json()

        progress(0.5, desc="Running agent...")
        result = process_single_question(question)

        progress(1.0, desc="Complete!")

        # Tolerate either spelling of the question key in the API payload.
        question_text = question.get('question', question.get('Question'))

        return {
            "question": question_text,
            "task_id": result['task_id'],
            "agent_answer": result['submitted_answer'],
            "raw_output": result.get('raw_answer', 'N/A')
        }

    except Exception as e:
        return {"error": str(e)}
 
263
 
264
# Gradio UI: an intro header followed by three tabs (test, full evaluation, about).
with gr.Blocks(title="GAIA Agent Evaluator") as demo:
    gr.Markdown("""
    # GAIA Benchmark Agent - Final Assignment

    This agent solves GAIA Level 1 questions using reasoning, web search, and calculation tools.

    **Target Score:** 30% or higher (6/20 questions) to pass

    ### How to use:
    1. **Test Mode**: Click "Test on Random Question" to see how your agent performs
    2. **Full Evaluation**: Enter your HF username and run full evaluation on all 20 questions
    3. **Submit**: Results automatically submitted to the leaderboard
    """)

    # Tab 1: run the agent on a single random question and show the JSON result.
    with gr.Tab("Test Mode"):
        gr.Markdown("### Test your agent on a single random question")
        test_button = gr.Button("Test on Random Question", variant="primary")
        test_output = gr.JSON(label="Test Results")
        test_button.click(fn=test_single_question, outputs=test_output)

    # Tab 2: run every question and submit the answers under the given username.
    with gr.Tab("Full Evaluation"):
        gr.Markdown("### Run complete evaluation and submit to leaderboard")

        username_input = gr.Textbox(
            label="Your Hugging Face Username",
            placeholder="e.g., Snaseem2026",
            info="Required for leaderboard submission"
        )

        submit_button = gr.Button("Run Full Evaluation & Submit", variant="primary", size="lg")

        gr.Markdown("""
        This will take 10-20 minutes to process all 20 questions.
        """)

        results_output = gr.JSON(label="Evaluation Results")

        # The textbox value is passed as `username`; the returned dict is shown as JSON.
        submit_button.click(
            fn=run_full_evaluation,
            inputs=username_input,
            outputs=results_output
        )

    # Tab 3: static help text.
    with gr.Tab("About"):
        gr.Markdown("""
        ### Tools Available:
        - Web Search (DuckDuckGo): For finding current information
        - Calculator: For mathematical calculations
        - File Reader: For questions with attachments
        - Final Answer: Returns the exact answer

        ### Tips for Better Scores:
        1. Answers must be EXACT MATCH (case-sensitive)
        2. No extra text - just the answer
        3. Format matters (numbers vs words vs dates)
        4. Test on random questions first before full evaluation
        """)

# Start the app when the module is executed (no __main__ guard in this revision).
demo.launch()
 
 
1
+ import os
 
2
  import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ from smolagents import CodeAgent, HfApiModel
7
+
8
+ # --- Constants ---
9
# Base URL of the scoring service. It must not contain whitespace: endpoint
# paths such as "/questions" and "/submit" are appended to it to form request URLs.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+
11
+ # --- Agent Definition ---
12
class BasicAgent:
    """Callable wrapper that answers a question by running a ``CodeAgent``.

    If the underlying agent cannot be built, the instance is still created and
    every call returns a fixed failure message instead of raising.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        # Build the model first, then the tool-less agent capped at 4 steps.
        try:
            llm = HfApiModel()
            self.agent = CodeAgent(tools=[], model=llm, max_steps=4)
        except Exception as e:
            print(f"Error initializing agent: {e}")
            self.agent = None

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        runner = self.agent
        if runner is None:
            return "Agent failed to initialize properly."

        try:
            outcome = runner.run(question)
            text = str(outcome)
            print(f"Agent returning answer: {text[:100]}...")
            return text
        except Exception as e:
            # Failures are returned as answer text rather than propagated.
            print(f"Error running agent: {e}")
            return f"Error processing question: {str(e)}"
37
+
38
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers, and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in (in which case nothing is fetched or submitted).

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per processed question, or None when the run
        stops before any question is processed.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # NOTE(review): when SPACE_ID is unset this URL contains the literal "None";
    # left unchanged because the expected fallback is not visible here.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # requests' JSONDecodeError is itself a RequestException, so it has to be
    # caught first; in the previous order this handler could never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            answer = agent(question_text)
        except Exception as e:
            print(f"Error processing task {task_id}: {e}")
            answer = f"Error: {str(e)}"

        # The answer field is named "submitted_answer", matching the payload the
        # earlier revision of this app posted to the same /submit endpoint.
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        # Failed tasks are logged too, so the results table covers every
        # submitted answer.
        results_log.append({"task_id": task_id, "question": question_text[:50], "answer": str(answer)[:100]})

    if not answers_payload:
        # Every item was skipped; do not post an empty submission.
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Submit answers
    print(f"Submitting {len(answers_payload)} answers...")
    try:
        payload = {
            "username": username,
            "agent_code": agent_code,
            "answers": answers_payload
        }

        response = requests.post(submit_url, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        print(f"Submission successful: {result}")

        # Create results dataframe
        df = pd.DataFrame(results_log)

        return f"✅ Submission successful! Score: {result.get('score', 'N/A')}", df

    except requests.exceptions.RequestException as e:
        print(f"Error submitting answers: {e}")
        return f"Error submitting answers: {e}", pd.DataFrame(results_log)
    except Exception as e:
        print(f"Unexpected error during submission: {e}")
        return f"Unexpected error during submission: {e}", pd.DataFrame(results_log)
131
 
132
+ # --- Gradio Interface ---
133
# Single-page UI: login button, one action button, a status box and a results table.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Agent Assignment Submission")
    gr.Markdown("Click the button below to run your agent on all questions and submit your answers.")

    # run_and_submit_all returns "Please Login to Hugging Face with the button."
    # when it receives no OAuth profile, so the page needs a login button;
    # without one the submit action can never get past that check.
    # NOTE(review): presumably the Space must have Hugging Face OAuth enabled
    # for this button to work — confirm in the Space configuration.
    gr.LoginButton()

    with gr.Row():
        submit_btn = gr.Button("🚀 Run & Submit All", variant="primary")

    status_output = gr.Textbox(label="Status", lines=3)
    results_output = gr.Dataframe(label="Results")

    # No explicit inputs: the callback's only parameter is the OAuth profile,
    # which is not a UI component.
    submit_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status_output, results_output]
    )

if __name__ == "__main__":
    demo.launch()