TommasoBB committed on
Commit
9ed74de
·
verified ·
1 Parent(s): 7272cb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +353 -404
app.py CHANGED
@@ -1,415 +1,319 @@
1
  import os
2
- import base64
3
- from io import BytesIO
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- import tools
8
- from smolagents import InferenceClientModel
9
- from typing import TypedDict, List, Dict, Any, Optional
10
- from langgraph.graph import StateGraph, START, END
11
-
12
- # Helper to build a smolagents-compatible message list
13
- def _msg(content: str) -> list:
14
- return [{"role": "user", "content": content}]
15
 
 
 
 
16
 
17
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Models via HF Inference API (correct method for HF Spaces) ---
# InferenceClientModel routes all calls through the HF Serverless Inference API.
# No GPU or local model weights are required in the Space container.

# General-purpose text model, used by the classify / handle_file / answer /
# evaluate graph nodes below.
model = InferenceClientModel(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    max_tokens=2048,
    temperature=0.3,
)

# Math-specialised model, used only by the handle_math node.
math_model = InferenceClientModel(
    model_id="deepseek-ai/deepseek-math-7b-instruct",
    max_tokens=2048,
    temperature=0.3,
)

# Vision model for image / OCR tasks — also served via Inference API
# (no temperature override: provider default sampling is used here).
vision_model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-VL-7B-Instruct",
    max_tokens=2048,
)
 
 
 
 
 
 
 
40
 
 
 
 
 
 
41
 
42
- def _extract_text_from_response(response: Any) -> str:
43
- """Normalize model responses into plain text."""
44
- if response is None:
45
- return ""
46
- if isinstance(response, str):
47
- return response
48
- if isinstance(response, dict):
49
- for key in ("content", "answer", "output", "text", "solution", "extracted_info"):
50
- if key in response and response[key] is not None:
51
- return str(response[key])
52
- return str(response)
53
- content = getattr(response, "content", None)
54
- if content is not None:
55
- return str(content)
56
- return str(response)
57
-
58
-
59
# --- State ---
class AgentState(TypedDict):
    """Shared state threaded through every LangGraph node."""
    question: str                    # raw task question text
    task_id: Optional[str]           # task id used to fetch attachments
    file_name: Optional[str]         # attachment file name, if any
    is_searching: Optional[bool]     # classifier flag: needs web search
    have_file: Optional[bool]        # classifier flag: has attached file
    is_math: Optional[bool]          # classifier flag: math problem
    have_image: Optional[bool]       # classifier flag: has an image
    final_answer: Optional[str]      # synthesized answer, set by the answer node
    retry_count: Optional[int]       # retry counter driven by evaluate (capped at 2)
    messages: List[Dict[str, Any]]   # accumulated conversation trace across nodes
71
-
72
-
73
- # --- Nodes ---
74
-
75
def read(state: "AgentState") -> dict:
    """Agent reads and logs the incoming question."""
    # Log only a 50-character preview to keep console output short.
    preview = state["question"][:50]
    print(f"Agent is reading the question: {preview}...")
    return {}
80
-
81
-
82
def classify(state: AgentState) -> dict:
    """Agent classifies the question to determine which tools to use.

    Asks the text model for a JSON object of four boolean routing flags and
    merges them (plus a short trace) back into the graph state.
    """
    question = state["question"].lower()

    # Doubled braces {{ }} render as literal braces in the f-string example.
    prompt = f"""
    You are an agent that classifies questions to determine which tools to use.
    Classify the following question into the categories: 'need to be searched on web/wikipedia', 'has a file in the question', 'is a math problem', 'has an image in the question'.
    Question: {question}
    Return a JSON object with boolean fields for each category, for example:
    {{
        "is_searching": true,
        "have_file": false,
        "is_math": false,
        "have_image": false
    }}
    """
    messages = _msg(prompt)
    response = model(messages)
    raw = _extract_text_from_response(response)

    # Pull the first {...} object out of the reply; tolerate malformed JSON.
    import json, re
    match = re.search(r'\{.*?\}', raw, re.DOTALL)
    data = {}
    if match:
        try:
            data = json.loads(match.group())
        except json.JSONDecodeError:
            pass

    # Missing/unparsable keys default to False, so routing degrades to the
    # plain "answer" path rather than crashing.
    is_searching = bool(data.get("is_searching", False))
    have_file = bool(data.get("have_file", False))
    is_math = bool(data.get("is_math", False))
    have_image = bool(data.get("have_image", False))
    print(f"Classification: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}")

    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Classify the question to determine which tools to use."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}"},
    ]
    return {
        "is_searching": is_searching,
        "have_file": have_file,
        "is_math": is_math,
        "have_image": have_image,
        "messages": new_messages,
    }
129
 
130
 
131
def handele_search(state: "AgentState") -> dict:
    """Agent performs a web search if classified as needing search."""
    # NOTE: the misspelled function name is kept intentionally — the graph
    # registers this callable under the node name "handle_search" elsewhere.
    question = state["question"]
    print(f"Agent is performing a web search for: {question[:50]}...")
    search_results = tools.WebSearchTool()(question)
    print(f"Search results: {search_results[:100]}...")
    trace = [
        {"role": "system", "content": "Perform a web search if classified as needing search."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Search results: {search_results[:100]}..."},
    ]
    return {"search_results": search_results, "messages": state.get("messages", []) + trace}
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
def handle_image(state: AgentState) -> dict:
    """Agent handles an image using a vision model via the HF Inference API.

    Instead of loading a local transformer model (which would be too heavy for
    a standard Space), the image is forwarded to a vision-capable
    InferenceClientModel (Qwen2.5-VL) through the HF Serverless Inference API.
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    # Download the attachment as a data: URI (empty string when unavailable).
    image_reader = tools.ImageReaderTool()
    image_data_uri = image_reader(task_id, file_name) if task_id and file_name else ""

    # Bail out early when the image cannot be fetched; downstream nodes see
    # empty description/transcription rather than an exception.
    if not image_data_uri or image_data_uri.startswith("Failed"):
        print(f"Could not download image for task {task_id}")
        new_messages = state.get("messages", []) + [
            {"role": "assistant", "content": f"[Could not download image '{file_name}' for analysis.]"},
        ]
        return {"image_description": "", "transcribed_text": "", "messages": new_messages}

    prompt_text = (
        f"Analyze the attached image in detail.\n"
        f"Describe its content and transcribe all text visible in it.\n\n"
        f"Question: {question}\n\n"
        f"Return a JSON object: "
        f'{{ "image_description": "...", "transcribed_text": "..." }}'
    )

    # Send image + text to the vision model via the HF Inference API.
    # InferenceClientModel accepts OpenAI-style multimodal message format.
    vision_messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_data_uri}},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]
    try:
        response = vision_model(vision_messages)
        ocr_text = _extract_text_from_response(response)
    except Exception as e:
        # Keep the error text as the "result" so the answer node still gets context.
        ocr_text = f"Vision model error: {e}"

    # Try to split the reply into the two JSON fields; on any parse failure
    # both fields fall back to the raw model output.
    import json, re
    match = re.search(r'\{.*?\}', ocr_text, re.DOTALL)
    image_description = ocr_text
    transcribed_text = ocr_text
    if match:
        try:
            data = json.loads(match.group())
            image_description = data.get("image_description", ocr_text)
            transcribed_text = data.get("transcribed_text", ocr_text)
        except json.JSONDecodeError:
            pass

    print(f"Image description: {image_description[:100]}...")
    print(f"Transcribed text: {transcribed_text[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Analyze and describe the image if classified as having an image."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Image description: {image_description[:100]}..., Transcribed text: {transcribed_text[:100]}..."},
    ]
    return {"image_description": image_description, "transcribed_text": transcribed_text, "messages": new_messages}
211
-
212
-
213
def handle_file(state: AgentState) -> dict:
    """Agent processes the file if classified as having a file.

    Downloads the attachment text, splices it into a prompt, and asks the
    text model to extract information relevant to the question.
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    # Fetch the attachment contents; empty string when ids are missing.
    file_reader = tools.FileReaderTool()
    file_content = file_reader(task_id, file_name) if task_id and file_name else ""

    # Build the file section of the prompt; note the "could not be retrieved"
    # fallback so the model knows a file was referenced but is unavailable.
    file_context = ""
    if file_content:
        file_context = f"\n\n--- Attached file: {file_name} ---\n{file_content}\n--- End of file ---"
    elif file_name:
        file_context = f"\n\n[Note: A file '{file_name}' was referenced but could not be retrieved.]"

    prompt = (
        f"You are an agent that can read and extract information from files.\n"
        f"Read the attached file content carefully and extract any relevant information "
        f"that could help answer the question.\n\n"
        f"Question: {question}{file_context}\n\n"
        f'Return a JSON object: {{ "extracted_info": "..." }}'
    )
    messages = _msg(prompt)
    response = model(messages)
    extracted_info = _extract_text_from_response(response)
    print(f"Extracted file info: {extracted_info[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Read and extract information from the attached file."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Extracted info: {extracted_info[:100]}..."},
    ]
    return {"extracted_info": extracted_info, "messages": new_messages}
245
-
246
-
247
def handle_math(state: "AgentState") -> dict:
    """Agent handles a math problem if classified as a math problem."""
    question = state["question"]
    print(f"Agent is handling a math problem: {question[:50]}...")
    # Delegate to the math-specialised model with a step-by-step instruction.
    solver_prompt = f"Solve the following math problem step by step:\n\n{question}"
    solution = _extract_text_from_response(math_model(_msg(solver_prompt)))
    print(f"Math solution: {solution[:100]}...")
    trace = [
        {"role": "system", "content": "Handle the question if classified as a math problem."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Math solution: {solution[:100]}..."},
    ]
    return {"math_solution": solution, "messages": state.get("messages", []) + trace}
261
-
262
-
263
def answer(state: AgentState) -> dict:
    """Synthesize a final answer from all gathered context in messages."""
    question = state["question"]
    messages_history = state.get("messages", [])

    # Only assistant turns carry tool/LLM findings worth feeding back in.
    context_parts = [
        msg["content"]
        for msg in messages_history
        if msg.get("role") == "assistant"
    ]
    context = "\n".join(context_parts) if context_parts else "No additional context gathered."

    # GAIA-style answer-formatting instructions plus the accumulated context.
    prompt = (
        "You are a general AI assistant. I will ask you a question. Report your thoughts, "
        "and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
        "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated "
        "list of numbers and/or strings. If you are asked for a number, don't use comma to write "
        "your number neither use units such as $ or percent sign unless specified otherwise. "
        "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), "
        "and write the digits in plain text unless specified otherwise. If you are asked for a comma "
        "separated list, apply the above rules depending of whether the element to be put in the list "
        "is a number or a string.\n\n"
        f"Question: {question}\n\n"
        f"Context gathered:\n{context}\n"
    )
    messages = _msg(prompt)
    response = model(messages)
    raw_response = _extract_text_from_response(response)

    # Keep only the text after the last template marker, if present.
    if "FINAL ANSWER:" in raw_response:
        final_answer = raw_response.split("FINAL ANSWER:")[-1].strip()
    else:
        final_answer = raw_response.strip()

    print(f"Final answer: {final_answer[:100]}...")
    return {"final_answer": final_answer}
299
-
300
-
301
def evaluate(state: AgentState) -> dict:
    """LLM evaluates whether the current final_answer is adequate.

    On an inadequate answer the retry counter is incremented and every
    routing flag is cleared, so the retry goes through the plain web-search
    path (see route_after_evaluate).
    """
    import json, re
    question = state["question"]
    current_answer = state.get("final_answer", "")
    retry_count = state.get("retry_count", 0) or 0

    prompt = (
        f"You are a strict evaluator. Given the question and a candidate answer, decide if the "
        f"answer is complete, relevant, and not an error message.\n\n"
        f"Question: {question}\nCandidate answer: {current_answer}\n\n"
        f'Return ONLY a JSON object:\n'
        f'{{"is_adequate": true}} if the answer looks correct and complete,\n'
        f'{{"is_adequate": false}} if the answer is wrong, incomplete, an error, or says it could not find information.'
    )
    response = model(_msg(prompt))
    raw = _extract_text_from_response(response)
    # Extract the first JSON-looking object; default to "adequate" on any
    # parse failure so a flaky evaluator cannot cause endless retries.
    match = re.search(r'\{.*?\}', raw, re.DOTALL)
    data = {}
    if match:
        try:
            data = json.loads(match.group())
        except json.JSONDecodeError:
            pass
    is_adequate = bool(data.get("is_adequate", True))
    print(f"Evaluation: is_adequate={is_adequate}, retry_count={retry_count}")
    return {
        "retry_count": retry_count + (0 if is_adequate else 1),
        "is_searching": False if not is_adequate else state.get("is_searching"),
        "have_file": False if not is_adequate else state.get("have_file"),
        "is_math": False if not is_adequate else state.get("is_math"),
        "have_image": False if not is_adequate else state.get("have_image"),
    }
334
 
335
 
336
def route_after_evaluate(state: "AgentState") -> str:
    """Loop back to web search for up to two retries, otherwise finish."""
    retries = state.get("retry_count", 0) or 0
    if 0 < retries <= 2:
        print(f"Answer inadequate retry {retries}/2, routing to web search")
        return "handle_search"
    return END
342
-
343
-
344
def route_after_classify(state: "AgentState") -> str:
    """Dispatch to the first matching handler, in fixed priority order."""
    # Priority: image > file > math > search; default is the answer node.
    routing = (
        ("have_image", "handle_image"),
        ("have_file", "handle_file"),
        ("is_math", "handle_math"),
        ("is_searching", "handle_search"),
    )
    for flag, node in routing:
        if state.get(flag):
            return node
    return "answer"
354
-
355
-
356
# --- Build LangGraph ---
# Pipeline: read → classify → (one specialist handler) → answer → evaluate,
# where evaluate may loop back to handle_search for up to two retries.
agent_graph = StateGraph(AgentState)
agent_graph.add_node("read", read)
agent_graph.add_node("classify", classify)
agent_graph.add_node("handle_search", handele_search)  # function name is misspelled at its definition
agent_graph.add_node("handle_image", handle_image)
agent_graph.add_node("handle_file", handle_file)
agent_graph.add_node("handle_math", handle_math)
agent_graph.add_node("answer", answer)
agent_graph.add_node("evaluate", evaluate)

agent_graph.add_edge(START, "read")
agent_graph.add_edge("read", "classify")
# classify fans out to exactly one handler (or straight to answer).
agent_graph.add_conditional_edges("classify", route_after_classify)
agent_graph.add_edge("handle_search", "answer")
agent_graph.add_edge("handle_image", "answer")
agent_graph.add_edge("handle_file", "answer")
agent_graph.add_edge("handle_math", "answer")
agent_graph.add_edge("answer", "evaluate")
# evaluate either ends the run (END) or retries via web search.
agent_graph.add_conditional_edges("evaluate", route_after_evaluate)

compiled_agent = agent_graph.compile()
378
-
379
-
380
# --- Agent ---
class BasicAgent:
    """Thin callable wrapper that feeds one question through the compiled graph."""

    def __init__(self):
        # Tool instances are created eagerly; the graph nodes construct their
        # own tools, so these mainly serve as a startup sanity check.
        self.file_reader = tools.FileReaderTool()
        self.image_reader = tools.ImageReaderTool()
        self.web_search = tools.WebSearchTool()
        print("Agent initialized.")

    def __call__(self, question: str, task_id: str = "", file_name: str = "") -> str:
        """Run the graph on one question and return the final answer string."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Seed every AgentState field explicitly so nodes can rely on defaults.
        result_state = compiled_agent.invoke({
            "question": question,
            "task_id": task_id,
            "file_name": file_name,
            "messages": [],
            "is_searching": False,
            "have_file": False,
            "is_math": False,
            "have_image": False,
            "final_answer": "",
            "retry_count": 0,
        })
        final_answer = result_state.get("final_answer", "No answer produced.")
        print(f"Agent returning answer: {final_answer[:100]}...")
        return final_answer
405
-
406
-
407
- def run_and_submit_all(profile: gr.OAuthProfile | None):
408
- """Fetches all questions, runs the BasicAgent on them, submits all answers."""
409
- space_id = os.getenv("SPACE_ID")
410
 
411
  if profile:
412
- username = f"{profile.username}"
413
  print(f"User logged in: {username}")
414
  else:
415
  print("User not logged in.")
@@ -419,52 +323,80 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
419
  questions_url = f"{api_url}/questions"
420
  submit_url = f"{api_url}/submit"
421
 
 
422
  try:
423
- agent = BasicAgent()
424
  except Exception as e:
425
  print(f"Error instantiating agent: {e}")
426
  return f"Error initializing agent: {e}", None
427
 
 
428
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
429
  print(agent_code)
430
 
 
431
  print(f"Fetching questions from: {questions_url}")
432
  try:
433
  response = requests.get(questions_url, timeout=15)
434
  response.raise_for_status()
435
  questions_data = response.json()
436
  if not questions_data:
437
- return "Fetched questions list is empty or invalid format.", None
 
438
  print(f"Fetched {len(questions_data)} questions.")
439
  except requests.exceptions.RequestException as e:
 
440
  return f"Error fetching questions: {e}", None
 
 
 
 
441
  except Exception as e:
 
442
  return f"An unexpected error occurred fetching questions: {e}", None
443
 
 
444
  results_log = []
445
  answers_payload = []
446
  print(f"Running agent on {len(questions_data)} questions...")
447
  for item in questions_data:
448
  task_id = item.get("task_id")
449
- question_text = item.get("question") or item.get("Question")
450
- if not task_id or question_text is None:
 
 
 
451
  print(f"Skipping item with missing task_id or question: {item}")
452
  continue
453
- file_name = item.get("file_name", "")
 
454
  if file_name:
455
- print(f"Task {task_id} has attached file: {file_name}")
 
 
 
 
 
 
456
  try:
457
- submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
 
458
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
459
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
460
  except Exception as e:
461
- print(f"Error running agent on task {task_id}: {e}")
462
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
463
 
464
  if not answers_payload:
 
465
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
466
 
 
467
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
468
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
469
  try:
470
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -478,24 +410,37 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
478
  f"Message: {result_data.get('message', 'No message received.')}"
479
  )
480
  print("Submission successful.")
481
- return final_status, pd.DataFrame(results_log)
 
482
  except requests.exceptions.HTTPError as e:
483
  error_detail = f"Server responded with status {e.response.status_code}."
484
  try:
485
  error_json = e.response.json()
486
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
487
- except Exception:
488
  error_detail += f" Response: {e.response.text[:500]}"
489
- return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
 
 
 
490
  except requests.exceptions.Timeout:
491
- return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
 
 
 
492
  except requests.exceptions.RequestException as e:
493
- return f"Submission Failed: Network error - {e}", pd.DataFrame(results_log)
 
 
 
494
  except Exception as e:
495
- return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
 
 
 
496
 
497
 
498
- # --- Gradio Interface ---
499
  with gr.Blocks() as demo:
500
  gr.Markdown("# Basic Agent Evaluation Runner")
501
  gr.Markdown(
@@ -509,14 +454,16 @@ with gr.Blocks() as demo:
509
  ---
510
  **Disclaimers:**
511
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
512
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
513
  """
514
  )
515
 
516
  gr.LoginButton()
517
 
518
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
519
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
520
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
521
 
522
  run_button.click(
@@ -525,9 +472,10 @@ with gr.Blocks() as demo:
525
  )
526
 
527
  if __name__ == "__main__":
528
- print("\n" + "-" * 30 + " App Starting " + "-" * 30)
 
529
  space_host_startup = os.getenv("SPACE_HOST")
530
- space_id_startup = os.getenv("SPACE_ID")
531
 
532
  if space_host_startup:
533
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -535,13 +483,14 @@ if __name__ == "__main__":
535
  else:
536
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
537
 
538
- if space_id_startup:
539
  print(f"✅ SPACE_ID found: {space_id_startup}")
540
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
541
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
542
  else:
543
- print("ℹ️ SPACE_ID environment variable not found (running locally?).")
 
 
544
 
545
- print("-" * (60 + len(" App Starting ")) + "\n")
546
  print("Launching Gradio Interface for Basic Agent Evaluation...")
547
- demo.launch(debug=True, share=False)
 
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import math
6
+ import statistics
7
+ import ast
8
+ import pathlib
9
+ import io
10
+ import tempfile
11
+ import base64
12
+ import urllib.request
13
 
14
+ from huggingface_hub import InferenceClient
15
+ from smolagents import CodeAgent, HfApiModel, tool
16
+ from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
17
 
 
 
18
 
19
# --- Custom tool: safe arithmetic calculator ---

# AST node types permitted in calculator expressions. Hoisted to module level
# so the whitelist is built once instead of on every call. ast.Num is kept
# for older trees even though modern parsers emit ast.Constant.
_CALC_ALLOWED_NODES = {
    ast.Expression, ast.BinOp, ast.UnaryOp, ast.Num, ast.Constant,
    ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Pow, ast.Mod, ast.USub, ast.UAdd,
    ast.FloorDiv, ast.Load, ast.Compare, ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE,
    ast.Call, ast.Name, ast.Tuple, ast.List,
}

# Callables exposed to the evaluated expression: everything public from math,
# plus a few statistics/builtin helpers. Built once at import time.
_CALC_FUNCS = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
_CALC_FUNCS.update({"mean": statistics.mean, "median": statistics.median,
                    "sum": sum, "min": min, "max": max, "round": round, "abs": abs})


@tool
def calculator(expression: str) -> str:
    """
    Evaluate a safe arithmetic or mathematical expression.
    Use this for numeric computations: arithmetic, trig, sqrt, logarithms, etc.

    Args:
        expression: A Python-style math expression, e.g. "sqrt(144) + 2**10" or "mean([3,5,7])"
    """
    try:
        node = ast.parse(expression, mode="eval")
        # Reject any AST node outside the whitelist (attribute access,
        # subscripts, lambdas, comprehensions, keywords, … all disallowed).
        for sub in ast.walk(node):
            if type(sub) not in _CALC_ALLOWED_NODES:
                raise ValueError(f"Disallowed expression: {type(sub).__name__}")
        # Empty __builtins__ plus the curated function table confines eval()
        # to pure math: names resolve only against _CALC_FUNCS.
        val = eval(compile(node, "<calc>", "eval"), {"__builtins__": {}}, _CALC_FUNCS)
        return str(val)
    except Exception as e:
        return f"ERROR: calculator failed: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
# --- Multimodal tool: image OCR via FireRed-OCR ---
@tool
def ocr_image(image_source: str) -> str:
    """
    Extract all text visible in an image using FireRed-OCR (a VLM-based OCR model).
    Accepts an HTTP/HTTPS image URL or a local file path.

    Args:
        image_source: HTTP URL or absolute local file path of the image to process.
    """
    try:
        client = InferenceClient("FireRedTeam/FireRed-OCR", token=os.getenv("HF_API_TOKEN"))
        # Remote URLs pass through untouched; local files become data: URIs.
        if not image_source.startswith("http"):
            raw = pathlib.Path(image_source).read_bytes()
            encoded = base64.b64encode(raw).decode()
            suffix = pathlib.Path(image_source).suffix.lstrip(".") or "png"
            payload = {
                "type": "image_url",
                "image_url": {"url": f"data:image/{suffix};base64,{encoded}"},
            }
        else:
            payload = {"type": "image_url", "image_url": {"url": image_source}}
        instruction = {"type": "text", "text": "Extract and return ALL text visible in this image. Output only the extracted text, and a full description of the image."}
        chat = [{"role": "user", "content": [payload, instruction]}]
        resp = client.chat_completion(messages=chat, max_tokens=1024)
        return resp.choices[0].message.content.strip() or "(no text detected)"
    except Exception as e:
        return f"ERROR: ocr_image failed: {e}"
87
 
 
 
88
 
89
# --- Multimodal tool: video understanding via LLaVA-Video-7B-Qwen2 ---
@tool
def analyze_video(video_url: str, question: str = "Describe this video in detail.") -> str:
    """
    Analyze a video and answer a question about it using LLaVA-Video-7B-Qwen2.

    Args:
        video_url: Direct HTTP/HTTPS URL to the video file (mp4, avi, webm, mov, etc.).
        question: The question to ask about the video content.
    """
    try:
        client = InferenceClient("lmms-lab/LLaVA-Video-7B-Qwen2", token=os.getenv("HF_API_TOKEN"))
        # OpenAI-style multimodal message: the video URL plus the text question.
        content = [
            {"type": "video_url", "video_url": {"url": video_url}},
            {"type": "text", "text": question},
        ]
        reply = client.chat_completion(
            messages=[{"role": "user", "content": content}], max_tokens=768
        )
        return reply.choices[0].message.content.strip()
    except Exception as e:
        return f"ERROR: analyze_video failed: {e}"
112
+
113
+
114
# --- Audio transcription via Whisper ---
@tool
def transcribe_audio(audio_source: str) -> str:
    """
    Transcribe speech in an audio file to text using openai/whisper-large-v3.
    Accepts an HTTP/HTTPS URL or a local file path.

    Args:
        audio_source: HTTP URL or local path to an audio file (mp3, wav, flac, ogg, m4a).
    """
    try:
        asr = InferenceClient("openai/whisper-large-v3", token=os.getenv("HF_API_TOKEN"))
        outcome = asr.automatic_speech_recognition(audio_source)
        # ASR responses normally expose .text; fall back to the raw repr.
        if hasattr(outcome, "text"):
            return outcome.text
        return str(outcome)
    except Exception as e:
        return f"ERROR: transcribe_audio failed: {e}"
130
+
131
 
132
# --- File interpretation: PDF, CSV, Excel, text, image, audio, video ---
@tool
def read_task_file(task_id: str, file_name: str, file_path: str = "") -> str:
    """
    Download and parse the file attached to a GAIA task question.
    Automatically handles: PDF (text extraction), CSV/Excel (table as text),
    plain text/JSON/HTML, images (OCR), audio (transcription), video (analysis).

    Args:
        task_id: The GAIA task ID whose attached file should be read.
        file_name: The original file name including extension (e.g. 'data.csv', 'chart.png').
        file_path: Optional relative file path from the task metadata (e.g. '2023/test/uuid.jpg').
            When provided this is tried first as the download URL.
    """
    BASE = "https://agents-course-unit4-scoring.hf.space"
    # Try /files/{task_id} first (standard GAIA endpoint), then /files/{file_path} as fallback
    candidates = [f"{BASE}/files/{task_id}"]
    if file_path:
        candidates.append(f"{BASE}/files/{file_path}")
    data = None
    last_err = ""
    for url in candidates:
        try:
            req = urllib.request.Request(url, headers={"User-Agent": "HF-AgentsCourse/1.0"})
            with urllib.request.urlopen(req, timeout=30) as resp:
                data = resp.read()
            break  # success — `url` keeps the working endpoint for the media branches below
        except Exception as e:
            last_err = str(e)
    if data is None:
        return f"ERROR: could not download file for task '{task_id}': {last_err}"
    # Fix: an earlier revision re-downloaded `url` a second time at this point,
    # doubling traffic and failing the whole call whenever the redundant fetch
    # errored — the payload in `data` is already complete.

    ext = pathlib.Path(file_name).suffix.lower()
    try:
        if ext == ".pdf":
            import pypdf
            reader = pypdf.PdfReader(io.BytesIO(data))
            pages = [p.extract_text() or "" for p in reader.pages]
            text = "\n\n--- Page Break ---\n\n".join(pages).strip()
            return text[:8000] if text else "(no text extracted from PDF)"

        elif ext == ".csv":
            df = pd.read_csv(io.BytesIO(data))
            return df.to_string(max_rows=200, index=False)

        elif ext in (".xlsx", ".xls"):
            df = pd.read_excel(io.BytesIO(data))
            return df.to_string(max_rows=200, index=False)

        elif ext in (".txt", ".md", ".json", ".xml", ".html", ".htm", ".py", ".tsv"):
            return data.decode("utf-8", errors="replace")[:8000]

        elif ext in (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"):
            # OCR needs a local path, so stage the bytes in a temp file.
            suffix = ext or ".png"
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                tmp.write(data)
                tmp_path = tmp.name
            try:
                return ocr_image(tmp_path)
            finally:
                os.unlink(tmp_path)

        elif ext in (".mp3", ".wav", ".flac", ".ogg", ".m4a"):
            # The ASR/video tools accept the remote URL directly.
            return transcribe_audio(url)

        elif ext in (".mp4", ".avi", ".mov", ".mkv", ".webm"):
            return analyze_video(url)

        else:
            # Try decoding as UTF-8 text, fall back to size info
            try:
                return data.decode("utf-8", errors="replace")[:4000]
            except Exception:
                return f"[binary file, {len(data)} bytes, extension='{ext}']"
    except Exception as e:
        return f"ERROR: read_task_file parsing failed (ext='{ext}'): {e}"
214
+
215
+
216
# --- Constants ---
# Base URL of the course scoring service (questions, files, submissions).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# ReAct-style instructions appended to each task. CodeAgent implements the
# Thought → Code → Observation → … → final_answer() ReAct loop natively.
# The final_answer() value must follow the GAIA submission format below.
REACT_INSTRUCTIONS = (
    "\n\nYou are a general AI assistant. I will ask you a question. "
    "Report your thoughts, and finish your answer with the following template: "
    "FINAL ANSWER: [YOUR FINAL ANSWER].\n"
    "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma "
    "separated list of numbers and/or strings.\n"
    "If you are asked for a number, don't use comma to write your number neither use "
    "units such as $ or percent sign unless specified otherwise.\n"
    "If you are asked for a string, don't use articles, neither abbreviations "
    "(e.g. for cities), and write the digits in plain text unless specified otherwise.\n"
    "If you are asked for a comma separated list, apply the above rules depending of "
    "whether the element to be put in the list is a number or a string.\n\n"
    "Additional execution rules:\n"
    "- Reason step-by-step in code comments before calling tools.\n"
    "- Use DuckDuckGoSearchTool / VisitWebpageTool to look up facts.\n"
    "- Use calculator for any arithmetic; never compute in your head.\n"
    "- If the question mentions an attached file, call read_task_file first.\n"
    "- For images call ocr_image, for audio call transcribe_audio, "
    "for video call analyze_video.\n"
    "- When you are confident, call final_answer() with ONLY the bare answer value "
    "(no 'FINAL ANSWER:' prefix — the prefix is for your reasoning trace only)."
)
245
+
246
+
247
+ def _extract_final_answer(raw: str) -> str:
248
+ """
249
+ Pull the answer out of the agent's output.
250
+ Handles both:
251
+ - CodeAgent returning a plain string from final_answer()
252
+ - A string containing 'FINAL ANSWER: ...' anywhere in it
253
+ """
254
+ if not isinstance(raw, str):
255
+ raw = str(raw)
256
+ # Look for the canonical submission marker
257
+ marker = "FINAL ANSWER:"
258
+ idx = raw.upper().rfind(marker) # rfind → take the last occurrence
259
+ if idx != -1:
260
+ answer = raw[idx + len(marker):].strip()
261
+ # Strip trailing punctuation that may have been added
262
+ answer = answer.rstrip(".")
263
+ return answer
264
+ # No marker found — the CodeAgent returned the bare value directly
265
+ return raw.strip()
266
+
267
+
268
def build_agent() -> CodeAgent:
    """
    Assemble the ReAct CodeAgent (Thought → Code → Observation loop) backed
    by Qwen/Qwen2.5-72B-Instruct via the HF Inference API.

    Tools wired in:
      - DuckDuckGoSearchTool : web search
      - VisitWebpageTool     : fetch and read a web page
      - calculator           : safe AST-based arithmetic / math
      - ocr_image            : image text extraction (FireRedTeam/FireRed-OCR)
      - analyze_video        : video understanding (lmms-lab/LLaVA-Video-7B-Qwen2)
      - transcribe_audio     : speech-to-text (openai/whisper-large-v3)
      - read_task_file       : download & parse task attachments
                               (PDF, CSV, Excel, text, image, audio, video)
    """
    # Remote LLM endpoint — no local weights needed in the Space container.
    llm = HfApiModel(
        model_id="Qwen/Qwen2.5-72B-Instruct",
        token=os.getenv("HF_API_TOKEN"),
    )
    toolbox = [
        DuckDuckGoSearchTool(max_results=5),
        VisitWebpageTool(),
        calculator,
        ocr_image,
        analyze_video,
        transcribe_audio,
        read_task_file,
    ]
    # Modules the generated code is allowed to import inside the sandbox.
    sandbox_imports = [
        "math", "statistics", "json", "re",
        "datetime", "collections", "itertools",
        "pandas", "io", "base64", "pathlib",
    ]
    return CodeAgent(
        tools=toolbox,
        model=llm,
        max_steps=10,
        additional_authorized_imports=sandbox_imports,
    )
 
 
 
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
 
306
+
307
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
308
+ """
309
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
310
+ and displays the results.
311
+ """
312
+ # --- Determine HF Space Runtime URL and Repo URL ---
313
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
  if profile:
316
+ username= f"{profile.username}"
317
  print(f"User logged in: {username}")
318
  else:
319
  print("User not logged in.")
 
323
  questions_url = f"{api_url}/questions"
324
  submit_url = f"{api_url}/submit"
325
 
326
+ # 1. Instantiate Agent
327
  try:
328
+ agent = build_agent()
329
  except Exception as e:
330
  print(f"Error instantiating agent: {e}")
331
  return f"Error initializing agent: {e}", None
332
 
333
+ # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
334
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
335
  print(agent_code)
336
 
337
+ # 2. Fetch Questions
338
  print(f"Fetching questions from: {questions_url}")
339
  try:
340
  response = requests.get(questions_url, timeout=15)
341
  response.raise_for_status()
342
  questions_data = response.json()
343
  if not questions_data:
344
+ print("Fetched questions list is empty.")
345
+ return "Fetched questions list is empty or invalid format.", None
346
  print(f"Fetched {len(questions_data)} questions.")
347
  except requests.exceptions.RequestException as e:
348
+ print(f"Error fetching questions: {e}")
349
  return f"Error fetching questions: {e}", None
350
+ except requests.exceptions.JSONDecodeError as e:
351
+ print(f"Error decoding JSON response from questions endpoint: {e}")
352
+ print(f"Response text: {response.text[:500]}")
353
+ return f"Error decoding server response for questions: {e}", None
354
  except Exception as e:
355
+ print(f"An unexpected error occurred fetching questions: {e}")
356
  return f"An unexpected error occurred fetching questions: {e}", None
357
 
358
+ # 3. Run your Agent
359
  results_log = []
360
  answers_payload = []
361
  print(f"Running agent on {len(questions_data)} questions...")
362
  for item in questions_data:
363
  task_id = item.get("task_id")
364
+ # API returns 'Question' (capital Q); guard against both casings
365
+ question_text = item.get("Question") or item.get("question")
366
+ file_name = item.get("file_name", "")
367
+ file_path = item.get("file_path", "")
368
+ if not task_id or not question_text:
369
  print(f"Skipping item with missing task_id or question: {item}")
370
  continue
371
+ # Build the task input: append file hint and ReAct instructions
372
+ task_input = question_text
373
  if file_name:
374
+ fp_arg = f", file_path='{file_path}'" if file_path else ""
375
+ task_input += (
376
+ f"\n\n[Attached file: '{file_name}'. "
377
+ f"Call read_task_file(task_id='{task_id}', file_name='{file_name}'{fp_arg}) "
378
+ f"to download and read its contents before answering.]"
379
+ )
380
+ task_input += REACT_INSTRUCTIONS
381
  try:
382
+ raw_answer = agent.run(task_input)
383
+ submitted_answer = _extract_final_answer(raw_answer)
384
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
385
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
386
  except Exception as e:
387
+ print(f"Error running agent on task {task_id}: {e}")
388
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
389
 
390
  if not answers_payload:
391
+ print("Agent did not produce any answers to submit.")
392
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
393
 
394
+ # 4. Prepare Submission
395
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
396
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
397
+ print(status_update)
398
+
399
+ # 5. Submit
400
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
401
  try:
402
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
410
  f"Message: {result_data.get('message', 'No message received.')}"
411
  )
412
  print("Submission successful.")
413
+ results_df = pd.DataFrame(results_log)
414
+ return final_status, results_df
415
  except requests.exceptions.HTTPError as e:
416
  error_detail = f"Server responded with status {e.response.status_code}."
417
  try:
418
  error_json = e.response.json()
419
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
420
+ except requests.exceptions.JSONDecodeError:
421
  error_detail += f" Response: {e.response.text[:500]}"
422
+ status_message = f"Submission Failed: {error_detail}"
423
+ print(status_message)
424
+ results_df = pd.DataFrame(results_log)
425
+ return status_message, results_df
426
  except requests.exceptions.Timeout:
427
+ status_message = "Submission Failed: The request timed out."
428
+ print(status_message)
429
+ results_df = pd.DataFrame(results_log)
430
+ return status_message, results_df
431
  except requests.exceptions.RequestException as e:
432
+ status_message = f"Submission Failed: Network error - {e}"
433
+ print(status_message)
434
+ results_df = pd.DataFrame(results_log)
435
+ return status_message, results_df
436
  except Exception as e:
437
+ status_message = f"An unexpected error occurred during submission: {e}"
438
+ print(status_message)
439
+ results_df = pd.DataFrame(results_log)
440
+ return status_message, results_df
441
 
442
 
443
+ # --- Build Gradio Interface using Blocks ---
444
  with gr.Blocks() as demo:
445
  gr.Markdown("# Basic Agent Evaluation Runner")
446
  gr.Markdown(
 
454
  ---
455
  **Disclaimers:**
456
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
457
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
458
  """
459
  )
460
 
461
  gr.LoginButton()
462
 
463
  run_button = gr.Button("Run Evaluation & Submit All Answers")
464
+
465
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
466
+ # Removed max_rows=10 from DataFrame constructor
467
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
468
 
469
  run_button.click(
 
472
  )
473
 
474
  if __name__ == "__main__":
475
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
476
+ # Check for SPACE_HOST and SPACE_ID at startup for information
477
  space_host_startup = os.getenv("SPACE_HOST")
478
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
479
 
480
  if space_host_startup:
481
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
483
  else:
484
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
485
 
486
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
487
  print(f"✅ SPACE_ID found: {space_id_startup}")
488
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
489
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
490
  else:
491
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
492
+
493
+ print("-"*(60 + len(" App Starting ")) + "\n")
494
 
 
495
  print("Launching Gradio Interface for Basic Agent Evaluation...")
496
+ demo.launch(debug=True, share=False)