TommasoBB committed on
Commit
4906175
·
verified ·
1 Parent(s): a9a1e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -297
app.py CHANGED
@@ -2,74 +2,41 @@ import os
2
  import base64
3
  from io import BytesIO
4
  import gradio as gr
5
- from gradio_client import file
6
  import requests
7
- import inspect
8
  import pandas as pd
9
  import tools
10
- from smolagents import CodeAgent
11
- # Resolve the correct LLM model class across smolagents versions
12
- try:
13
- from smolagents import InferenceClientModel as _HFModel # smolagents >= 1.0
14
- except ImportError:
15
- try:
16
- from smolagents.models import HfApiModel as _HFModel
17
- except ImportError:
18
- from smolagents import HfApiModel as _HFModel
19
  from typing import TypedDict, List, Dict, Any, Optional
20
  from langgraph.graph import StateGraph, START, END
21
- from langchain_core.messages import HumanMessage # kept for LangGraph compatibility
22
 
23
  # Helper to build a smolagents-compatible message list
24
  def _msg(content: str) -> list:
25
  return [{"role": "user", "content": content}]
26
 
27
 
28
- # (Keep Constants as is)
29
  # --- Constants ---
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
 
32
- # --- Models ---
33
- def _build_hf_model(model_name: str):
34
- """Build a text model across smolagents versions."""
35
- for kwargs in (
36
- {"model_id": model_name, "max_tokens": 2048, "temperature": 0.3},
37
- {"model_id": model_name, "max_new_tokens": 2048, "temperature": 0.3},
38
- {"repo_id": model_name, "max_tokens": 2048, "temperature": 0.3},
39
- {"repo_id": model_name, "max_new_tokens": 2048, "temperature": 0.3},
40
- ):
41
- try:
42
- return _HFModel(**kwargs)
43
- except TypeError:
44
- continue
45
- raise RuntimeError(f"Cannot instantiate model {model_name} with available smolagents version")
46
-
47
-
48
- # Text/math models via smolagents
49
- model = _build_hf_model("meta-llama/Llama-3.2-3B-Instruct") # General model for classification and final answer synthesis
50
- math_model = _build_hf_model("deepseek-ai/deepseek-math-7b-instruct")
51
-
52
- # FireRed OCR (Transformers) loaded lazily to avoid startup crashes
53
- _fire_red_model = None
54
- _fire_red_processor = None
55
-
56
-
57
- def _load_fire_red_ocr():
58
- """Lazy-load FireRed OCR model and processor using Transformers."""
59
- global _fire_red_model, _fire_red_processor
60
- if _fire_red_model is not None and _fire_red_processor is not None:
61
- return _fire_red_model, _fire_red_processor
62
 
63
- import torch
64
- from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
 
 
 
65
 
66
- _fire_red_model = Qwen3VLForConditionalGeneration.from_pretrained(
67
- "FireRedTeam/FireRed-OCR",
68
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
69
- device_map="auto",
70
- )
71
- _fire_red_processor = AutoProcessor.from_pretrained("FireRedTeam/FireRed-OCR")
72
- return _fire_red_model, _fire_red_processor
73
 
74
 
75
  def _extract_text_from_response(response: Any) -> str:
@@ -88,7 +55,8 @@ def _extract_text_from_response(response: Any) -> str:
88
  return str(content)
89
  return str(response)
90
 
91
- #define the state
 
92
  class AgentState(TypedDict):
93
  question: str
94
  task_id: Optional[str]
@@ -97,37 +65,40 @@ class AgentState(TypedDict):
97
  have_file: Optional[bool]
98
  is_math: Optional[bool]
99
  have_image: Optional[bool]
100
- final_answer: Optional[str] # The final answer produced by the agent
101
- retry_count: Optional[int] # Number of retries so far
102
- messages: List[Dict[str, Any]] # Track conversation with LLM for analysis
103
- #define nodes
 
 
104
 
105
- def read(state: AgentState) -> str:
106
  """Agent reads and logs the incoming question."""
107
  question = state["question"]
108
  print(f"Agent is reading the question: {question[:50]}...")
109
  return {}
110
- def classify(state: AgentState) -> str:
 
 
111
  """Agent classifies the question to determine which tools to use."""
112
  question = state["question"].lower()
113
-
114
- #prompt for LLM to classify the question
115
  prompt = f"""
116
- You are an agent that classifies questions to determine which tools to use.
117
- Classify the following question into the categories: 'need to be searched on web/wikipidia', 'has a file in the question', 'is a math problem', 'has an image in the question'.
118
- Question: {question}
119
- Return a JSON object with boolean fields for each category, for example:
120
- {{
121
- "is_searching": true,
122
- "have_file": false,
123
- "is_math": false,
124
- "have_image": false
125
- }}
126
- """
127
  messages = _msg(prompt)
128
  response = model(messages)
129
  raw = _extract_text_from_response(response)
130
- # Parse JSON from the model's response
131
  import json, re
132
  match = re.search(r'\{.*?\}', raw, re.DOTALL)
133
  data = {}
@@ -136,28 +107,28 @@ def classify(state: AgentState) -> str:
136
  data = json.loads(match.group())
137
  except json.JSONDecodeError:
138
  pass
 
139
  is_searching = bool(data.get("is_searching", False))
140
  have_file = bool(data.get("have_file", False))
141
  is_math = bool(data.get("is_math", False))
142
  have_image = bool(data.get("have_image", False))
143
- print(f"Classification result: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}")
144
- mew_messages = state.get("messages", []) + [
 
145
  {"role": "system", "content": "Classify the question to determine which tools to use."},
146
  {"role": "user", "content": question},
147
- {"role": "assistant", "content": f"Classification result: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}"}
148
  ]
149
-
150
  return {
151
  "is_searching": is_searching,
152
  "have_file": have_file,
153
  "is_math": is_math,
154
  "have_image": have_image,
155
- "messages": mew_messages
156
  }
157
-
158
 
159
-
160
- def handele_search(state: AgentState) -> str:
161
  """Agent performs a web search if classified as needing search."""
162
  question = state["question"]
163
  print(f"Agent is performing a web search for: {question[:50]}...")
@@ -166,137 +137,101 @@ def handele_search(state: AgentState) -> str:
166
  new_messages = state.get("messages", []) + [
167
  {"role": "system", "content": "Perform a web search if classified as needing search."},
168
  {"role": "user", "content": question},
169
- {"role": "assistant", "content": f"Search results: {search_results[:100]}..."}
170
  ]
171
- return {
172
- "search_results": search_results,
173
- "messages": new_messages
174
- }
175
-
176
- def handle_image(state: AgentState) -> str:
177
- """Agent handles an image if classified as having an image.
178
- Downloads the image as base64 and sends it to a vision-capable model
179
- using a multimodal message format."""
 
180
  question = state["question"]
181
  task_id = state.get("task_id", "")
182
  file_name = state.get("file_name", "")
183
 
184
- # Use ImageReaderTool to download the image as base64
185
  image_reader = tools.ImageReaderTool()
186
  image_data_uri = image_reader(task_id, file_name) if task_id and file_name else ""
187
 
188
  if not image_data_uri or image_data_uri.startswith("Failed"):
189
  print(f"Could not download image for task {task_id}")
190
  new_messages = state.get("messages", []) + [
191
- {"role": "assistant", "content": f"[Could not download image '{file_name}' for analysis.]"}
192
  ]
193
- return {
194
- "image_description": "",
195
- "transcribed_text": "",
196
- "messages": new_messages
197
- }
198
-
199
- # Build multimodal message with image for a vision-capable model
200
- prompt_text = f"""Analyze the attached image in detail.
201
- Describe the content of the image and transcribe all text visible in it.
202
-
203
- Question: {question}
204
-
205
- Return a JSON object with the following fields:
206
- {{
207
- "image_description": "A detailed description of the image content.",
208
- "transcribed_text": "All text visible in the image transcribed here."
209
- }}"""
210
 
211
- # Run OCR through FireRed-OCR using Transformers
 
 
 
 
 
 
 
 
 
 
212
  try:
213
- # Decode base64 data URI into bytes/PIL image
214
- _, b64_data = image_data_uri.split(",", 1)
215
- image_bytes = base64.b64decode(b64_data)
216
- from PIL import Image
217
- image = Image.open(BytesIO(image_bytes)).convert("RGB")
218
-
219
- ocr_model, ocr_processor = _load_fire_red_ocr()
220
-
221
- messages = [
222
- {
223
- "role": "user",
224
- "content": [
225
- {"type": "image", "image": image},
226
- {"type": "text", "text": prompt_text},
227
- ],
228
- }
229
- ]
230
-
231
- text = ocr_processor.apply_chat_template(
232
- messages,
233
- tokenize=False,
234
- add_generation_prompt=True,
235
- )
236
- inputs = ocr_processor(
237
- text=[text],
238
- images=[image],
239
- return_tensors="pt",
240
- padding=True,
241
- )
242
- inputs = {k: v.to(ocr_model.device) for k, v in inputs.items()}
243
-
244
- generated_ids = ocr_model.generate(**inputs, max_new_tokens=2048)
245
- prompt_len = inputs["input_ids"].shape[1]
246
- generated_trimmed = generated_ids[:, prompt_len:]
247
- output_text = ocr_processor.batch_decode(
248
- generated_trimmed,
249
- skip_special_tokens=True,
250
- clean_up_tokenization_spaces=False,
251
- )
252
- ocr_text = output_text[0].strip() if output_text else ""
253
  except Exception as e:
254
- ocr_text = f"OCR error: {e}"
255
 
 
 
256
  image_description = ocr_text
257
  transcribed_text = ocr_text
 
 
 
 
 
 
 
 
258
  print(f"Image description: {image_description[:100]}...")
259
  print(f"Transcribed text: {transcribed_text[:100]}...")
260
  new_messages = state.get("messages", []) + [
261
  {"role": "system", "content": "Analyze and describe the image if classified as having an image."},
262
  {"role": "user", "content": question},
263
- {"role": "assistant", "content": f"Image description: {image_description[:100]}..., Transcribed text: {transcribed_text[:100]}..."}
264
  ]
265
- return {
266
- "image_description": image_description,
267
- "transcribed_text": transcribed_text,
268
- "messages": new_messages
269
- }
270
-
271
-
272
-
273
- def handle_file(state: AgentState) -> str:
274
- """Agent processes the file if classified as having a file.
275
- Uses the FileReaderTool to download and read the file from the API."""
276
  question = state["question"]
277
  task_id = state.get("task_id", "")
278
  file_name = state.get("file_name", "")
279
 
280
- # Use the file_reader tool to fetch the file content
281
  file_reader = tools.FileReaderTool()
282
  file_content = file_reader(task_id, file_name) if task_id and file_name else ""
283
 
284
- # Build prompt with the retrieved file content
285
  file_context = ""
286
  if file_content:
287
  file_context = f"\n\n--- Attached file: {file_name} ---\n{file_content}\n--- End of file ---"
288
  elif file_name:
289
  file_context = f"\n\n[Note: A file '{file_name}' was referenced but could not be retrieved.]"
290
 
291
- prompt = f"""You are an agent that can read and extract information from files.
292
- Below is the content of the attached file retrieved from the API. Read it carefully and extract any relevant information that could help answer the question.
293
-
294
- Question: {question}{file_context}
295
-
296
- Return a JSON object with the following field:
297
- {{
298
- "extracted_info": "The relevant extracted information from the file."
299
- }}"""
300
  messages = _msg(prompt)
301
  response = model(messages)
302
  extracted_info = _extract_text_from_response(response)
@@ -304,14 +239,12 @@ Return a JSON object with the following field:
304
  new_messages = state.get("messages", []) + [
305
  {"role": "system", "content": "Read and extract information from the attached file."},
306
  {"role": "user", "content": question},
307
- {"role": "assistant", "content": f"Extracted info: {extracted_info[:100]}..."}
308
  ]
309
- return {
310
- "extracted_info": extracted_info,
311
- "messages": new_messages
312
- }
313
 
314
- def handle_math(state: AgentState) -> str:
315
  """Agent handles a math problem if classified as a math problem."""
316
  question = state["question"]
317
  print(f"Agent is handling a math problem: {question[:50]}...")
@@ -322,12 +255,9 @@ def handle_math(state: AgentState) -> str:
322
  new_messages = state.get("messages", []) + [
323
  {"role": "system", "content": "Handle the question if classified as a math problem."},
324
  {"role": "user", "content": question},
325
- {"role": "assistant", "content": f"Math solution: {solution[:100]}..."}
326
  ]
327
- return {
328
- "math_solution": solution,
329
- "messages": new_messages
330
- }
331
 
332
 
333
  def answer(state: AgentState) -> dict:
@@ -335,26 +265,30 @@ def answer(state: AgentState) -> dict:
335
  question = state["question"]
336
  messages_history = state.get("messages", [])
337
 
338
- # Build context summary from all assistant messages
339
- context_parts = []
340
- for msg in messages_history:
341
- if msg.get("role") == "assistant":
342
- context_parts.append(msg["content"])
343
  context = "\n".join(context_parts) if context_parts else "No additional context gathered."
344
 
345
- prompt = f"""You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
346
-
347
- Question: {question}
348
-
349
- Context gathered:
350
- {context}
351
- """
 
 
 
 
 
 
352
  messages = _msg(prompt)
353
- # Use the general model for final answer synthesis
354
  response = model(messages)
355
  raw_response = _extract_text_from_response(response)
356
 
357
- # Extract the final answer after "FINAL ANSWER:" if present
358
  if "FINAL ANSWER:" in raw_response:
359
  final_answer = raw_response.split("FINAL ANSWER:")[-1].strip()
360
  else:
@@ -365,22 +299,20 @@ Context gathered:
365
 
366
 
367
  def evaluate(state: AgentState) -> dict:
368
- """LLM evaluates whether the current final_answer is adequate.
369
- If not, increments retry_count so the graph can loop back."""
370
  import json, re
371
  question = state["question"]
372
  current_answer = state.get("final_answer", "")
373
  retry_count = state.get("retry_count", 0) or 0
374
 
375
- prompt = f"""You are a strict evaluator. Given the question and a candidate answer, decide if the answer is complete, relevant, and not an error message.
376
-
377
- Question: {question}
378
- Candidate answer: {current_answer}
379
-
380
- Return ONLY a JSON object:
381
- {{"is_adequate": true}} if the answer looks correct and complete,
382
- {{"is_adequate": false}} if the answer is wrong, incomplete, an error, or just says it could not find information."""
383
-
384
  response = model(_msg(prompt))
385
  raw = _extract_text_from_response(response)
386
  match = re.search(r'\{.*?\}', raw, re.DOTALL)
@@ -390,7 +322,7 @@ Return ONLY a JSON object:
390
  data = json.loads(match.group())
391
  except json.JSONDecodeError:
392
  pass
393
- is_adequate = bool(data.get("is_adequate", True)) # default: accept
394
  print(f"Evaluation: is_adequate={is_adequate}, retry_count={retry_count}")
395
  return {
396
  "retry_count": retry_count + (0 if is_adequate else 1),
@@ -402,7 +334,6 @@ Return ONLY a JSON object:
402
 
403
 
404
  def route_after_evaluate(state: AgentState) -> str:
405
- """If answer was inadequate and retries remain, search web for more context."""
406
  retry_count = state.get("retry_count", 0) or 0
407
  if retry_count > 0 and retry_count <= 2:
408
  print(f"Answer inadequate — retry {retry_count}/2, routing to web search")
@@ -411,7 +342,6 @@ def route_after_evaluate(state: AgentState) -> str:
411
 
412
 
413
  def route_after_classify(state: AgentState) -> str:
414
- """Routing function: decide which handler to invoke based on classification."""
415
  if state.get("have_image"):
416
  return "handle_image"
417
  if state.get("have_file"):
@@ -420,11 +350,10 @@ def route_after_classify(state: AgentState) -> str:
420
  return "handle_math"
421
  if state.get("is_searching"):
422
  return "handle_search"
423
- # Default: go straight to answer
424
  return "answer"
425
 
426
 
427
- #create the graph
428
  agent_graph = StateGraph(AgentState)
429
  agent_graph.add_node("read", read)
430
  agent_graph.add_node("classify", classify)
@@ -437,39 +366,27 @@ agent_graph.add_node("evaluate", evaluate)
437
 
438
  agent_graph.add_edge(START, "read")
439
  agent_graph.add_edge("read", "classify")
440
- agent_graph.add_conditional_edges(
441
- "classify",
442
- route_after_classify,
443
- )
444
-
445
  agent_graph.add_edge("handle_search", "answer")
446
  agent_graph.add_edge("handle_image", "answer")
447
  agent_graph.add_edge("handle_file", "answer")
448
  agent_graph.add_edge("handle_math", "answer")
449
  agent_graph.add_edge("answer", "evaluate")
450
- agent_graph.add_conditional_edges(
451
- "evaluate",
452
- route_after_evaluate,
453
- )
454
 
455
  compiled_agent = agent_graph.compile()
456
 
457
 
458
- # --- Basic Agent Definition ---
459
-
460
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
461
  class BasicAgent:
462
  def __init__(self):
463
  self.file_reader = tools.FileReaderTool()
464
  self.image_reader = tools.ImageReaderTool()
465
  self.web_search = tools.WebSearchTool()
466
- self.tools = [self.file_reader, self.image_reader, self.web_search]
467
  print("Agent initialized.")
468
 
469
  def __call__(self, question: str, task_id: str = "", file_name: str = "") -> str:
470
  print(f"Agent received question (first 50 chars): {question[:50]}...")
471
-
472
- # Run the LangGraph workflow
473
  result_state = compiled_agent.invoke({
474
  "question": question,
475
  "task_id": task_id,
@@ -480,24 +397,19 @@ class BasicAgent:
480
  "is_math": False,
481
  "have_image": False,
482
  "final_answer": "",
483
- "retry_count": 0
484
  })
485
-
486
- # Extract the final answer from the state
487
  final_answer = result_state.get("final_answer", "No answer produced.")
488
  print(f"Agent returning answer: {final_answer[:100]}...")
489
  return final_answer
490
 
491
- def run_and_submit_all( profile: gr.OAuthProfile | None):
492
- """
493
- Fetches all questions, runs the BasicAgent on them, submits all answers,
494
- and displays the results.
495
- """
496
- # --- Determine HF Space Runtime URL and Repo URL ---
497
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
498
 
499
  if profile:
500
- username= f"{profile.username}"
501
  print(f"User logged in: {username}")
502
  else:
503
  print("User not logged in.")
@@ -507,72 +419,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
507
  questions_url = f"{api_url}/questions"
508
  submit_url = f"{api_url}/submit"
509
 
510
- # 1. Instantiate Agent ( modify this part to create your agent)
511
  try:
512
  agent = BasicAgent()
513
  except Exception as e:
514
  print(f"Error instantiating agent: {e}")
515
  return f"Error initializing agent: {e}", None
516
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
517
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
518
  print(agent_code)
519
 
520
- # 2. Fetch Questions
521
  print(f"Fetching questions from: {questions_url}")
522
  try:
523
  response = requests.get(questions_url, timeout=15)
524
  response.raise_for_status()
525
  questions_data = response.json()
526
  if not questions_data:
527
- print("Fetched questions list is empty.")
528
- return "Fetched questions list is empty or invalid format.", None
529
  print(f"Fetched {len(questions_data)} questions.")
530
  except requests.exceptions.RequestException as e:
531
- print(f"Error fetching questions: {e}")
532
  return f"Error fetching questions: {e}", None
533
- except requests.exceptions.JSONDecodeError as e:
534
- print(f"Error decoding JSON response from questions endpoint: {e}")
535
- print(f"Response text: {response.text[:500]}")
536
- return f"Error decoding server response for questions: {e}", None
537
  except Exception as e:
538
- print(f"An unexpected error occurred fetching questions: {e}")
539
  return f"An unexpected error occurred fetching questions: {e}", None
540
 
541
- # 3. Run your Agent
542
  results_log = []
543
  answers_payload = []
544
  print(f"Running agent on {len(questions_data)} questions...")
545
  for item in questions_data:
546
  task_id = item.get("task_id")
547
- # Handle both "Question" (dataset format) and "question" (API format)
548
  question_text = item.get("question") or item.get("Question")
549
  if not task_id or question_text is None:
550
  print(f"Skipping item with missing task_id or question: {item}")
551
  continue
552
-
553
- # Check for attached file
554
  file_name = item.get("file_name", "")
555
  if file_name:
556
  print(f"Task {task_id} has attached file: {file_name}")
557
-
558
  try:
559
  submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
560
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
561
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
562
  except Exception as e:
563
- print(f"Error running agent on task {task_id}: {e}")
564
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
565
 
566
  if not answers_payload:
567
- print("Agent did not produce any answers to submit.")
568
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
569
 
570
- # 4. Prepare Submission
571
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
572
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
573
- print(status_update)
574
-
575
- # 5. Submit
576
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
577
  try:
578
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -586,37 +478,24 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
586
  f"Message: {result_data.get('message', 'No message received.')}"
587
  )
588
  print("Submission successful.")
589
- results_df = pd.DataFrame(results_log)
590
- return final_status, results_df
591
  except requests.exceptions.HTTPError as e:
592
  error_detail = f"Server responded with status {e.response.status_code}."
593
  try:
594
  error_json = e.response.json()
595
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
596
- except requests.exceptions.JSONDecodeError:
597
  error_detail += f" Response: {e.response.text[:500]}"
598
- status_message = f"Submission Failed: {error_detail}"
599
- print(status_message)
600
- results_df = pd.DataFrame(results_log)
601
- return status_message, results_df
602
  except requests.exceptions.Timeout:
603
- status_message = "Submission Failed: The request timed out."
604
- print(status_message)
605
- results_df = pd.DataFrame(results_log)
606
- return status_message, results_df
607
  except requests.exceptions.RequestException as e:
608
- status_message = f"Submission Failed: Network error - {e}"
609
- print(status_message)
610
- results_df = pd.DataFrame(results_log)
611
- return status_message, results_df
612
  except Exception as e:
613
- status_message = f"An unexpected error occurred during submission: {e}"
614
- print(status_message)
615
- results_df = pd.DataFrame(results_log)
616
- return status_message, results_df
617
 
618
 
619
- # --- Build Gradio Interface using Blocks ---
620
  with gr.Blocks() as demo:
621
  gr.Markdown("# Basic Agent Evaluation Runner")
622
  gr.Markdown(
@@ -630,16 +509,14 @@ with gr.Blocks() as demo:
630
  ---
631
  **Disclaimers:**
632
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
633
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
634
  """
635
  )
636
 
637
  gr.LoginButton()
638
 
639
  run_button = gr.Button("Run Evaluation & Submit All Answers")
640
-
641
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
642
- # Removed max_rows=10 from DataFrame constructor
643
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
644
 
645
  run_button.click(
@@ -648,10 +525,9 @@ with gr.Blocks() as demo:
648
  )
649
 
650
  if __name__ == "__main__":
651
- print("\n" + "-"*30 + " App Starting " + "-"*30)
652
- # Check for SPACE_HOST and SPACE_ID at startup for information
653
  space_host_startup = os.getenv("SPACE_HOST")
654
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
655
 
656
  if space_host_startup:
657
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -659,14 +535,13 @@ if __name__ == "__main__":
659
  else:
660
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
661
 
662
- if space_id_startup: # Print repo URLs if SPACE_ID is found
663
  print(f"✅ SPACE_ID found: {space_id_startup}")
664
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
665
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
666
  else:
667
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
668
-
669
- print("-"*(60 + len(" App Starting ")) + "\n")
670
 
 
671
  print("Launching Gradio Interface for Basic Agent Evaluation...")
672
- demo.launch(debug=True, share=False)
 
2
  import base64
3
  from io import BytesIO
4
  import gradio as gr
 
5
  import requests
 
6
  import pandas as pd
7
  import tools
8
+ from smolagents import InferenceClientModel
 
 
 
 
 
 
 
 
9
  from typing import TypedDict, List, Dict, Any, Optional
10
  from langgraph.graph import StateGraph, START, END
 
11
 
12
  # Helper to build a smolagents-compatible message list
13
  def _msg(content: str) -> list:
14
  return [{"role": "user", "content": content}]
15
 
16
 
 
17
  # --- Constants ---
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
+ # --- Models via HF Inference API (correct method for HF Spaces) ---
21
+ # InferenceClientModel routes all calls through the HF Serverless Inference API.
22
+ # No GPU or local model weights are required in the Space container.
23
+ model = InferenceClientModel(
24
+ model_id="meta-llama/Llama-3.2-3B-Instruct",
25
+ max_tokens=2048,
26
+ temperature=0.3,
27
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ math_model = InferenceClientModel(
30
+ model_id="deepseek-ai/deepseek-math-7b-instruct",
31
+ max_tokens=2048,
32
+ temperature=0.3,
33
+ )
34
 
35
+ # Vision model for image / OCR tasks — also served via Inference API
36
+ vision_model = InferenceClientModel(
37
+ model_id="Qwen/Qwen2.5-VL-7B-Instruct",
38
+ max_tokens=2048,
39
+ )
 
 
40
 
41
 
42
  def _extract_text_from_response(response: Any) -> str:
 
55
  return str(content)
56
  return str(response)
57
 
58
+
59
+ # --- State ---
60
  class AgentState(TypedDict):
61
  question: str
62
  task_id: Optional[str]
 
65
  have_file: Optional[bool]
66
  is_math: Optional[bool]
67
  have_image: Optional[bool]
68
+ final_answer: Optional[str]
69
+ retry_count: Optional[int]
70
+ messages: List[Dict[str, Any]]
71
+
72
+
73
+ # --- Nodes ---
74
 
75
+ def read(state: AgentState) -> dict:
76
  """Agent reads and logs the incoming question."""
77
  question = state["question"]
78
  print(f"Agent is reading the question: {question[:50]}...")
79
  return {}
80
+
81
+
82
+ def classify(state: AgentState) -> dict:
83
  """Agent classifies the question to determine which tools to use."""
84
  question = state["question"].lower()
85
+
 
86
  prompt = f"""
87
+ You are an agent that classifies questions to determine which tools to use.
88
+ Classify the following question into the categories: 'need to be searched on web/wikipedia', 'has a file in the question', 'is a math problem', 'has an image in the question'.
89
+ Question: {question}
90
+ Return a JSON object with boolean fields for each category, for example:
91
+ {{
92
+ "is_searching": true,
93
+ "have_file": false,
94
+ "is_math": false,
95
+ "have_image": false
96
+ }}
97
+ """
98
  messages = _msg(prompt)
99
  response = model(messages)
100
  raw = _extract_text_from_response(response)
101
+
102
  import json, re
103
  match = re.search(r'\{.*?\}', raw, re.DOTALL)
104
  data = {}
 
107
  data = json.loads(match.group())
108
  except json.JSONDecodeError:
109
  pass
110
+
111
  is_searching = bool(data.get("is_searching", False))
112
  have_file = bool(data.get("have_file", False))
113
  is_math = bool(data.get("is_math", False))
114
  have_image = bool(data.get("have_image", False))
115
+ print(f"Classification: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}")
116
+
117
+ new_messages = state.get("messages", []) + [
118
  {"role": "system", "content": "Classify the question to determine which tools to use."},
119
  {"role": "user", "content": question},
120
+ {"role": "assistant", "content": f"is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}"},
121
  ]
 
122
  return {
123
  "is_searching": is_searching,
124
  "have_file": have_file,
125
  "is_math": is_math,
126
  "have_image": have_image,
127
+ "messages": new_messages,
128
  }
 
129
 
130
+
131
+ def handele_search(state: AgentState) -> dict:
132
  """Agent performs a web search if classified as needing search."""
133
  question = state["question"]
134
  print(f"Agent is performing a web search for: {question[:50]}...")
 
137
  new_messages = state.get("messages", []) + [
138
  {"role": "system", "content": "Perform a web search if classified as needing search."},
139
  {"role": "user", "content": question},
140
+ {"role": "assistant", "content": f"Search results: {search_results[:100]}..."},
141
  ]
142
+ return {"search_results": search_results, "messages": new_messages}
143
+
144
+
145
def handle_image(state: AgentState) -> dict:
    """Agent handles an image using a vision model via the HF Inference API.

    Instead of loading a local transformer model (which would be too heavy for
    a standard Space), the image is forwarded to a vision-capable
    InferenceClientModel (Qwen2.5-VL) through the HF Serverless Inference API.

    Returns a partial state update with:
      - image_description: free-text description of the image ("" on failure)
      - transcribed_text:  text visible in the image ("" on failure)
      - messages:          conversation history extended with this step
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    # Download the image as a data URI so it can be embedded directly in the
    # multimodal message payload below.
    image_reader = tools.ImageReaderTool()
    image_data_uri = image_reader(task_id, file_name) if task_id and file_name else ""

    if not image_data_uri or image_data_uri.startswith("Failed"):
        print(f"Could not download image for task {task_id}")
        new_messages = state.get("messages", []) + [
            {"role": "assistant", "content": f"[Could not download image '{file_name}' for analysis.]"},
        ]
        return {"image_description": "", "transcribed_text": "", "messages": new_messages}

    prompt_text = (
        f"Analyze the attached image in detail.\n"
        f"Describe its content and transcribe all text visible in it.\n\n"
        f"Question: {question}\n\n"
        f"Return a JSON object: "
        f'{{ "image_description": "...", "transcribed_text": "..." }}'
    )

    # Send image + text to the vision model via the HF Inference API.
    # InferenceClientModel accepts OpenAI-style multimodal message format.
    vision_messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_data_uri}},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]
    try:
        response = vision_model(vision_messages)
        ocr_text = _extract_text_from_response(response)
    except Exception as e:
        # Best-effort: carry the error text forward instead of crashing the graph.
        ocr_text = f"Vision model error: {e}"

    import json, re

    # Greedy match (first '{' to last '}') so that JSON whose string values
    # themselves contain '}' is not truncated — the previous non-greedy
    # pattern r'\{.*?\}' stopped at the first closing brace.
    match = re.search(r'\{.*\}', ocr_text, re.DOTALL)
    image_description = ocr_text
    transcribed_text = ocr_text
    if match:
        try:
            data = json.loads(match.group())
            # Guard: the model could emit a bare list/scalar wrapped in braces-like text.
            if isinstance(data, dict):
                image_description = data.get("image_description", ocr_text)
                transcribed_text = data.get("transcribed_text", ocr_text)
        except json.JSONDecodeError:
            pass  # keep the raw model output as a fallback

    print(f"Image description: {image_description[:100]}...")
    print(f"Transcribed text: {transcribed_text[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Analyze and describe the image if classified as having an image."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Image description: {image_description[:100]}..., Transcribed text: {transcribed_text[:100]}..."},
    ]
    return {"image_description": image_description, "transcribed_text": transcribed_text, "messages": new_messages}
211
+
212
+
213
def handle_file(state: AgentState) -> dict:
    """Agent processes the file if classified as having a file.

    Downloads the attached file, asks the text model to extract the
    information relevant to the question, and records the result.

    Returns a partial state update with:
      - extracted_info: relevant information pulled from the file
      - messages:       conversation history extended with this step
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    file_reader = tools.FileReaderTool()
    file_content = file_reader(task_id, file_name) if task_id and file_name else ""

    file_context = ""
    if file_content:
        file_context = f"\n\n--- Attached file: {file_name} ---\n{file_content}\n--- End of file ---"
    elif file_name:
        file_context = f"\n\n[Note: A file '{file_name}' was referenced but could not be retrieved.]"

    prompt = (
        f"You are an agent that can read and extract information from files.\n"
        f"Read the attached file content carefully and extract any relevant information "
        f"that could help answer the question.\n\n"
        f"Question: {question}{file_context}\n\n"
        f'Return a JSON object: {{ "extracted_info": "..." }}'
    )

    messages = _msg(prompt)
    response = model(messages)
    extracted_info = _extract_text_from_response(response)

    # The prompt asks the model for {"extracted_info": "..."}; parse it when
    # present (consistent with handle_image) so the downstream answer step
    # receives clean text rather than a raw JSON blob. Falls back to the raw
    # response text if no valid JSON object is found.
    import json, re
    match = re.search(r'\{.*\}', extracted_info, re.DOTALL)
    if match:
        try:
            data = json.loads(match.group())
            if isinstance(data, dict):
                extracted_info = data.get("extracted_info", extracted_info)
        except json.JSONDecodeError:
            pass

    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Read and extract information from the attached file."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Extracted info: {extracted_info[:100]}..."},
    ]
    return {"extracted_info": extracted_info, "messages": new_messages}
245
+
 
 
246
 
247
+ def handle_math(state: AgentState) -> dict:
248
  """Agent handles a math problem if classified as a math problem."""
249
  question = state["question"]
250
  print(f"Agent is handling a math problem: {question[:50]}...")
 
255
  new_messages = state.get("messages", []) + [
256
  {"role": "system", "content": "Handle the question if classified as a math problem."},
257
  {"role": "user", "content": question},
258
+ {"role": "assistant", "content": f"Math solution: {solution[:100]}..."},
259
  ]
260
+ return {"math_solution": solution, "messages": new_messages}
 
 
 
261
 
262
 
263
  def answer(state: AgentState) -> dict:
 
265
  question = state["question"]
266
  messages_history = state.get("messages", [])
267
 
268
+ context_parts = [
269
+ msg["content"]
270
+ for msg in messages_history
271
+ if msg.get("role") == "assistant"
272
+ ]
273
  context = "\n".join(context_parts) if context_parts else "No additional context gathered."
274
 
275
+ prompt = (
276
+ "You are a general AI assistant. I will ask you a question. Report your thoughts, "
277
+ "and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
278
+ "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated "
279
+ "list of numbers and/or strings. If you are asked for a number, don't use comma to write "
280
+ "your number neither use units such as $ or percent sign unless specified otherwise. "
281
+ "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), "
282
+ "and write the digits in plain text unless specified otherwise. If you are asked for a comma "
283
+ "separated list, apply the above rules depending of whether the element to be put in the list "
284
+ "is a number or a string.\n\n"
285
+ f"Question: {question}\n\n"
286
+ f"Context gathered:\n{context}\n"
287
+ )
288
  messages = _msg(prompt)
 
289
  response = model(messages)
290
  raw_response = _extract_text_from_response(response)
291
 
 
292
  if "FINAL ANSWER:" in raw_response:
293
  final_answer = raw_response.split("FINAL ANSWER:")[-1].strip()
294
  else:
 
299
 
300
 
301
  def evaluate(state: AgentState) -> dict:
302
+ """LLM evaluates whether the current final_answer is adequate."""
 
303
  import json, re
304
  question = state["question"]
305
  current_answer = state.get("final_answer", "")
306
  retry_count = state.get("retry_count", 0) or 0
307
 
308
+ prompt = (
309
+ f"You are a strict evaluator. Given the question and a candidate answer, decide if the "
310
+ f"answer is complete, relevant, and not an error message.\n\n"
311
+ f"Question: {question}\nCandidate answer: {current_answer}\n\n"
312
+ f'Return ONLY a JSON object:\n'
313
+ f'{{"is_adequate": true}} if the answer looks correct and complete,\n'
314
+ f'{{"is_adequate": false}} if the answer is wrong, incomplete, an error, or says it could not find information.'
315
+ )
 
316
  response = model(_msg(prompt))
317
  raw = _extract_text_from_response(response)
318
  match = re.search(r'\{.*?\}', raw, re.DOTALL)
 
322
  data = json.loads(match.group())
323
  except json.JSONDecodeError:
324
  pass
325
+ is_adequate = bool(data.get("is_adequate", True))
326
  print(f"Evaluation: is_adequate={is_adequate}, retry_count={retry_count}")
327
  return {
328
  "retry_count": retry_count + (0 if is_adequate else 1),
 
334
 
335
 
336
  def route_after_evaluate(state: AgentState) -> str:
 
337
  retry_count = state.get("retry_count", 0) or 0
338
  if retry_count > 0 and retry_count <= 2:
339
  print(f"Answer inadequate — retry {retry_count}/2, routing to web search")
 
342
 
343
 
344
  def route_after_classify(state: AgentState) -> str:
 
345
  if state.get("have_image"):
346
  return "handle_image"
347
  if state.get("have_file"):
 
350
  return "handle_math"
351
  if state.get("is_searching"):
352
  return "handle_search"
 
353
  return "answer"
354
 
355
 
356
+ # --- Build LangGraph ---
357
  agent_graph = StateGraph(AgentState)
358
  agent_graph.add_node("read", read)
359
  agent_graph.add_node("classify", classify)
 
366
 
367
  agent_graph.add_edge(START, "read")
368
  agent_graph.add_edge("read", "classify")
369
+ agent_graph.add_conditional_edges("classify", route_after_classify)
 
 
 
 
370
  agent_graph.add_edge("handle_search", "answer")
371
  agent_graph.add_edge("handle_image", "answer")
372
  agent_graph.add_edge("handle_file", "answer")
373
  agent_graph.add_edge("handle_math", "answer")
374
  agent_graph.add_edge("answer", "evaluate")
375
+ agent_graph.add_conditional_edges("evaluate", route_after_evaluate)
 
 
 
376
 
377
  compiled_agent = agent_graph.compile()
378
 
379
 
380
+ # --- Agent ---
 
 
381
  class BasicAgent:
382
  def __init__(self):
383
  self.file_reader = tools.FileReaderTool()
384
  self.image_reader = tools.ImageReaderTool()
385
  self.web_search = tools.WebSearchTool()
 
386
  print("Agent initialized.")
387
 
388
  def __call__(self, question: str, task_id: str = "", file_name: str = "") -> str:
389
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
390
  result_state = compiled_agent.invoke({
391
  "question": question,
392
  "task_id": task_id,
 
397
  "is_math": False,
398
  "have_image": False,
399
  "final_answer": "",
400
+ "retry_count": 0,
401
  })
 
 
402
  final_answer = result_state.get("final_answer", "No answer produced.")
403
  print(f"Agent returning answer: {final_answer[:100]}...")
404
  return final_answer
405
 
406
+
407
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
408
+ """Fetches all questions, runs the BasicAgent on them, submits all answers."""
409
+ space_id = os.getenv("SPACE_ID")
 
 
 
410
 
411
  if profile:
412
+ username = f"{profile.username}"
413
  print(f"User logged in: {username}")
414
  else:
415
  print("User not logged in.")
 
419
  questions_url = f"{api_url}/questions"
420
  submit_url = f"{api_url}/submit"
421
 
 
422
  try:
423
  agent = BasicAgent()
424
  except Exception as e:
425
  print(f"Error instantiating agent: {e}")
426
  return f"Error initializing agent: {e}", None
427
+
428
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
429
  print(agent_code)
430
 
 
431
  print(f"Fetching questions from: {questions_url}")
432
  try:
433
  response = requests.get(questions_url, timeout=15)
434
  response.raise_for_status()
435
  questions_data = response.json()
436
  if not questions_data:
437
+ return "Fetched questions list is empty or invalid format.", None
 
438
  print(f"Fetched {len(questions_data)} questions.")
439
  except requests.exceptions.RequestException as e:
 
440
  return f"Error fetching questions: {e}", None
 
 
 
 
441
  except Exception as e:
 
442
  return f"An unexpected error occurred fetching questions: {e}", None
443
 
 
444
  results_log = []
445
  answers_payload = []
446
  print(f"Running agent on {len(questions_data)} questions...")
447
  for item in questions_data:
448
  task_id = item.get("task_id")
 
449
  question_text = item.get("question") or item.get("Question")
450
  if not task_id or question_text is None:
451
  print(f"Skipping item with missing task_id or question: {item}")
452
  continue
 
 
453
  file_name = item.get("file_name", "")
454
  if file_name:
455
  print(f"Task {task_id} has attached file: {file_name}")
 
456
  try:
457
  submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
458
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
459
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
460
  except Exception as e:
461
+ print(f"Error running agent on task {task_id}: {e}")
462
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
463
 
464
  if not answers_payload:
 
465
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
466
 
 
467
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
468
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
469
  try:
470
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
478
  f"Message: {result_data.get('message', 'No message received.')}"
479
  )
480
  print("Submission successful.")
481
+ return final_status, pd.DataFrame(results_log)
 
482
  except requests.exceptions.HTTPError as e:
483
  error_detail = f"Server responded with status {e.response.status_code}."
484
  try:
485
  error_json = e.response.json()
486
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
487
+ except Exception:
488
  error_detail += f" Response: {e.response.text[:500]}"
489
+ return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
 
 
 
490
  except requests.exceptions.Timeout:
491
+ return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
 
 
 
492
  except requests.exceptions.RequestException as e:
493
+ return f"Submission Failed: Network error - {e}", pd.DataFrame(results_log)
 
 
 
494
  except Exception as e:
495
+ return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
 
 
 
496
 
497
 
498
+ # --- Gradio Interface ---
499
  with gr.Blocks() as demo:
500
  gr.Markdown("# Basic Agent Evaluation Runner")
501
  gr.Markdown(
 
509
  ---
510
  **Disclaimers:**
511
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
512
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
513
  """
514
  )
515
 
516
  gr.LoginButton()
517
 
518
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
519
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
520
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
521
 
522
  run_button.click(
 
525
  )
526
 
527
  if __name__ == "__main__":
528
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
 
529
  space_host_startup = os.getenv("SPACE_HOST")
530
+ space_id_startup = os.getenv("SPACE_ID")
531
 
532
  if space_host_startup:
533
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
535
  else:
536
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
537
 
538
+ if space_id_startup:
539
  print(f"✅ SPACE_ID found: {space_id_startup}")
540
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
541
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
542
  else:
543
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
 
 
544
 
545
+ print("-" * (60 + len(" App Starting ")) + "\n")
546
  print("Launching Gradio Interface for Basic Agent Evaluation...")
547
+ demo.launch(debug=True, share=False)