maytemuma committed on
Commit
f7d7668
·
verified ·
1 Parent(s): 58d60bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -308
app.py CHANGED
@@ -7,12 +7,10 @@ from smolagents import (
7
  CodeAgent,
8
  DuckDuckGoSearchTool,
9
  VisitWebpageTool,
10
- InferenceClientModel,
11
  tool,
12
- LiteLLMModel,
13
  )
14
 
15
-
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
@@ -23,241 +21,168 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
 
24
  @tool
25
  def download_file_from_api(task_id: str) -> str:
26
- """Downloads a file associated with a GAIA task and returns its text content.
27
- Use this tool when a question mentions an attached file, spreadsheet, image,
28
- audio, document, or any file that you need to read or analyze.
29
 
30
  Args:
31
- task_id: The task_id string for the question that has an associated file.
32
  """
33
  import tempfile
34
- api_url = "https://agents-course-unit4-scoring.hf.space"
35
- url = f"{api_url}/files/{task_id}"
36
  try:
37
- response = requests.get(url, timeout=30)
38
- response.raise_for_status()
39
-
40
- content_type = response.headers.get("Content-Type", "")
41
- print(f" [download_file] Content-Type: {content_type}, Size: {len(response.content)} bytes")
42
-
43
- # --- TEXT-BASED FILES ---
44
- if any(t in content_type for t in ["text", "json", "csv", "xml", "html"]):
45
- return response.text[:15000]
46
-
47
- # --- EXCEL FILES ---
48
- if any(t in content_type for t in ["spreadsheet", "excel", "openxmlformats-officedocument"]):
49
- try:
50
- import openpyxl
51
- import io
52
- wb = openpyxl.load_workbook(io.BytesIO(response.content))
53
- result = []
54
- for sheet_name in wb.sheetnames:
55
- ws = wb[sheet_name]
56
- result.append(f"--- Sheet: {sheet_name} ---")
57
- for row in ws.iter_rows(values_only=True):
58
- result.append("\t".join([str(c) if c is not None else "" for c in row]))
59
- return "\n".join(result)[:15000]
60
- except Exception as e:
61
- return f"Excel file detected but error reading it: {str(e)}"
62
-
63
- # --- PDF FILES ---
64
- if "pdf" in content_type:
65
- try:
66
- import PyPDF2
67
- import io
68
- reader = PyPDF2.PdfReader(io.BytesIO(response.content))
69
- text = ""
70
- for page in reader.pages:
71
- text += page.extract_text() or ""
72
- return text[:15000] if text.strip() else "PDF found but could not extract text."
73
- except Exception as e:
74
- return f"PDF file detected but error reading: {str(e)}"
75
-
76
- # --- IMAGE FILES ---
77
- if "image" in content_type:
78
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
79
- f.write(response.content)
80
- img_path = f.name
81
- return f"IMAGE_FILE_SAVED:{img_path}"
82
 
83
- # --- AUDIO FILES ---
84
- if any(t in content_type for t in ["audio", "mpeg", "wav", "mp3", "ogg"]):
85
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
86
- f.write(response.content)
87
- audio_path = f.name
88
- return f"AUDIO_FILE_SAVED:{audio_path}"
89
 
90
- # --- PYTHON FILES ---
91
- if "python" in content_type or "x-python" in content_type:
92
- return response.text[:15000]
93
 
94
- # --- WORD DOCUMENTS ---
95
- if "wordprocessingml" in content_type or "msword" in content_type:
96
- try:
97
- import docx
98
- import io
99
- doc = docx.Document(io.BytesIO(response.content))
100
- text = "\n".join([p.text for p in doc.paragraphs])
101
- return text[:15000] if text.strip() else "Word doc found but no text extracted."
102
- except Exception as e:
103
- return f"Word document detected but error reading: {str(e)}"
104
 
105
- # --- FALLBACK ---
106
  with tempfile.NamedTemporaryFile(delete=False, suffix=".bin") as f:
107
- f.write(response.content)
108
- return f"File downloaded to {f.name} (type: {content_type}). Size: {len(response.content)} bytes."
109
 
110
  except Exception as e:
111
- return f"Error downloading file for task {task_id}: {str(e)}"
112
 
113
 
114
  @tool
115
  def describe_image(image_path: str) -> str:
116
- """Describes the content of an image file using an AI vision model.
117
- Use this when you have an image file path (from IMAGE_FILE_SAVED)
118
- and need to understand what the image shows.
119
 
120
  Args:
121
- image_path: The local file path to the image to describe.
122
  """
123
  try:
124
  from huggingface_hub import InferenceClient
125
-
126
  client = InferenceClient(token=os.getenv("HF_TOKEN"))
127
-
128
  with open(image_path, "rb") as f:
129
- image_bytes = f.read()
130
-
131
- result = client.image_to_text(
132
- image=image_bytes,
133
- model="Salesforce/blip2-opt-2.7b",
134
- )
135
-
136
- if isinstance(result, str):
137
- return f"Image description: {result}"
138
- elif hasattr(result, "generated_text"):
139
- return f"Image description: {result.generated_text}"
140
- else:
141
- return f"Image description: {str(result)}"
142
-
143
  except Exception as e:
144
- return f"Could not describe image at {image_path}. Error: {str(e)}"
145
 
146
 
147
  @tool
148
  def transcribe_audio(audio_path: str) -> str:
149
- """Transcribes an audio file to text using Whisper speech recognition.
150
- Use this when you have an audio file path (from AUDIO_FILE_SAVED).
151
 
152
  Args:
153
- audio_path: The local file path to the audio file to transcribe.
154
  """
155
  try:
156
  from huggingface_hub import InferenceClient
157
-
158
  client = InferenceClient(token=os.getenv("HF_TOKEN"))
159
-
160
  with open(audio_path, "rb") as f:
161
- audio_bytes = f.read()
162
-
163
- result = client.automatic_speech_recognition(
164
- audio=audio_bytes,
165
- model="openai/whisper-large-v3-turbo",
166
- )
167
-
168
- if isinstance(result, str):
169
- return f"Audio transcription: {result}"
170
- elif hasattr(result, "text"):
171
- return f"Audio transcription: {result.text}"
172
- elif isinstance(result, dict):
173
- return f"Audio transcription: {result.get('text', str(result))}"
174
- else:
175
- return f"Audio transcription: {str(result)}"
176
-
177
  except Exception as e:
178
- return f"Error transcribing audio at {audio_path}: {str(e)}"
179
 
180
 
181
  @tool
182
  def read_local_file(file_path: str) -> str:
183
- """Reads the content of a local text file and returns it as a string.
184
 
185
  Args:
186
- file_path: The path to the file to read.
187
  """
188
  try:
189
  with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
190
- return f.read()[:15000]
191
  except Exception as e:
192
- return f"Error reading file: {str(e)}"
193
 
194
 
195
  @tool
196
  def execute_python_file(file_path: str) -> str:
197
- """Executes a Python script file and returns its stdout output.
198
 
199
  Args:
200
- file_path: The path to the Python file to execute.
201
  """
202
  import subprocess
203
  try:
204
- result = subprocess.run(
205
- ["python3", file_path],
206
- capture_output=True,
207
- text=True,
208
- timeout=30,
209
- )
210
- output = ""
211
- if result.stdout:
212
- output += result.stdout
213
- if result.stderr:
214
- output += f"\nSTDERR: {result.stderr}"
215
- return output.strip() if output.strip() else "Script executed but produced no output."
216
  except subprocess.TimeoutExpired:
217
- return "Script execution timed out after 30 seconds."
218
  except Exception as e:
219
- return f"Error executing Python file: {str(e)}"
220
 
221
 
222
  # =============================================
223
- # AGENT CLASS
224
  # =============================================
225
 
226
- CUSTOM_INSTRUCTIONS = """You are a precise AI assistant solving GAIA benchmark questions.
227
-
228
- CRITICAL RULES FOR ANSWERING:
229
- 1. Your final answer must be ONLY the answer itself — no explanations, no "The answer is", no extra words.
230
- 2. If the answer is a number, give just the number (e.g., "42").
231
- 3. If the answer is a name, give just the name (e.g., "Paris").
232
- 4. If asked for a comma-separated list, give just the list (e.g., "red, blue, green").
233
- 5. Be precise and factual. Use tools to verify information when needed.
234
-
235
- TOOL USAGE RULES:
236
- 6. If a question mentions an attached file, image, audio, spreadsheet, or document, FIRST use download_file_from_api with the task_id.
237
- 7. If download returns "IMAGE_FILE_SAVED:/some/path", then call describe_image with that path.
238
- 8. If download returns "AUDIO_FILE_SAVED:/some/path", then call transcribe_audio with that path.
239
- 9. If the file is a Python script (.py), you can use read_local_file to view it or execute_python_file to run it.
240
- 10. Use DuckDuckGoSearchTool when you need factual information from the internet.
241
- 11. Use visit_webpage to read the full content of a specific URL.
242
-
243
- REASONING:
244
- 12. Think step by step but keep your FINAL output as ONLY the answer.
245
- 13. Double-check your answer before giving it.
246
  """
247
 
248
 
249
  class BasicAgent:
250
- """Agent using smolagents CodeAgent with HF Inference API."""
251
-
252
  def __init__(self):
253
- print("Initializing SmolAgent for GAIA benchmark...")
254
 
255
- # Use default model -> smolagents auto-select the provider
256
- model = LiteLLMModel(
257
- model_id="groq/llama-3.3-70b-versatile",
258
- api_key=os.getenv("GROQ_API_KEY"),
259
  temperature=0.1,
260
- max_tokens=2096,
261
  )
262
 
263
  self.agent = CodeAgent(
@@ -271,223 +196,159 @@ class BasicAgent:
271
  read_local_file,
272
  execute_python_file,
273
  ],
274
- max_steps=10,
275
  verbosity_level=2,
276
- instructions=CUSTOM_INSTRUCTIONS,
277
  additional_authorized_imports=[
278
  "json", "re", "math", "datetime", "collections",
279
  "csv", "io", "os", "tempfile", "subprocess",
280
  "base64", "hashlib", "unicodedata", "string",
281
  ],
282
  )
283
-
284
- print("SmolAgent initialized successfully!")
285
 
286
  def __call__(self, question: str, task_id: str = None) -> str:
287
- print(f"Agent processing: {question[:100]}...")
288
 
289
  if task_id:
290
- prompt = f"""Answer this question. If you need to download an attached file, use download_file_from_api with task_id="{task_id}".
291
-
292
- Question: {question}
293
-
294
- Remember: respond with ONLY the final answer, nothing else."""
295
  else:
296
- prompt = f"""Answer this question precisely.
297
-
298
- Question: {question}
299
-
300
- Remember: respond with ONLY the final answer, nothing else."""
301
 
302
- # Retry logic: try up to 2 times
303
  for attempt in range(2):
304
  try:
305
  result = self.agent.run(prompt)
306
  answer = str(result).strip()
307
 
308
- # Clean up common LLM prefixes
309
- prefixes_to_remove = [
310
- "The answer is ", "The answer is: ",
311
- "Answer: ", "FINAL ANSWER: ",
312
- "Final answer: ", "The final answer is ",
313
- "The final answer is: ", "Result: ",
314
- ]
315
- for prefix in prefixes_to_remove:
316
- if answer.lower().startswith(prefix.lower()):
317
- answer = answer[len(prefix):].strip()
318
-
319
- # Remove wrapping quotes
320
- if len(answer) > 2 and \
321
- ((answer.startswith('"') and answer.endswith('"')) or
322
- (answer.startswith("'") and answer.endswith("'"))):
323
- answer = answer[1:-1].strip()
324
-
325
- # Remove trailing period for short answers
326
  if answer.endswith(".") and len(answer.split()) <= 5:
327
  answer = answer[:-1].strip()
328
 
329
- print(f"Final answer: {answer}")
330
  return answer
331
 
332
  except Exception as e:
333
- print(f"Agent error (attempt {attempt + 1}): {e}")
334
  if attempt == 0:
335
- print("Retrying in 5 seconds...")
336
- time.sleep(5)
337
 
338
  return "Unable to determine the answer."
339
 
340
 
341
  # =============================================
342
- # SUBMISSION LOGIC
343
  # =============================================
344
 
345
  def run_and_submit_all(profile: gr.OAuthProfile | None):
346
  space_id = os.getenv("SPACE_ID")
347
 
348
- if profile:
349
- username = f"{profile.username}"
350
- print(f"User logged in: {username}")
351
- else:
352
- print("User not logged in.")
353
  return "Please Login to Hugging Face with the button.", None
354
 
 
 
 
355
  api_url = DEFAULT_API_URL
356
- questions_url = f"{api_url}/questions"
357
- submit_url = f"{api_url}/submit"
358
 
359
  try:
360
  agent = BasicAgent()
361
  except Exception as e:
362
- print(f"Error instantiating agent: {e}")
363
  return f"Error initializing agent: {e}", None
364
 
365
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
366
- print(agent_code)
367
 
368
- print(f"Fetching questions from: {questions_url}")
369
  try:
370
- response = requests.get(questions_url, timeout=15)
371
- response.raise_for_status()
372
- questions_data = response.json()
373
- if not questions_data:
374
- return "Fetched questions list is empty or invalid format.", None
375
- print(f"Fetched {len(questions_data)} questions.")
376
- except requests.exceptions.RequestException as e:
377
- return f"Error fetching questions: {e}", None
378
  except Exception as e:
379
- return f"An unexpected error occurred fetching questions: {e}", None
380
 
381
  results_log = []
382
- answers_payload = []
383
- print(f"Running agent on {len(questions_data)} questions...")
384
- for i, item in enumerate(questions_data):
385
  task_id = item.get("task_id")
386
- question_text = item.get("question")
387
- if not task_id or question_text is None:
388
  continue
 
389
  print(f"\n{'='*60}")
390
- print(f" Question {i+1}/{len(questions_data)} — Task: {task_id}")
391
- print(f" Q: {question_text[:120]}...")
392
  print(f"{'='*60}")
 
393
  try:
394
- submitted_answer = agent(question_text, task_id=task_id)
395
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
396
- results_log.append({
397
- "Task ID": task_id,
398
- "Question": question_text,
399
- "Submitted Answer": submitted_answer
400
- })
401
  except Exception as e:
402
- print(f"Error running agent on task {task_id}: {e}")
403
- results_log.append({
404
- "Task ID": task_id,
405
- "Question": question_text,
406
- "Submitted Answer": f"AGENT ERROR: {e}"
407
- })
408
-
409
- # Small delay between questions to avoid rate limiting
410
- time.sleep(2)
411
-
412
- if not answers_payload:
413
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
414
-
415
- submission_data = {
416
- "username": username.strip(),
417
- "agent_code": agent_code,
418
- "answers": answers_payload
419
- }
420
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
421
  try:
422
- response = requests.post(submit_url, json=submission_data, timeout=120)
423
- response.raise_for_status()
424
- result_data = response.json()
425
- final_status = (
426
  f"Submission Successful!\n"
427
- f"User: {result_data.get('username')}\n"
428
- f"Overall Score: {result_data.get('score', 'N/A')}% "
429
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
430
- f"Message: {result_data.get('message', 'No message received.')}"
431
  )
432
- print("Submission successful.")
433
- return final_status, pd.DataFrame(results_log)
434
  except requests.exceptions.HTTPError as e:
435
- error_detail = f"Server responded with status {e.response.status_code}."
436
- try:
437
- error_json = e.response.json()
438
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
439
- except requests.exceptions.JSONDecodeError:
440
- error_detail += f" Response: {e.response.text[:500]}"
441
- return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
442
- except requests.exceptions.Timeout:
443
- return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
444
- except requests.exceptions.RequestException as e:
445
- return f"Submission Failed: Network error - {e}", pd.DataFrame(results_log)
446
  except Exception as e:
447
- return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
448
 
449
 
450
- # --- Build Gradio Interface ---
451
  with gr.Blocks() as demo:
452
  gr.Markdown("# 🤖 GAIA Agent — Final Assignment")
453
  gr.Markdown(
454
  """
455
- **Agent**: SmolAgent (CodeAgent) with Qwen2.5-Coder-32B via Nebius (HF Inference)
456
-
457
  **Tools**: Web Search · Webpage Visitor · File Downloader · Image Describer · Audio Transcriber · Python Executor
458
 
459
- **Instructions:**
460
- 1. Log in to your Hugging Face account using the button below.
461
- 2. Click 'Run Evaluation & Submit All Answers' to start.
462
- 3. Wait for the agent to process all 20 questions (this may take several minutes).
463
  """
464
  )
465
 
466
  gr.LoginButton()
467
  run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
468
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
469
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
470
 
471
- run_button.click(
472
- fn=run_and_submit_all,
473
- outputs=[status_output, results_table]
474
- )
475
 
476
  if __name__ == "__main__":
477
  print("\n" + "-"*30 + " App Starting " + "-"*30)
478
- space_host_startup = os.getenv("SPACE_HOST")
479
- space_id_startup = os.getenv("SPACE_ID")
480
-
481
- if space_host_startup:
482
- print(f"✅ SPACE_HOST found: {space_host_startup}")
483
- else:
484
- print("ℹ️ SPACE_HOST not found (running locally?).")
485
-
486
- if space_id_startup:
487
- print(f"✅ SPACE_ID found: {space_id_startup}")
488
- else:
489
- print("ℹ️ SPACE_ID not found (running locally?).")
490
-
491
- print("-"*60 + "\n")
492
- print("Launching Gradio Interface...")
493
  demo.launch(debug=True, share=False)
 
7
  CodeAgent,
8
  DuckDuckGoSearchTool,
9
  VisitWebpageTool,
10
+ OpenAIModel,
11
  tool,
 
12
  )
13
 
 
14
# --- Constants ---
# Base URL of the scoring service; "/questions" and "/submit" are appended to
# it when fetching the task list and posting the answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
 
21
 
22
@tool
def download_file_from_api(task_id: str) -> str:
    """Downloads a file for a GAIA task. Use when question mentions a file/attachment.

    Args:
        task_id: The task_id string for the question.
    """
    # NOTE: smolagents uses the docstring above as the tool description shown
    # to the model, so implementation notes are kept in comments to avoid
    # spending prompt tokens.
    #
    # Return value by content type:
    #   Word / Excel / PDF / text-like -> extracted text, cut to 12000 chars
    #   image                          -> "IMAGE_FILE_SAVED:<temp path>"
    #   audio                          -> "AUDIO_FILE_SAVED:<temp path>"
    #   anything else                  -> message naming a ".bin" temp file
    # Any failure is reported as an "Error downloading: ..." string.
    import io
    import tempfile

    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        ct = resp.headers.get("Content-Type", "")
        # Compare against the bare MIME type (no "; charset=..." parameter).
        mime = ct.split(";", 1)[0].strip().lower()

        # Office formats must be recognised BEFORE the text check: their MIME
        # types contain "openxmlformats", which contains the substring "xml",
        # so a naive '"xml" in ct' test returns the raw zip bytes as text.
        if "wordprocessingml" in mime or "msword" in mime:
            import docx

            doc = docx.Document(io.BytesIO(resp.content))
            return "\n".join(p.text for p in doc.paragraphs)[:12000]

        if "spreadsheet" in mime or "excel" in mime:
            import openpyxl

            wb = openpyxl.load_workbook(io.BytesIO(resp.content))
            lines = []
            for sn in wb.sheetnames:
                ws = wb[sn]
                lines.append(f"--- Sheet: {sn} ---")
                for row in ws.iter_rows(values_only=True):
                    # Only empty cells become ""; 0 and False are real values.
                    lines.append("\t".join("" if c is None else str(c) for c in row))
            return "\n".join(lines)[:12000]

        if "pdf" in mime:
            import PyPDF2

            reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
            text = "".join(p.extract_text() or "" for p in reader.pages)
            return text[:12000] if text.strip() else "PDF: no text extracted."

        # Text-like payloads (checked before images so SVG stays readable text).
        is_textual = (
            mime.startswith("text/")
            or mime.endswith(("/xml", "+xml"))
            or any(t in mime for t in ("json", "csv", "html", "python"))
        )
        if is_textual:
            return resp.text[:12000]

        if "image" in mime:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
                f.write(resp.content)
            return f"IMAGE_FILE_SAVED:{f.name}"

        if any(t in mime for t in ("audio", "mpeg", "wav", "mp3", "ogg")):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
                f.write(resp.content)
            return f"AUDIO_FILE_SAVED:{f.name}"

        # Unknown type: keep the bytes on disk and tell the agent where.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".bin") as f:
            f.write(resp.content)
        return f"File saved: {f.name} (type: {ct}, {len(resp.content)} bytes)"

    except Exception as e:
        # Best effort: the agent receives the failure as text, not an exception.
        return f"Error downloading: {e}"
80
 
81
 
82
@tool
def describe_image(image_path: str) -> str:
    """Describes an image using a vision model. Use after getting IMAGE_FILE_SAVED.

    Args:
        image_path: Path to the image file.
    """
    # Returns "Image: <caption>" on success and "Image error: <reason>" on any
    # failure (missing file, import error, inference error).
    try:
        from huggingface_hub import InferenceClient

        vision_client = InferenceClient(token=os.getenv("HF_TOKEN"))
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
        caption = vision_client.image_to_text(
            image=image_bytes,
            model="Salesforce/blip2-opt-2.7b",
        )
        # The client may hand back a plain string or an object carrying the
        # caption in `generated_text`; anything else is stringified.
        if not isinstance(caption, str):
            caption = getattr(caption, "generated_text", str(caption))
        return f"Image: {caption}"
    except Exception as err:
        return f"Image error: {err}"
98
 
99
 
100
@tool
def transcribe_audio(audio_path: str) -> str:
    """Transcribes audio to text. Use after getting AUDIO_FILE_SAVED.

    Args:
        audio_path: Path to the audio file.
    """
    # Returns "Transcription: <text>" on success and "Audio error: <reason>"
    # on any failure (missing file, import error, inference error).
    try:
        from huggingface_hub import InferenceClient

        speech_client = InferenceClient(token=os.getenv("HF_TOKEN"))
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        transcript = speech_client.automatic_speech_recognition(
            audio=audio_bytes,
            model="openai/whisper-large-v3-turbo",
        )
        # Accept either a plain string or an object exposing `.text`;
        # anything else is stringified.
        if not isinstance(transcript, str):
            transcript = getattr(transcript, "text", str(transcript))
        return f"Transcription: {transcript}"
    except Exception as err:
        return f"Audio error: {err}"
116
 
117
 
118
@tool
def read_local_file(file_path: str) -> str:
    """Reads a local text file.

    Args:
        file_path: Path to the file.
    """
    # Undecodable bytes are dropped (errors="ignore"); the result is capped at
    # 12000 characters. Failures come back as "Read error: <reason>".
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
            contents = handle.read()
    except Exception as err:
        return f"Read error: {err}"
    return contents[:12000]
130
 
131
 
132
@tool
def execute_python_file(file_path: str) -> str:
    """Runs a Python script and returns output.

    Args:
        file_path: Path to the .py file.
    """
    # Returns the script's stdout, followed by "STDERR: ..." when it wrote to
    # stderr; "No output." when both are empty; "Timeout after 30s." when the
    # script exceeds the limit; "Exec error: ..." for any other failure.
    import subprocess
    import sys

    # Run the script with the interpreter hosting this app so it sees the same
    # installed packages; a bare "python3" may be absent from PATH or point at
    # a different environment. Fall back to it only if the path is unknown.
    interpreter = sys.executable or "python3"
    try:
        r = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            timeout=30,
        )
        out = r.stdout + (f"\nSTDERR: {r.stderr}" if r.stderr else "")
        return out.strip() or "No output."
    except subprocess.TimeoutExpired:
        return "Timeout after 30s."
    except Exception as e:
        return f"Exec error: {e}"
148
 
149
 
150
  # =============================================
151
+ # AGENT
152
  # =============================================
153
 
154
# Concise instructions to save tokens.
# Every "X → Y" bullet uses the same arrow separator so the model reads them
# as condition/action pairs.
INSTRUCTIONS = """You solve GAIA benchmark questions precisely.

ANSWER FORMAT:
- Return ONLY the final answer. No "The answer is", no explanations.
- Number → just the number (e.g. "42")
- Name → just the name (e.g. "Paris")
- List → comma-separated (e.g. "red, blue, green")

STRATEGY:
- Keep reasoning SHORT. Think step by step but briefly.
- Always verify facts with web_search. Don't rely on memory.
- If the answer isn't found directly, break the problem into parts and reason through them.
- For counting tasks: gather all items first, then count carefully.
- If a question mentions a file/attachment, FIRST call download_file_from_api with the task_id.
- If download returns IMAGE_FILE_SAVED → call describe_image with that path.
- If download returns AUDIO_FILE_SAVED → call transcribe_audio with that path.
- For reversed/encoded text, decode it before answering.
- If a question references a URL, use visit_webpage to read it.
"""
174
 
175
 
176
  class BasicAgent:
 
 
177
  def __init__(self):
178
+ print("Initializing agent with Gemini 2.0 Flash...")
179
 
180
+ model = OpenAIModel(
181
+ model_id="gemini-2.0-flash",
182
+ api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
183
+ api_key=os.getenv("GEMINI_API_KEY"),
184
  temperature=0.1,
185
+ max_tokens=1500,
186
  )
187
 
188
  self.agent = CodeAgent(
 
196
  read_local_file,
197
  execute_python_file,
198
  ],
199
+ max_steps=7,
200
  verbosity_level=2,
201
+ instructions=INSTRUCTIONS,
202
  additional_authorized_imports=[
203
  "json", "re", "math", "datetime", "collections",
204
  "csv", "io", "os", "tempfile", "subprocess",
205
  "base64", "hashlib", "unicodedata", "string",
206
  ],
207
  )
208
+ print("Agent ready!")
 
209
 
210
  def __call__(self, question: str, task_id: str = None) -> str:
211
+ print(f"Processing: {question[:80]}...")
212
 
213
  if task_id:
214
+ prompt = f'If needed, download file with: download_file_from_api("{task_id}")\n\nQuestion: {question}\n\nAnswer with ONLY the final answer.'
 
 
 
 
215
  else:
216
+ prompt = f"Question: {question}\n\nAnswer with ONLY the final answer."
 
 
 
 
217
 
 
218
  for attempt in range(2):
219
  try:
220
  result = self.agent.run(prompt)
221
  answer = str(result).strip()
222
 
223
+ # Clean prefixes
224
+ for p in ["The answer is ", "The answer is: ", "Answer: ",
225
+ "FINAL ANSWER: ", "Final answer: ", "The final answer is ",
226
+ "The final answer is: ", "Result: "]:
227
+ if answer.lower().startswith(p.lower()):
228
+ answer = answer[len(p):].strip()
229
+
230
+ # Remove quotes
231
+ if len(answer) > 2 and answer[0] in '"\'':
232
+ if answer[-1] == answer[0]:
233
+ answer = answer[1:-1].strip()
234
+
235
+ # Remove trailing period
 
 
 
 
 
236
  if answer.endswith(".") and len(answer.split()) <= 5:
237
  answer = answer[:-1].strip()
238
 
239
+ print(f"Answer: {answer}")
240
  return answer
241
 
242
  except Exception as e:
243
+ print(f"Error (attempt {attempt+1}): {e}")
244
  if attempt == 0:
245
+ time.sleep(3)
 
246
 
247
  return "Unable to determine the answer."
248
 
249
 
250
  # =============================================
251
+ # SUBMISSION
252
  # =============================================
253
 
254
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each, submit the answers.

    Args:
        profile: The logged-in user's profile; when it is falsy (nobody is
            logged in) the function returns a login prompt and does nothing.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message and
        ``table`` is a DataFrame with one row per processed question, or
        ``None`` when the run stopped before any question was processed.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User: {username}")

    api_url = DEFAULT_API_URL

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(f"{api_url}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
        if not questions:
            return "No questions fetched.", None
        print(f"Fetched {len(questions)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []  # every processed question, shown in the UI table
    answers = []      # only successful answers, sent to the scoring API

    for i, item in enumerate(questions):
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue

        print(f"\n{'='*60}")
        print(f" Q {i+1}/{len(questions)} — {task_id}")
        print(f" {question[:100]}...")
        print(f"{'='*60}")

        try:
            answer = agent(question, task_id=task_id)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # Logged for the user but deliberately not submitted.
            print(f"Error on {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"})

        # Short pause between questions.
        time.sleep(1)

    if not answers:
        return "No answers produced.", pd.DataFrame(results_log)

    submission = {"username": username.strip(), "agent_code": agent_code, "answers": answers}

    try:
        resp = requests.post(f"{api_url}/submit", json=submission, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count', '?')}/{data.get('total_attempted', '?')} correct)\n"
            f"Message: {data.get('message', '')}"
        )
        return status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        # A requests.Response is falsy for 4xx/5xx status codes, so test for
        # None explicitly; a plain truthiness check would always discard the
        # server's error body here.
        detail = e.response.text[:500] if e.response is not None else str(e)
        return f"Submission Failed: {detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission error: {e}", pd.DataFrame(results_log)
328
 
329
 
330
# --- Gradio UI ---
# Single-page layout: title, short description, login button, run button, and
# two output widgets that receive the (status, table) pair returned by
# run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — Final Assignment")
    gr.Markdown(
        """
        **Agent**: CodeAgent with Gemini 2.0 Flash (free)
        **Tools**: Web Search · Webpage Visitor · File Downloader · Image Describer · Audio Transcriber · Python Executor

        1. Log in with your HF account
        2. Click Run to start (takes ~15-20 min)
        """
    )

    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # No explicit `inputs`: the handler's `profile` argument is presumably
    # supplied by Gradio from its OAuthProfile annotation — TODO confirm.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
349
 
350
if __name__ == "__main__":
    # Startup banner: a 74-character rule with the title centred in it,
    # the Space id (or "not set" when run outside a Space), then a 60-char rule.
    print("\n" + " App Starting ".center(74, "-"))
    print(f"SPACE_ID: {os.environ.get('SPACE_ID', 'not set')}")
    print(60 * "-")
    demo.launch(debug=True, share=False)