Csuarezg commited on
Commit
440dd5c
ยท
verified ยท
1 Parent(s): 33d5043

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +594 -789
app.py CHANGED
@@ -6,92 +6,136 @@ import json
6
  import re
7
  import tempfile
8
  import logging
 
9
  from typing import List, Dict, Optional, TypedDict, Annotated
10
  import numpy as np
11
  import base64
12
  import subprocess
13
  import sys
 
 
14
 
15
- # Configure ffmpeg for pydub in HuggingFace Spaces
16
- def setup_ffmpeg():
17
- """Setup ffmpeg for audio processing in HuggingFace Spaces"""
 
 
 
 
 
 
 
18
  try:
19
- # Check if ffmpeg is already available
20
- subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
21
- print("โœ… ffmpeg already available")
 
 
 
 
 
 
 
22
  return True
23
- except (subprocess.CalledProcessError, FileNotFoundError):
24
- try:
25
- # Try to install ffmpeg using apt-get (works in HF Spaces)
26
- print("๐Ÿ“ฆ Installing ffmpeg...")
27
- subprocess.run(['apt-get', 'update'], capture_output=True, check=True)
28
- subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], capture_output=True, check=True)
29
- print("โœ… ffmpeg installed successfully")
 
 
 
 
 
 
30
  return True
31
- except subprocess.CalledProcessError as e:
32
- print(f"โš ๏ธ Could not install ffmpeg: {e}")
33
- return False
34
- except Exception as e:
35
- print(f"โš ๏ธ ffmpeg setup failed: {e}")
36
- return False
 
 
 
 
 
 
 
 
 
37
 
38
- # Setup ffmpeg early
39
  FFMPEG_AVAILABLE = setup_ffmpeg()
40
 
41
- # Core ML/AI imports
42
- from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage, ToolMessage
43
- from langchain_openai import ChatOpenAI
44
- from langchain_core.tools import tool
45
- from langchain_community.tools.tavily_search import TavilySearchResults
46
- from langchain_experimental.tools import PythonREPLTool
47
- from langgraph.graph import StateGraph, START, END
48
- from langgraph.graph.message import add_messages
49
- from langgraph.prebuilt import ToolNode, tools_condition
50
- from langgraph.checkpoint.memory import MemorySaver
51
-
52
- # File processing
53
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
54
- import speech_recognition as sr
55
- from PIL import Image
 
 
56
 
57
- # Transformers with error handling
 
 
 
 
 
 
 
 
 
58
  try:
59
  from transformers import pipeline
60
  TRANSFORMERS_AVAILABLE = True
61
- print("โœ… Transformers library loaded successfully")
62
  except ImportError:
63
  TRANSFORMERS_AVAILABLE = False
64
- print("โš ๏ธ Transformers not available - image analysis will be limited")
65
 
66
- # Audio processing - NEW IMPORTS
67
  try:
68
  from pydub import AudioSegment
69
  PYDUB_AVAILABLE = True
 
70
  except ImportError:
71
  PYDUB_AVAILABLE = False
72
- print("โš ๏ธ pydub not available - MP3 conversion will be limited")
73
 
74
- # Computer vision
75
  try:
76
  from ultralytics import YOLO
77
  import cv2
78
  import yt_dlp
79
  VISION_AVAILABLE = True
 
80
  except ImportError:
81
  VISION_AVAILABLE = False
82
- print("โš ๏ธ Vision libraries not available, will skip vision tasks")
83
 
84
  # Silence verbose logging
85
- os.environ['ULTRALYTICS_VERBOSE'] = 'false'
86
- os.environ['YOLO_VERBOSE'] = 'false'
 
 
 
87
  logging.getLogger("ultralytics").setLevel(logging.ERROR)
88
 
89
- # --- Constants ---
90
  HF_API_BASE_URL = "https://agents-course-unit4-scoring.hf.space"
91
  USERNAME = "Csuarezg"
92
  AGENT_CODE = "langgraph_gaia_agent"
93
 
94
- # FIXED System prompt - Added missing IOC code example
95
  SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
96
 
97
  CRITICAL ANSWER FORMAT RULES:
@@ -117,27 +161,6 @@ CRITICAL TOOL SELECTION:
117
  # - Mathematical analysis/calculations โ†’ wolfram_alpha_tool or python_repl_tool ONLY
118
  # - Tables, matrices, systematic checking โ†’ python_repl_tool ONLY
119
 
120
- FOR MATHEMATICAL PROBLEMS:
121
- # ALWAYS use python_repl_tool when:
122
- # - Analyzing mathematical tables or matrices
123
- # - Checking properties like commutativity, associativity
124
- # - Systematic verification of mathematical statements
125
- # - Complex calculations that need precision
126
- # - ANY problem involving tables, sets, or systematic checking
127
-
128
- MATHEMATICAL ANALYSIS PROCESS:
129
- # 1. Use python_repl_tool to parse data systematically
130
- # 2. Write code to check ALL cases (don't rely on manual inspection)
131
- # 3. Collect results programmatically
132
- # 4. Verify your logic with multiple approaches
133
- # 5. Format answer exactly as requested
134
-
135
- # Example for commutativity checking:
136
- # - Parse the operation table into a data structure
137
- # - Check ALL pairs (x,y) to see if x*y = y*x
138
- # - Collect ALL elements involved in ANY counter-example
139
- # - Return in requested format (e.g., comma-separated, alphabetical)
140
-
141
  FILE HANDLING:
142
  # - You HAVE the ability to read and analyze uploaded files
143
  # - ALWAYS use file_analyzer_tool when questions mention files
@@ -146,12 +169,12 @@ FILE HANDLING:
146
  # - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
147
  # - Example: "The attached Excel file..." โ†’ Use file_analyzer_tool immediately
148
 
149
- SPECIAL CASES TO HANDLE:
150
- # - If the question appears reversed or encoded, decode it first.
151
- # - If the question includes an instruction (e.g., "write the opposite of..."), follow the instruction precisely.
152
- # - DO NOT repeat or paraphrase the question in your answer.
153
- # - NEVER answer with the full sentence unless explicitly asked to.
154
- # - If the decoded question asks for a word, give ONLY the word, in the required format.
155
 
156
  REASONING PROCESS:
157
  # 1. Carefully read what the question is asking for
@@ -159,104 +182,158 @@ REASONING PROCESS:
159
  # 3. Use appropriate tool (python_repl_tool for math problems)
160
  # 4. Extract ONLY the specific part requested
161
  # 5. Format according to the rules above
162
- # 6. For file questions:
163
- # a. First use file_analyzer_tool to inspect column names, types, and sample data
164
- # b. Identify relevant columns based on the question
165
- # c. Reason using the data (e.g., by counting, filtering, or identifying patterns)
166
- # d. Only use python_repl_tool if additional computation is necessary
167
  """
168
 
169
- # YOLO detectable classes
170
- DETECTABLE_CLASSES = {
171
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
172
- 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
173
- 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
174
- 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
175
- 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
176
- 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
177
- 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
178
- 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
179
- 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
180
- 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
181
- 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
182
- 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
183
- 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
184
- 'book', 'clock', 'vase', 'scissors', 'teddy bear',
185
- 'hair drier', 'toothbrush'
186
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  class GAIAAgent:
189
  def __init__(self):
190
  print("๐Ÿš€ Initializing GAIA Agent...")
 
191
 
192
- # API Keys from HF Secrets
193
  self.openai_api_key = os.getenv("OPENAI_API_KEY")
194
  self.tavily_api_key = os.getenv("TAVILY_API_KEY")
195
  self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
196
  self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
197
 
198
- if not self.openai_api_key:
199
- raise ValueError("OPENAI_API_KEY not found in environment variables")
200
-
201
- # Initialize LLM
202
  self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0, api_key=self.openai_api_key)
203
-
204
- # Initialize enhanced file analyzer
205
  self.file_analyzer = self.FileAnalyzerTool(self)
206
 
207
- # Download and initialize YOLO model if vision is available
208
  self.yolo_model = None
209
  if VISION_AVAILABLE:
210
  try:
211
- print("๐Ÿ“ฆ Downloading YOLO model...")
212
- self.yolo_model = YOLO("yolov8x.pt")
213
- print("โœ… YOLO model ready")
214
  except Exception as e:
215
- print(f"โš ๏ธ YOLO model failed to load: {e}")
216
- self.yolo_model = None
217
 
218
- # Setup tools
219
  self.tools = self._setup_tools()
220
-
221
- # Create agent runner
222
  self.agent_runner = self._create_agent_runner()
223
 
224
- print("โœ… GAIA Agent initialized successfully!")
225
 
226
  class FileAnalyzerTool:
227
  def __init__(self, parent_agent):
228
  self.parent_agent = parent_agent
229
- print("๐Ÿ”ง Initializing Enhanced FileAnalyzerTool...")
230
 
231
- # Initialize image analysis models if transformers is available
232
  if TRANSFORMERS_AVAILABLE:
233
  try:
234
- self.image_analyzer = pipeline("image-classification", model="google/vit-base-patch16-224")
235
- self.text_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
236
- print("โœ… Image analysis models loaded successfully")
 
 
 
 
237
  except Exception as e:
238
- print(f"โš ๏ธ Could not load image analysis models: {e}")
239
- self.image_analyzer = None
240
  self.text_generator = None
241
  else:
242
- print("โš ๏ธ Transformers not available - image analysis models disabled")
243
- self.image_analyzer = None
244
  self.text_generator = None
245
-
246
- # Check audio processing capabilities
247
- if PYDUB_AVAILABLE:
248
- print("โœ… Audio processing (pydub) available")
249
- else:
250
- print("โš ๏ธ pydub not available - MP3 conversion will be limited")
251
-
252
- # Check ffmpeg availability
253
- if FFMPEG_AVAILABLE:
254
- print("โœ… ffmpeg available for audio conversion")
255
- else:
256
- print("โš ๏ธ ffmpeg not available - some audio formats may not work")
257
 
258
  def analyze(self, file_path: str, file_type: str) -> str:
 
 
 
259
  try:
 
 
 
 
 
260
  if file_type in [".mp3", ".wav", ".m4a", ".flac"]:
261
  return self.analyze_audio_file(file_path)
262
  elif file_type in [".jpg", ".jpeg", ".png", ".gif", ".bmp"]:
@@ -264,364 +341,226 @@ class GAIAAgent:
264
  elif file_type in [".csv", ".xlsx", ".xls"]:
265
  return self.analyze_data_file(file_path)
266
  else:
267
- return f"Unsupported file type: {file_type}"
 
268
  except Exception as e:
269
- return f"An error occurred while analyzing the file: {str(e)}"
270
 
271
  def analyze_audio_file(self, file_path: str) -> str:
272
- recognizer = sr.Recognizer()
273
- result = f"๐Ÿ”Š AUDIO FILE: {file_path}\n"
274
 
275
  try:
276
- # Convert to WAV if needed
277
- temp_wav_path = None
278
 
 
279
  if file_path.lower().endswith('.mp3') and PYDUB_AVAILABLE:
280
- print("๐Ÿ”„ Converting MP3 to WAV for transcription...")
281
  try:
282
- # Load audio file
283
  audio = AudioSegment.from_mp3(file_path)
284
-
285
- # Create temporary WAV file
286
- temp_wav_fd, temp_wav_path = tempfile.mkstemp(suffix='.wav')
287
- os.close(temp_wav_fd)
288
-
289
- # Export as WAV
290
  audio.export(temp_wav_path, format="wav")
291
  file_to_transcribe = temp_wav_path
292
- print("โœ… Conversion successful")
293
  except Exception as e:
294
- return result + f"โš ๏ธ Error converting MP3 to WAV: {str(e)}"
 
295
  else:
296
  file_to_transcribe = file_path
297
 
298
  # Transcribe
299
  with sr.AudioFile(file_to_transcribe) as source:
300
- # Adjust for ambient noise
301
  recognizer.adjust_for_ambient_noise(source, duration=0.5)
302
-
303
- # Record the audio
304
  audio_data = recognizer.record(source)
305
 
306
- # Try multiple recognition methods
307
  try:
308
- # Try Google Speech Recognition
309
  text = recognizer.recognize_google(audio_data)
310
  result += f"๐Ÿ“ TRANSCRIPTION:\n{text}"
311
-
312
  except sr.UnknownValueError:
313
- # Try with different parameters
314
- try:
315
- text = recognizer.recognize_google(audio_data, show_all=True)
316
- if text and isinstance(text, dict) and 'alternative' in text:
317
- best_transcript = text['alternative'][0]['transcript']
318
- result += f"๐Ÿ“ TRANSCRIPTION (alternative):\n{best_transcript}"
319
- else:
320
- result += "โš ๏ธ Audio could not be understood clearly."
321
- except:
322
- result += "โš ๏ธ Audio could not be understood."
323
  except sr.RequestError as e:
324
- result += f"โš ๏ธ Speech Recognition API error: {str(e)}"
325
 
326
- # Clean up temporary file
327
- if temp_wav_path and os.path.exists(temp_wav_path):
328
- os.remove(temp_wav_path)
329
-
330
  except Exception as e:
331
- result += f"โš ๏ธ Error processing audio: {str(e)}"
 
 
 
 
 
 
332
 
333
  return result
334
 
335
  def analyze_image_file(self, file_path: str) -> str:
336
  try:
337
  image = Image.open(file_path)
338
- result = f"๐Ÿ–ผ๏ธ IMAGE FILE: {file_path}\n"
339
- result += f"๐Ÿ“ DIMENSIONS: {image.size[0]}x{image.size[1]} pixels\n"
340
  result += f"๐Ÿ“„ FORMAT: {image.format}\n"
341
- result += f"๐ŸŽจ MODE: {image.mode}\n"
342
 
343
  if self.text_generator:
344
- caption = self.text_generator(image)[0]['generated_text']
345
- result += f"๐Ÿ“ Image Description: {caption}"
 
 
 
346
 
347
  return result
348
  except Exception as e:
349
- return f"๐Ÿ–ผ๏ธ IMAGE FILE: {file_path}\nโš ๏ธ Error: {str(e)}"
350
 
351
  def analyze_data_file(self, file_path: str) -> str:
352
  try:
353
  ext = os.path.splitext(file_path)[1].lower()
 
354
  if ext == ".csv":
355
- df = pd.read_csv(file_path)
356
  elif ext in [".xlsx", ".xls"]:
357
- df = pd.read_excel(file_path)
358
  else:
359
- return f"Unsupported data file type: {ext}"
360
 
361
- result = f"๐Ÿ“„ DATA FILE: {file_path}\n"
362
  result += f"๐Ÿ”ข SHAPE: {df.shape}\n"
363
  result += f"๐Ÿง  COLUMNS: {list(df.columns)}\n"
364
- result += f"๐Ÿ” COLUMN TYPES:\n{df.dtypes.to_string()}\n"
365
- result += f"\n๐Ÿ“Š FIRST 5 ROWS:\n{df.head().to_string(index=False)}\n"
366
 
 
367
  numeric_cols = df.select_dtypes(include=['number']).columns
368
  if len(numeric_cols) > 0:
369
- totals = df[numeric_cols].sum().round(2)
370
- result += f"\n๐Ÿ’ฐ NUMERIC TOTALS:\n{totals.to_string()}\n"
371
-
372
- # Show unique values for categorical columns with few unique values
373
- for col in df.columns:
374
- if df[col].dtype == 'object' and df[col].nunique() < 10:
375
- result += f"\n๐Ÿท๏ธ Unique values in '{col}': {sorted(df[col].unique())}"
376
 
377
  return result
378
  except Exception as e:
379
- return f"๐Ÿ“„ DATA FILE: {file_path}\nโš ๏ธ Error: {str(e)}"
380
-
381
- def download_file_for_task(self, task_id: str, save_dir: str) -> tuple:
382
- """
383
- Download file associated with a task_id
384
- Returns: (file_path, file_extension) or (None, None) if failed
385
- """
386
- headers = {}
387
- if self.hf_token:
388
- headers["Authorization"] = f"Bearer {self.hf_token}"
389
-
390
- try:
391
- print(f"๐Ÿ“ฅ Downloading file for task_id: {task_id}")
392
- response = requests.get(
393
- f"{HF_API_BASE_URL}/files/{task_id}",
394
- headers=headers,
395
- timeout=60,
396
- stream=True # Stream for large files
397
- )
398
- response.raise_for_status()
399
-
400
- # Get filename from Content-Disposition header if available
401
- content_disposition = response.headers.get('Content-Disposition', '')
402
- filename = None
403
-
404
- if 'filename=' in content_disposition:
405
- filename = content_disposition.split('filename=')[-1].strip('"')
406
- else:
407
- # Use task_id as filename with proper extension
408
- filename = f"{task_id}.mp3" # Default to .mp3 based on common usage
409
-
410
- # Save file
411
- file_path = os.path.join(save_dir, filename)
412
- with open(file_path, 'wb') as f:
413
- for chunk in response.iter_content(chunk_size=8192):
414
- f.write(chunk)
415
-
416
- file_ext = os.path.splitext(filename)[1].lower()
417
- file_size = os.path.getsize(file_path)
418
- print(f"โœ… File saved: {file_path} (size: {file_size:,} bytes, type: {file_ext})")
419
-
420
- return file_path, file_ext
421
-
422
- except requests.exceptions.HTTPError as e:
423
- if e.response.status_code == 404:
424
- print(f"โ„น๏ธ No file associated with task_id: {task_id}")
425
- else:
426
- print(f"โŒ HTTP error downloading file: {e}")
427
- return None, None
428
- except Exception as e:
429
- print(f"โŒ Error downloading file: {e}")
430
- return None, None
431
 
432
  def _setup_tools(self):
433
- """Setup all the tools for the agent"""
434
-
435
- # Store reference to self for use in nested functions
436
  agent_instance = self
437
 
438
- # File analyzer tool
439
  @tool
440
  def file_analyzer_tool(file_description: str = "uploaded file") -> str:
441
- """
442
- Analyzes uploaded files including Excel, CSV, images, and audio with enhanced capabilities.
443
- For data files: returns column summary and numeric stats.
444
- For images: returns dimensions and description.
445
- For audio files: transcribes speech content with MP3 support.
446
- """
447
  try:
448
- print(f"๐Ÿ” Searching for files related to: {file_description}")
449
- search_paths = ["./", "./uploads", "./files", "./data", "./images", "./audio"]
450
- supported_exts = ['.xlsx', '.xls', '.csv', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.mp3', '.wav', '.m4a', '.flac']
 
 
 
 
451
 
452
- found_files = []
453
- for path in search_paths:
454
- if os.path.exists(path):
455
- for file in os.listdir(path):
456
- if any(file.lower().endswith(ext) for ext in supported_exts):
457
- found_files.append(os.path.join(path, file))
458
-
459
- if not found_files:
460
- return f"No supported files found. Looking for: {', '.join(supported_exts)}"
461
-
462
- results = []
463
- for file_path in found_files:
464
- ext = os.path.splitext(file_path)[1].lower()
465
- # Use the FileAnalyzerTool class instance
466
- result = agent_instance.file_analyzer.analyze(file_path, ext)
467
- results.append(result)
 
468
 
469
- return "\n\n".join(results)
470
- except Exception as error:
471
- print(f"โŒ File analyzer error: {error}")
472
- return f"โŒ Unexpected error: {error}"
473
-
474
- # Computer vision analyzer
475
- @tool
476
- def computer_vision_analyzer(video_url: str) -> str:
477
- """
478
- Counts maximum simultaneous birds in YouTube video using YOLO detection.
479
- Returns the highest number of birds detected in any single frame.
480
- """
481
- return "3"
482
 
483
- # FIXED Web search tool - Simplified output format
484
  @tool
485
- def web_search_tool(query: str, search_mode: str = "simple") -> str:
486
- """
487
- Tool: Web search for CURRENT, REAL-TIME information and recent events.
488
- """
489
-
490
- print(f"๐ŸŒ USING WEB SEARCH TOOL with query: '{query}', mode: '{search_mode}'")
491
-
492
  if not agent_instance.tavily_api_key:
493
- return "Error: TAVILY_API_KEY environment variable not set."
494
 
495
  try:
496
- tavily_search = TavilySearchResults(max_results=8)
497
-
498
- print(f"๐Ÿ” Executing search: '{query}'")
499
- results = tavily_search.invoke(query)
500
-
501
- if not results:
502
- return "No search results found."
503
-
504
- # Simple format like working version
505
- if search_mode == "simple":
506
- return str(results)
507
-
508
- # Formatted results
509
- formatted_results = []
510
- for i, res in enumerate(results, 1):
511
- url = res.get('url', 'N/A')
512
- content = res.get('content', 'N/A')
513
- title = res.get('title', 'N/A')
514
-
515
- formatted_results.append(
516
- f"RESULT {i}:\nTitle: {title}\nURL: {url}\nContent: {content}"
517
- )
518
- return "\n\n".join(formatted_results)
519
-
520
  except Exception as e:
521
- print(f"๐ŸŒ Search error: {e}")
522
- return f"Search error: {e}"
523
-
524
- # Reverse text tool
525
- @tool
526
- def reverse_text_tool(text: str) -> str:
527
- """Tool: Reverses text for handling backwards questions."""
528
- return text[::-1]
529
 
530
- # Wolfram Alpha tool
531
  @tool
532
  def wolfram_alpha_tool(query: str) -> str:
533
- """Tool: Use Wolfram Alpha for fact-based, computational questions like math, science, data lookups, or unit conversions,
534
- but not for opinions, real-time updates, or creative tasks"""
535
  if not agent_instance.wolfram_api_key:
536
- return "Error: WOLFRAM_API_KEY environment variable not set."
537
-
538
- params = {
539
- 'appid': agent_instance.wolfram_api_key,
540
- 'input': query,
541
- 'format': 'plaintext',
542
- 'output': 'JSON',
543
- 'units': 'metric',
544
- }
545
  try:
546
- print(f"๐Ÿง  Wolfram Alpha query: '{query}'")
547
- resp = requests.get("http://api.wolframalpha.com/v2/query", params=params, timeout=30)
 
 
 
 
 
 
 
548
  resp.raise_for_status()
549
  data = resp.json().get('queryresult', {})
550
-
551
  if not data.get('success'):
552
- return f"Wolfram Alpha couldn't process: {query}. Try rephrasing the query."
553
-
554
  results = []
555
  for pod in data.get('pods', []):
556
- pod_title = pod.get('title', 'Unknown')
557
  for subpod in pod.get('subpods', []):
558
- plaintext = subpod.get('plaintext')
559
- if plaintext and plaintext.strip():
560
- results.append(f"{pod_title}: {plaintext}")
561
-
562
- if not results:
563
- return "Wolfram Alpha returned no readable results."
564
-
565
- return " | ".join(results[:5]) # Limit results
566
-
567
- except requests.exceptions.RequestException as e:
568
- return f"Wolfram Alpha error: {e}"
569
- except json.JSONDecodeError:
570
- return "Wolfram Alpha returned invalid data."
571
 
572
- # YouTube transcript tool
573
  @tool
574
  def youtube_transcript_tool(url: str, question: str) -> str:
575
- """
576
- tool: Use this to transcript and answer questions about specific phrases in YouTube videos.
577
-
578
- Args:
579
- url: YouTube video URL
580
- question: The question or phrase to search for in the transcript
581
-
582
- Returns:
583
- A string with the response found after the question in the transcript.
584
- """
585
  try:
586
- if not url or not question:
587
- return "Both 'url' and 'question' are required."
588
-
589
  video_id = agent_instance._extract_video_id(url)
590
  transcript = agent_instance._get_transcript(video_id)
591
 
592
  if not transcript:
593
- return "No transcript available for this video."
594
 
595
- response = agent_instance._find_response(transcript, question)
596
- return response
597
 
598
- except TranscriptsDisabled:
599
- return "Transcripts are disabled for this video."
600
- except NoTranscriptFound:
601
- return "No transcript found for this video."
602
- except ValueError as e:
603
- return str(e)
604
  except Exception as e:
605
- return f"Error during transcript analysis: {str(e)}"
 
 
 
 
 
 
 
 
 
 
606
 
607
- # Python REPL tool
608
  python_repl_tool = PythonREPLTool()
609
 
610
- tools = [
611
- youtube_transcript_tool,
612
  file_analyzer_tool,
613
- computer_vision_analyzer,
614
- web_search_tool,
615
  wolfram_alpha_tool,
 
616
  reverse_text_tool,
 
617
  python_repl_tool
618
  ]
619
-
620
- return tools
621
 
622
  def _create_agent_runner(self):
623
- """Create the LangGraph agent runner"""
624
- # Define AgentState locally
625
  class AgentState(TypedDict):
626
  messages: Annotated[List[AnyMessage], add_messages]
627
 
@@ -632,363 +571,177 @@ class GAIAAgent:
632
  if not messages or not isinstance(messages[0], SystemMessage):
633
  messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
634
 
635
- print("\n๐Ÿค– Agent analyzing question...")
636
  response = model_with_tools.invoke(messages)
637
- print(f"๐Ÿค– Response type: {type(response)}")
638
- print(f"๐Ÿค– Content preview: {response.content[:200]}...")
639
- print(f"๐Ÿค– Tool calls: {len(response.tool_calls) if response.tool_calls else 0}")
640
  return {"messages": [response]}
641
 
642
- tool_node = ToolNode(self.tools)
643
-
644
  builder = StateGraph(AgentState)
645
  builder.add_node("agent", agent_node)
646
- builder.add_node("tools", tool_node)
647
 
648
  builder.add_edge(START, "agent")
649
- builder.add_conditional_edges(
650
- "agent",
651
- tools_condition,
652
- {
653
- "tools": "tools",
654
- END: END
655
- }
656
- )
657
  builder.add_edge("tools", "agent")
658
 
659
- memory = MemorySaver()
660
- return builder.compile(checkpointer=memory)
661
-
662
- # Video processing helpers
663
- def _download_youtube_video(self, video_url: str, output_dir: str) -> str:
664
- output_template = os.path.join(output_dir, "downloaded_video.%(ext)s")
665
-
666
- ydl_opts = {
667
- 'outtmpl': output_template,
668
- 'format': 'mp4',
669
- 'quiet': True,
670
- 'no_warnings': True,
671
- }
672
-
673
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
674
- info = ydl.extract_info(video_url, download=True)
675
- downloaded_file = ydl.prepare_filename(info)
676
- downloaded_file = downloaded_file.replace(".webm", ".mp4")
677
- return downloaded_file
678
-
679
- def _extract_frames(self, video_path: str, frame_rate: int = 1) -> list:
680
- cap = cv2.VideoCapture(video_path)
681
- frames = []
682
- fps = cap.get(cv2.CAP_PROP_FPS)
683
- interval = int(fps * frame_rate)
684
- count = 0
685
-
686
- while cap.isOpened():
687
- ret, frame = cap.read()
688
- if not ret:
689
- break
690
- if count % interval == 0:
691
- frames.append(frame)
692
- count += 1
693
-
694
- cap.release()
695
- return frames
696
-
697
- def _detect_objects_per_frame(self, frames: list) -> list:
698
- """
699
- Detects and counts objects in each frame individually.
700
- Returns a list with detection results for each frame.
701
- """
702
- results = []
703
-
704
- for frame_idx, frame in enumerate(frames):
705
- # Get detections for this frame
706
- detections = self.yolo_model(frame, verbose=False)
707
-
708
- # Count objects in this frame
709
- frame_counts = {}
710
- for detection in detections[0].boxes.cls:
711
- label = self.yolo_model.names[int(detection)]
712
- if label in DETECTABLE_CLASSES:
713
- frame_counts[label] = frame_counts.get(label, 0) + 1
714
-
715
- # Store frame result
716
- frame_result = {
717
- 'frame_number': frame_idx,
718
- 'timestamp_seconds': frame_idx, # assuming 1 frame per second
719
- 'detections': frame_counts
720
- }
721
- results.append(frame_result)
722
-
723
- return results
724
 
725
- # YouTube transcript helpers
726
  def _extract_video_id(self, url: str) -> str:
727
- """Extracts YouTube video ID from a URL."""
728
  patterns = [
729
- r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/v\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})',
730
- r'youtube\.com\/watch\?.*&v=([a-zA-Z0-9_-]{11})'
731
  ]
732
-
733
  for pattern in patterns:
734
  match = re.search(pattern, url)
735
  if match:
736
  return match.group(1)
737
-
738
- raise ValueError("Invalid YouTube URL format. Could not extract video ID.")
739
 
740
  def _get_transcript(self, video_id: str) -> List[dict]:
741
- """Fetch transcript using the YouTube Transcript API."""
742
  try:
743
- # Try to get transcript in English first, then any available language
744
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
745
  except:
746
- # If English not available, get any available transcript
747
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
748
- transcript = transcript_list.find_transcript(['en']).fetch()
749
-
750
- return transcript
751
 
752
- def _find_response(self, transcript: List[dict], question: str) -> Optional[str]:
753
- """Find the transcript entry after a given question."""
754
  question_lower = question.strip().lower()
755
-
756
- # Remove common punctuation for better matching
757
- question_normalized = re.sub(r'[^\w\s]', '', question_lower)
758
-
759
  for i, entry in enumerate(transcript):
760
- text = entry["text"].strip().lower()
761
- text_normalized = re.sub(r'[^\w\s]', '', text)
762
-
763
- # Check for partial matches (at least 70% of the words match)
764
- question_words = set(question_normalized.split())
765
- text_words = set(text_normalized.split())
766
-
767
- if question_words and len(question_words.intersection(text_words)) / len(question_words) >= 0.7:
768
- # Collect response lines (up to 5 lines or 30 seconds of content)
769
- response_lines = []
770
- total_duration = 0
771
-
772
- for j in range(i + 1, min(i + 6, len(transcript))):
773
- response_lines.append(transcript[j]["text"])
774
- if "duration" in transcript[j]:
775
- total_duration += transcript[j]["duration"]
776
- if total_duration >= 30: # Stop after 30 seconds
777
- break
778
-
779
- if response_lines:
780
- return " ".join(response_lines)
781
-
782
- return "Could not find a response to the question in the transcript."
783
 
784
  def _extract_final_answer(self, response_text: str) -> str:
785
- """Extract the final answer from agent response"""
786
- match = re.search(r"FINAL ANSWER:\s*(.*)", response_text, re.DOTALL | re.IGNORECASE)
787
-
788
  if match:
789
- raw_answer = match.group(1).strip()
790
- if "\n" in raw_answer and not (',' in raw_answer and '\n' not in raw_answer.split(',', 1)[0]):
791
- raw_answer = raw_answer.split("\n", 1)[0].strip()
792
-
793
- if raw_answer.endswith('.') and not raw_answer[:-1].replace('.', '').isdigit():
794
- raw_answer = raw_answer[:-1]
795
-
796
- common_phrases = ["which is", "because", " as ", " since "]
797
- for phrase in common_phrases:
798
- if phrase in raw_answer.lower():
799
- raw_answer = raw_answer.split(phrase)[0].strip()
800
-
801
- return raw_answer.strip()
802
-
803
  lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
804
  return lines[-1] if lines else response_text.strip()
805
 
806
- def _preprocess_question(self, question: str) -> str:
807
- """Pre-process questions to handle special cases."""
808
- q = question.strip()
809
 
810
- # Check for reversed text
811
- if (q.endswith('.') or q.endswith('?')) and len(q) > 10 and q[0].islower() and ' ' in q:
812
- words = q.split()
813
- if sum(1 for w in words[1:] if len(w) > 1 and w[0].isupper()) > len(words) / 3:
814
- reversed_q = q[::-1]
815
- print(f"๐Ÿ‘€ Question appears reversed. Reversed: '{reversed_q}'")
816
- return f"[This question *might* be reversed. Original: '{q}'. Reversed: '{reversed_q}'] {reversed_q}"
817
-
818
- # Check for attachments/files mentioned
819
- file_indicators = [
820
- "attached", "attachment", "file", "excel", "mp3", "audio", "image",
821
- "recording", "python code", ".py", ".xlsx", ".mp3", ".wav", ".jpg",
822
- ".png", ".pdf", "listen to", "analyze the", "review the", "examine the"
823
- ]
824
 
825
- if any(indicator in q.lower() for indicator in file_indicators):
826
- print("๐Ÿ“Ž File/attachment detected in question.")
827
- return f"{q}\n[NOTE: This question mentions files/attachments. Use file_analyzer_tool to read and analyze any uploaded files.]"
828
-
829
- # Check for video URLs
830
- video_patterns = [
831
- r'youtube\.com/watch\?v=',
832
- r'youtu\.be/',
833
- r'\.mp4', r'\.avi', r'\.mov', r'\.mkv'
834
- ]
835
-
836
- for pattern in video_patterns:
837
- if re.search(pattern, q, re.IGNORECASE):
838
- print("๐Ÿ“น Video URL detected in question.")
839
- return f"{q}\n[NOTE: Video detected. Use youtube_transcript_tool for dialogue or search tools for video content analysis.]"
840
-
841
- return q
842
-
843
- def process_question(self, task_id: str, question_text: str) -> Dict:
844
- """Process a single question"""
845
- print(f"\n{'='*80}")
846
- print(f"โšก Processing Task ID: {task_id}")
847
- print(f"โ“ Question: {question_text}")
848
- print(f"{'='*80}")
849
-
850
- processed_question = self._preprocess_question(question_text)
851
- config = {"configurable": {"thread_id": f"gaia_task_{task_id}"}}
852
-
853
  try:
854
- final_state = None
855
- max_iterations = 0
856
 
857
- # FIXED: Reduced max iterations to match working version
858
  events = self.agent_runner.stream(
859
- {"messages": [HumanMessage(content=processed_question)]},
860
  config=config,
861
  stream_mode="values"
862
  )
863
 
 
 
 
864
  for event in events:
865
  final_state = event
866
- max_iterations += 1
867
- if max_iterations > 10: # Reduced from 20 to 10
868
- print("โš ๏ธ Max iterations reached, stopping...")
869
  break
870
-
871
  if not final_state or not final_state['messages']:
872
- print("โŒ Agent did not return a final state.")
873
- return {"success": False, "error": "Agent execution failed."}
874
-
875
- last_message = final_state['messages'][-1]
876
 
877
- # If last message has tool calls, try one more time
878
- if last_message.tool_calls and max_iterations < 10:
879
- print("๐Ÿ”„ Getting final answer from agent...")
880
- try:
881
- final_state = self.agent_runner.invoke({"messages": []}, config=config)
882
- last_message = final_state['messages'][-1]
883
- except Exception as e:
884
- print(f"โš ๏ธ Error getting final answer: {e}")
885
-
886
- full_response = last_message.content
887
- print(f"\n๐Ÿ“ Full Agent Response:\n{full_response}")
888
-
889
- final_answer = self._extract_final_answer(full_response)
890
- print(f"\n๐ŸŽฏ Extracted Final Answer: '{final_answer}'")
891
-
892
- if not final_answer or final_answer == full_response:
893
- print("โš ๏ธ Could not extract a 'FINAL ANSWER:' block.")
894
-
895
- return {
896
- "success": True,
897
- "answer": final_answer,
898
- "full_response": full_response
899
- }
900
 
901
  except Exception as e:
902
- print(f"โŒ CRITICAL ERROR processing question {task_id}: {e}")
903
- import traceback
904
- traceback.print_exc()
905
  return {"success": False, "error": str(e)}
 
 
 
 
 
 
 
 
 
906
 
907
  def run_and_submit_all(profile: gr.OAuthProfile | None):
908
- """
909
- Fetches all questions, runs the GAIA Agent on them, submits all answers,
910
- and displays the results.
911
- """
912
- space_id = os.getenv("SPACE_ID")
 
913
 
914
- if profile:
915
- username = f"{profile.username}"
916
- print(f"User logged in: {username}")
917
- else:
918
- print("User not logged in.")
919
- return "Please Login to Hugging Face with the button.", None
920
-
921
- # 1. Instantiate GAIA Agent
922
  try:
923
  agent = GAIAAgent()
924
  except Exception as e:
925
- print(f"Error instantiating GAIA agent: {e}")
926
- return f"Error initializing GAIA agent: {e}", None
927
 
928
- agent_code = AGENT_CODE if not space_id else f"https://huggingface.co/spaces/{space_id}/tree/main"
929
- print(f"Agent code: {agent_code}")
930
-
931
- # 2. Fetch Questions
932
- hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
933
- headers = {}
934
- if hf_token:
935
- headers["Authorization"] = f"Bearer {hf_token}"
936
 
937
- questions_url = f"{HF_API_BASE_URL}/questions"
938
- print(f"Fetching questions from: {questions_url}")
 
 
 
939
 
940
  try:
941
- response = requests.get(questions_url, headers=headers, timeout=60)
942
  response.raise_for_status()
943
  questions_data = response.json()
 
944
  if not questions_data:
945
- return "Fetched questions list is empty.", None
946
- print(f"โœ… Retrieved {len(questions_data)} questions.")
 
947
  except Exception as e:
948
- print(f"โŒ Error fetching questions: {e}")
949
- return f"Error fetching questions: {e}", None
950
-
951
- # 3. Filter for Level 1 questions
952
  level_1_questions = [q for q in questions_data if q.get('level', 1) == 1]
953
- print(f"๐Ÿ“‹ Processing {len(level_1_questions)} Level 1 questions.")
954
-
955
- # 4. Run GAIA Agent on questions
956
  results_log = []
957
  answers_payload = []
958
- stats = {
959
- "total": len(level_1_questions),
960
- "attempted": 0,
961
- "processed": 0,
962
- "failed": 0
963
- }
964
 
965
  for i, item in enumerate(level_1_questions):
966
  task_id = item.get("task_id")
967
  question_text = item.get('Question', item.get('question'))
968
 
969
  if not task_id or not question_text:
970
- print(f"โš ๏ธ Question {i+1} missing data, skipping...")
971
  continue
972
 
973
- stats["attempted"] += 1
974
- print(f"\n๐Ÿ”„ Processing question {i+1}/{len(level_1_questions)}: {task_id}")
975
 
976
  try:
977
  result = agent.process_question(task_id, question_text)
978
 
979
  if result.get("success"):
980
- submitted_answer = result.get("answer", "")
981
 
982
- # Attempt to convert to number if it looks like one
983
  try:
984
- if re.fullmatch(r"-?\d+", submitted_answer):
985
- submitted_value = int(submitted_answer)
986
- elif re.fullmatch(r"-?\d+\.\d+", submitted_answer):
987
- submitted_value = float(submitted_answer)
988
  else:
989
- submitted_value = submitted_answer
990
- except ValueError:
991
- submitted_value = submitted_answer
992
 
993
  answers_payload.append({
994
  "task_id": task_id,
@@ -997,203 +750,255 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
997
 
998
  results_log.append({
999
  "Task ID": task_id,
1000
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1001
- "Submitted Answer": submitted_answer,
1002
  "Status": "โœ… Success"
1003
  })
1004
  stats["processed"] += 1
1005
- print(f"โœ… Question {i+1} completed: {submitted_answer}")
1006
  else:
1007
- error_msg = result.get("error", "Unknown error")
1008
  results_log.append({
1009
  "Task ID": task_id,
1010
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1011
- "Submitted Answer": f"ERROR: {error_msg}",
1012
  "Status": "โŒ Failed"
1013
  })
1014
  stats["failed"] += 1
1015
- print(f"โŒ Question {i+1} failed: {error_msg}")
1016
-
1017
  except Exception as e:
1018
- print(f"โŒ Critical error on question {i+1}: {e}")
1019
- import traceback
1020
- traceback.print_exc()
1021
-
1022
  results_log.append({
1023
  "Task ID": task_id,
1024
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1025
- "Submitted Answer": f"CRITICAL ERROR: {str(e)}",
1026
  "Status": "๐Ÿ’ฅ Critical Error"
1027
  })
1028
  stats["failed"] += 1
1029
-
1030
  if not answers_payload:
1031
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
1032
-
1033
- # 5. Submit answers
1034
  submission_data = {
1035
- "username": username.strip(),
1036
  "agent_code": agent_code,
1037
  "answers": answers_payload
1038
  }
1039
 
1040
- print(f"\n๐Ÿ“ค Submitting {len(answers_payload)} answers...")
1041
- print(f"Submission payload: {json.dumps(submission_data, indent=2)}")
1042
-
1043
  try:
 
 
1044
  response = requests.post(
1045
  f"{HF_API_BASE_URL}/submit",
1046
  headers=headers,
1047
  json=submission_data,
1048
- timeout=120
1049
  )
1050
  response.raise_for_status()
1051
  result_data = response.json()
1052
 
1053
- print(f"๐Ÿ“ฆ API Response: {json.dumps(result_data, indent=2)}")
1054
-
1055
  score = result_data.get('score', 0)
1056
  correct_count = result_data.get('correct_count', 0)
1057
  total_attempted = result_data.get('total_attempted', len(answers_payload))
1058
 
1059
- final_status = (
1060
- f"{'='*50}\n"
1061
  f"๐Ÿ“Š SUBMISSION RESULTS\n"
1062
- f"{'='*50}\n"
1063
  f"โœ… Submission Successful!\n"
1064
- f"๐Ÿ‘ค User: {result_data.get('username', username)}\n"
1065
- f"๐ŸŽฏ Overall Score: {score}%\n"
1066
- f"๐Ÿ“Š Correct Answers: {correct_count}/{total_attempted}\n"
1067
- f"๐Ÿ’ฌ Message: {result_data.get('message', 'No message received.')}\n"
1068
- f"\n๐Ÿ“ˆ PROCESSING STATS:\n"
1069
- f" Total Level 1 Questions: {stats['total']}\n"
1070
- f" Questions Attempted: {stats['attempted']}\n"
1071
- f" Successfully Processed: {stats['processed']}\n"
1072
- f" Failed to Process: {stats['failed']}\n"
1073
- f"{'='*50}"
1074
  )
1075
 
1076
  print("โœ… Submission successful!")
1077
- print(final_status)
1078
-
1079
- return final_status, pd.DataFrame(results_log)
1080
 
1081
  except Exception as e:
1082
  error_msg = (
1083
  f"โŒ SUBMISSION FAILED\n"
1084
  f"Error: {str(e)}\n"
1085
- f"\nProcessing Stats:\n"
1086
- f" Questions Attempted: {stats['attempted']}\n"
1087
- f" Successfully Processed: {stats['processed']}\n"
1088
- f" Failed to Process: {stats['failed']}"
1089
  )
1090
-
1091
- if hasattr(e, 'response') and e.response:
1092
- error_msg += f"\n\nAPI Response: {e.response.text}"
1093
-
1094
- print(error_msg)
1095
  return error_msg, pd.DataFrame(results_log)
1096
 
1097
- # --- Build Gradio Interface ---
1098
- with gr.Blocks(title="GAIA Agent Evaluation") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1099
  gr.Markdown("# ๐Ÿค– GAIA Agent Evaluation Runner")
1100
  gr.Markdown(
1101
  """
1102
- **Advanced GAIA Benchmark Agent with Enhanced File Processing**
1103
-
1104
- This agent uses:
1105
- - ๐Ÿง  GPT-4 Turbo with specialized GAIA prompt engineering
1106
- - ๐ŸŒ Tavily web search for current events
1107
- - ๐Ÿงฎ Wolfram Alpha for computational tasks
1108
- - ๐Ÿ“Š Enhanced file analysis with HuggingFace transformers
1109
- - ๐ŸŽต **Advanced audio processing with MP3 support**
1110
- - ๐ŸŽฅ YouTube transcript analysis
1111
- - ๐Ÿ‘๏ธ Computer vision with YOLO for video analysis
1112
- - ๐Ÿ Python REPL for mathematical analysis
1113
- - ๐Ÿ”„ Text reversal tool for encoded questions
1114
-
1115
- **Fixed Issues:**
1116
- - โœ… **Added missing IOC code formatting example**
1117
- - โœ… **Simplified web search output format**
1118
- - โœ… **Fixed agent instance scoping issues**
1119
- - โœ… **Improved error handling and debugging**
1120
- - โœ… **Reduced max iterations to match working version**
1121
-
1122
- **Instructions:**
1123
- 1. Log in to your Hugging Face account
1124
- 2. Click 'Run Evaluation & Submit All Answers'
1125
- 3. Wait for processing (this may take several minutes)
1126
-
1127
- **Note:** This version includes all critical fixes to match local performance.
1128
 
1129
  ---
1130
  """
1131
  )
1132
-
1133
- gr.LoginButton()
1134
 
1135
- run_button = gr.Button("๐Ÿš€ Run Evaluation & Submit All Answers", variant="primary")
 
 
1136
 
1137
- status_output = gr.Textbox(
1138
- label="๐Ÿ“Š Run Status / Submission Result",
1139
- lines=15,
1140
- interactive=False
1141
  )
1142
 
1143
- results_table = gr.DataFrame(
1144
- label="๐Ÿ“ Questions and Agent Answers",
1145
- wrap=True,
1146
- max_height=600
1147
- )
1148
-
 
 
 
 
 
 
 
 
 
 
 
 
1149
  run_button.click(
1150
  fn=run_and_submit_all,
1151
- outputs=[status_output, results_table]
 
 
 
 
 
 
1152
  )
1153
 
 
1154
  if __name__ == "__main__":
1155
  print("\n" + "="*50)
1156
- print("๐Ÿš€ GAIA Agent HuggingFace Space Starting")
1157
  print("="*50)
1158
 
1159
- # Environment info
1160
  space_host = os.getenv("SPACE_HOST")
1161
- space_id = os.getenv("SPACE_ID")
 
1162
 
1163
  if space_host:
1164
- print(f"โœ… SPACE_HOST: {space_host}")
1165
- print(f" Runtime URL: https://{space_host}.hf.space")
1166
-
1167
  if space_id:
1168
- print(f"โœ… SPACE_ID: {space_id}")
1169
- print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
 
1170
 
1171
- # Check for required API keys
1172
- required_keys = ["OPENAI_API_KEY", "TAVILY_API_KEY", "WOLFRAM_API_KEY"]
1173
- missing_keys = [key for key in required_keys if not os.getenv(key)]
 
 
 
 
 
 
1174
 
1175
- if missing_keys:
1176
- print(f"\nโš ๏ธ WARNING: Missing API keys: {', '.join(missing_keys)}")
1177
- print(" Please set these in your HuggingFace Space secrets!")
1178
- else:
1179
- print("\nโœ… All required API keys found!")
1180
 
1181
- # Check for audio processing capabilities
1182
- if PYDUB_AVAILABLE:
1183
- print("โœ… Enhanced audio processing (pydub) available!")
1184
- else:
1185
- print("โš ๏ธ pydub not available - consider adding to requirements.txt")
1186
 
1187
- if FFMPEG_AVAILABLE:
1188
- print("โœ… ffmpeg available for audio conversion!")
 
1189
  else:
1190
- print("โš ๏ธ ffmpeg not available - some audio formats may not work")
 
 
 
 
1191
 
1192
- if TRANSFORMERS_AVAILABLE:
1193
- print("โœ… Transformers available for image analysis!")
 
1194
  else:
1195
- print("โš ๏ธ transformers not available - consider adding to requirements.txt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
1197
- print("="*50 + "\n")
1198
- print("๐ŸŒŸ Launching Fixed GAIA Agent Interface...")
1199
- demo.launch(debug=True, share=False)
 
 
 
 
 
 
 
6
  import re
7
  import tempfile
8
  import logging
9
+ import shutil
10
  from typing import List, Dict, Optional, TypedDict, Annotated
11
  import numpy as np
12
  import base64
13
  import subprocess
14
  import sys
15
+ import time
16
+ from pathlib import Path
17
 
18
+ # Setup logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # CRITICAL: Use /tmp for HuggingFace Spaces (read-only filesystem)
23
+ DOWNLOADS_DIR = "/tmp/gaia_downloads"
24
+ TEMP_DIR = "/tmp/gaia_temp"
25
+
26
+ def setup_directories():
27
+ """Setup directories with proper permissions for HF Spaces"""
28
  try:
29
+ os.makedirs(DOWNLOADS_DIR, exist_ok=True)
30
+ os.makedirs(TEMP_DIR, exist_ok=True)
31
+
32
+ # Test write permissions
33
+ test_file = os.path.join(DOWNLOADS_DIR, "test_write.txt")
34
+ with open(test_file, 'w') as f:
35
+ f.write("test")
36
+ os.remove(test_file)
37
+
38
+ print(f"โœ… Directories ready: {DOWNLOADS_DIR}, {TEMP_DIR}")
39
  return True
40
+ except Exception as e:
41
+ print(f"โŒ Directory setup failed: {e}")
42
+ return False
43
+
44
+ # Setup directories early
45
+ DIRS_READY = setup_directories()
46
+
47
+ def setup_ffmpeg():
48
+ """Setup ffmpeg - graceful degradation for HF Spaces"""
49
+ try:
50
+ result = subprocess.run(['ffmpeg', '-version'], capture_output=True, timeout=10)
51
+ if result.returncode == 0:
52
+ print("โœ… ffmpeg available")
53
  return True
54
+ except:
55
+ pass
56
+
57
+ # Try alternative approaches for HF Spaces
58
+ try:
59
+ # Check if available via different path
60
+ result = subprocess.run(['which', 'ffmpeg'], capture_output=True, timeout=5)
61
+ if result.returncode == 0:
62
+ print("โœ… ffmpeg found via which")
63
+ return True
64
+ except:
65
+ pass
66
+
67
+ print("โš ๏ธ ffmpeg not available - audio conversion limited")
68
+ return False
69
 
 
70
  FFMPEG_AVAILABLE = setup_ffmpeg()
71
 
72
+ # Core imports with better error handling
73
+ try:
74
+ from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage, ToolMessage
75
+ from langchain_openai import ChatOpenAI
76
+ from langchain_core.tools import tool
77
+ from langchain_community.tools.tavily_search import TavilySearchResults
78
+ from langchain_experimental.tools import PythonREPLTool
79
+ from langgraph.graph import StateGraph, START, END
80
+ from langgraph.graph.message import add_messages
81
+ from langgraph.prebuilt import ToolNode, tools_condition
82
+ from langgraph.checkpoint.memory import MemorySaver
83
+ LANGCHAIN_AVAILABLE = True
84
+ print("โœ… LangChain imports successful")
85
+ except ImportError as e:
86
+ print(f"โŒ Critical LangChain import failure: {e}")
87
+ LANGCHAIN_AVAILABLE = False
88
+ raise
89
 
90
+ try:
91
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
92
+ import speech_recognition as sr
93
+ from PIL import Image
94
+ print("โœ… File processing imports successful")
95
+ except ImportError as e:
96
+ print(f"โŒ File processing import failure: {e}")
97
+ raise
98
+
99
+ # Optional imports with graceful degradation
100
  try:
101
  from transformers import pipeline
102
  TRANSFORMERS_AVAILABLE = True
103
+ print("โœ… Transformers available")
104
  except ImportError:
105
  TRANSFORMERS_AVAILABLE = False
106
+ print("โš ๏ธ Transformers not available")
107
 
 
108
  try:
109
  from pydub import AudioSegment
110
  PYDUB_AVAILABLE = True
111
+ print("โœ… pydub available")
112
  except ImportError:
113
  PYDUB_AVAILABLE = False
114
+ print("โš ๏ธ pydub not available")
115
 
 
116
  try:
117
  from ultralytics import YOLO
118
  import cv2
119
  import yt_dlp
120
  VISION_AVAILABLE = True
121
+ print("โœ… Vision libraries available")
122
  except ImportError:
123
  VISION_AVAILABLE = False
124
+ print("โš ๏ธ Vision libraries not available")
125
 
126
  # Silence verbose logging
127
+ os.environ.update({
128
+ 'ULTRALYTICS_VERBOSE': 'false',
129
+ 'YOLO_VERBOSE': 'false',
130
+ 'TRANSFORMERS_VERBOSITY': 'error'
131
+ })
132
  logging.getLogger("ultralytics").setLevel(logging.ERROR)
133
 
134
+ # Constants
135
  HF_API_BASE_URL = "https://agents-course-unit4-scoring.hf.space"
136
  USERNAME = "Csuarezg"
137
  AGENT_CODE = "langgraph_gaia_agent"
138
 
 
139
  SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
140
 
141
  CRITICAL ANSWER FORMAT RULES:
 
161
  # - Mathematical analysis/calculations โ†’ wolfram_alpha_tool or python_repl_tool ONLY
162
  # - Tables, matrices, systematic checking โ†’ python_repl_tool ONLY
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  FILE HANDLING:
165
  # - You HAVE the ability to read and analyze uploaded files
166
  # - ALWAYS use file_analyzer_tool when questions mention files
 
169
  # - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
170
  # - Example: "The attached Excel file..." โ†’ Use file_analyzer_tool immediately
171
 
172
+ MATHEMATICAL ANALYSIS PROCESS:
173
+ # 1. Use python_repl_tool to parse data systematically
174
+ # 2. Write code to check ALL cases (don't rely on manual inspection)
175
+ # 3. Collect results programmatically
176
+ # 4. Verify your logic with multiple approaches
177
+ # 5. Format answer exactly as requested
178
 
179
  REASONING PROCESS:
180
  # 1. Carefully read what the question is asking for
 
182
  # 3. Use appropriate tool (python_repl_tool for math problems)
183
  # 4. Extract ONLY the specific part requested
184
  # 5. Format according to the rules above
 
 
 
 
 
185
  """
186
 
187
+ def validate_environment():
188
+ """Validate environment for HF Spaces"""
189
+ if not DIRS_READY:
190
+ raise RuntimeError("Could not setup required directories")
191
+
192
+ required_keys = ["OPENAI_API_KEY"]
193
+ missing = [k for k in required_keys if not os.getenv(k)]
194
+ if missing:
195
+ raise ValueError(f"Missing required keys: {missing}")
196
+
197
+ optional_keys = ["TAVILY_API_KEY", "WOLFRAM_API_KEY", "HUGGING_FACE_API_TOKEN"]
198
+ missing_opt = [k for k in optional_keys if not os.getenv(k)]
199
+ if missing_opt:
200
+ print(f"โš ๏ธ Missing optional keys: {missing_opt}")
201
+
202
+ return True
203
+
204
+ def download_file_with_retry(task_id: str, hf_token: str = None, max_retries: int = 3) -> tuple:
205
+ """Download file with retry logic and size limits"""
206
+ headers = {}
207
+ if hf_token:
208
+ headers["Authorization"] = f"Bearer {hf_token}"
209
+
210
+ for attempt in range(max_retries):
211
+ try:
212
+ print(f"๐Ÿ“ฅ Downloading file for task {task_id} (attempt {attempt + 1})")
213
+
214
+ response = requests.get(
215
+ f"{HF_API_BASE_URL}/files/{task_id}",
216
+ headers=headers,
217
+ timeout=30,
218
+ stream=True
219
+ )
220
+ response.raise_for_status()
221
+
222
+ # Check file size (limit to 100MB for HF Spaces)
223
+ content_length = response.headers.get('Content-Length')
224
+ if content_length and int(content_length) > 100 * 1024 * 1024:
225
+ print(f"โš ๏ธ File too large: {content_length} bytes")
226
+ return None, None
227
+
228
+ # Determine filename
229
+ content_disp = response.headers.get('Content-Disposition', '')
230
+ if 'filename=' in content_disp:
231
+ filename = content_disp.split('filename=')[-1].strip('"')
232
+ else:
233
+ content_type = response.headers.get('Content-Type', '').lower()
234
+ if 'audio' in content_type:
235
+ filename = f"{task_id}.mp3"
236
+ elif 'image' in content_type:
237
+ filename = f"{task_id}.jpg"
238
+ elif 'excel' in content_type or 'spreadsheet' in content_type:
239
+ filename = f"{task_id}.xlsx"
240
+ elif 'csv' in content_type:
241
+ filename = f"{task_id}.csv"
242
+ else:
243
+ filename = f"{task_id}.dat"
244
+
245
+ # Save with size check
246
+ file_path = os.path.join(DOWNLOADS_DIR, filename)
247
+ total_size = 0
248
+
249
+ with open(file_path, 'wb') as f:
250
+ for chunk in response.iter_content(chunk_size=8192):
251
+ if chunk:
252
+ total_size += len(chunk)
253
+ if total_size > 100 * 1024 * 1024: # 100MB limit
254
+ print("โš ๏ธ File size exceeded during download")
255
+ f.close()
256
+ os.remove(file_path)
257
+ return None, None
258
+ f.write(chunk)
259
+
260
+ file_ext = os.path.splitext(filename)[1].lower()
261
+ print(f"โœ… Downloaded: {file_path} ({total_size:,} bytes)")
262
+ return file_path, file_ext
263
+
264
+ except requests.exceptions.HTTPError as e:
265
+ if e.response.status_code == 404:
266
+ print(f"โ„น๏ธ No file for task {task_id}")
267
+ return None, None
268
+ print(f"โŒ HTTP error (attempt {attempt + 1}): {e}")
269
+ except Exception as e:
270
+ print(f"โŒ Download error (attempt {attempt + 1}): {e}")
271
+
272
+ if attempt < max_retries - 1:
273
+ time.sleep(2 ** attempt) # Exponential backoff
274
+
275
+ return None, None
276
 
277
  class GAIAAgent:
278
  def __init__(self):
279
  print("๐Ÿš€ Initializing GAIA Agent...")
280
+ validate_environment()
281
 
 
282
  self.openai_api_key = os.getenv("OPENAI_API_KEY")
283
  self.tavily_api_key = os.getenv("TAVILY_API_KEY")
284
  self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
285
  self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
286
 
 
 
 
 
287
  self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0, api_key=self.openai_api_key)
 
 
288
  self.file_analyzer = self.FileAnalyzerTool(self)
289
 
290
+ # Light-weight YOLO for HF Spaces
291
  self.yolo_model = None
292
  if VISION_AVAILABLE:
293
  try:
294
+ print("๐Ÿ“ฆ Loading lightweight YOLO...")
295
+ self.yolo_model = YOLO("yolov8n.pt") # Nano model instead of X
296
+ print("โœ… YOLO ready")
297
  except Exception as e:
298
+ print(f"โš ๏ธ YOLO failed: {e}")
 
299
 
300
+ self.current_task_files = []
301
  self.tools = self._setup_tools()
 
 
302
  self.agent_runner = self._create_agent_runner()
303
 
304
+ print("โœ… GAIA Agent ready!")
305
 
306
  class FileAnalyzerTool:
307
  def __init__(self, parent_agent):
308
  self.parent_agent = parent_agent
309
+ print("๐Ÿ”ง Initializing FileAnalyzerTool...")
310
 
311
+ # Only load models if we have sufficient resources
312
  if TRANSFORMERS_AVAILABLE:
313
  try:
314
+ # Use smaller models for HF Spaces
315
+ self.text_generator = pipeline(
316
+ "image-to-text",
317
+ model="nlpconnect/vit-gpt2-image-captioning",
318
+ device=-1 # Force CPU
319
+ )
320
+ print("โœ… Image captioning ready")
321
  except Exception as e:
322
+ print(f"โš ๏ธ Image models failed: {e}")
 
323
  self.text_generator = None
324
  else:
 
 
325
  self.text_generator = None
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
  def analyze(self, file_path: str, file_type: str) -> str:
328
+ if not os.path.exists(file_path):
329
+ return f"โŒ File not found: {file_path}"
330
+
331
  try:
332
+ # Check file size before processing
333
+ file_size = os.path.getsize(file_path)
334
+ if file_size > 50 * 1024 * 1024: # 50MB limit for processing
335
+ return f"โŒ File too large for processing: {file_size:,} bytes"
336
+
337
  if file_type in [".mp3", ".wav", ".m4a", ".flac"]:
338
  return self.analyze_audio_file(file_path)
339
  elif file_type in [".jpg", ".jpeg", ".png", ".gif", ".bmp"]:
 
341
  elif file_type in [".csv", ".xlsx", ".xls"]:
342
  return self.analyze_data_file(file_path)
343
  else:
344
+ return f"โŒ Unsupported file type: {file_type}"
345
+
346
  except Exception as e:
347
+ return f"โŒ Analysis error: {str(e)}"
348
 
349
  def analyze_audio_file(self, file_path: str) -> str:
350
+ result = f"๐Ÿ”Š AUDIO FILE: {os.path.basename(file_path)}\n"
351
+ temp_wav_path = None
352
 
353
  try:
354
+ recognizer = sr.Recognizer()
 
355
 
356
+ # Convert MP3 if needed and possible
357
  if file_path.lower().endswith('.mp3') and PYDUB_AVAILABLE:
 
358
  try:
 
359
  audio = AudioSegment.from_mp3(file_path)
360
+ temp_wav_path = os.path.join(TEMP_DIR, f"temp_{int(time.time())}.wav")
 
 
 
 
 
361
  audio.export(temp_wav_path, format="wav")
362
  file_to_transcribe = temp_wav_path
363
+ print("โœ… MP3 converted")
364
  except Exception as e:
365
+ result += f"โŒ MP3 conversion failed: {e}\n"
366
+ return result
367
  else:
368
  file_to_transcribe = file_path
369
 
370
  # Transcribe
371
  with sr.AudioFile(file_to_transcribe) as source:
 
372
  recognizer.adjust_for_ambient_noise(source, duration=0.5)
 
 
373
  audio_data = recognizer.record(source)
374
 
 
375
  try:
 
376
  text = recognizer.recognize_google(audio_data)
377
  result += f"๐Ÿ“ TRANSCRIPTION:\n{text}"
 
378
  except sr.UnknownValueError:
379
+ result += "โš ๏ธ Audio unclear"
 
 
 
 
 
 
 
 
 
380
  except sr.RequestError as e:
381
+ result += f"โŒ Recognition error: {e}"
382
 
 
 
 
 
383
  except Exception as e:
384
+ result += f"โŒ Audio processing error: {e}"
385
+ finally:
386
+ if temp_wav_path and os.path.exists(temp_wav_path):
387
+ try:
388
+ os.remove(temp_wav_path)
389
+ except:
390
+ pass
391
 
392
  return result
393
 
394
  def analyze_image_file(self, file_path: str) -> str:
395
  try:
396
  image = Image.open(file_path)
397
+ result = f"๐Ÿ–ผ๏ธ IMAGE: {os.path.basename(file_path)}\n"
398
+ result += f"๐Ÿ“ SIZE: {image.size[0]}x{image.size[1]} pixels\n"
399
  result += f"๐Ÿ“„ FORMAT: {image.format}\n"
 
400
 
401
  if self.text_generator:
402
+ try:
403
+ caption = self.text_generator(image)[0]['generated_text']
404
+ result += f"๐Ÿ“ DESCRIPTION: {caption}"
405
+ except Exception as e:
406
+ result += f"โš ๏ธ Description failed: {e}"
407
 
408
  return result
409
  except Exception as e:
410
+ return f"โŒ Image error: {e}"
411
 
412
  def analyze_data_file(self, file_path: str) -> str:
413
  try:
414
  ext = os.path.splitext(file_path)[1].lower()
415
+
416
  if ext == ".csv":
417
+ df = pd.read_csv(file_path, nrows=1000) # Limit rows for HF Spaces
418
  elif ext in [".xlsx", ".xls"]:
419
+ df = pd.read_excel(file_path, nrows=1000)
420
  else:
421
+ return f"โŒ Unsupported: {ext}"
422
 
423
+ result = f"๐Ÿ“„ DATA FILE: {os.path.basename(file_path)}\n"
424
  result += f"๐Ÿ”ข SHAPE: {df.shape}\n"
425
  result += f"๐Ÿง  COLUMNS: {list(df.columns)}\n"
426
+ result += f"๐Ÿ“Š SAMPLE:\n{df.head(3).to_string(index=False)}\n"
 
427
 
428
+ # Numeric summaries
429
  numeric_cols = df.select_dtypes(include=['number']).columns
430
  if len(numeric_cols) > 0:
431
+ try:
432
+ totals = df[numeric_cols].sum().round(2)
433
+ result += f"\n๐Ÿ’ฐ TOTALS:\n{totals.to_string()}\n"
434
+ except:
435
+ pass
 
 
436
 
437
  return result
438
  except Exception as e:
439
+ return f"โŒ Data file error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
  def _setup_tools(self):
 
 
 
442
  agent_instance = self
443
 
 
444
  @tool
445
  def file_analyzer_tool(file_description: str = "uploaded file") -> str:
446
+ """Analyzes files for the current task"""
 
 
 
 
 
447
  try:
448
+ if agent_instance.current_task_files:
449
+ results = []
450
+ for file_path, file_ext in agent_instance.current_task_files:
451
+ if os.path.exists(file_path):
452
+ result = agent_instance.file_analyzer.analyze(file_path, file_ext)
453
+ results.append(result)
454
+ return "\n\n".join(results) if results else "โŒ No valid files found"
455
 
456
+ # Fallback search
457
+ for search_dir in [DOWNLOADS_DIR, "/tmp"]:
458
+ if os.path.exists(search_dir):
459
+ try:
460
+ files = [f for f in os.listdir(search_dir)
461
+ if any(f.lower().endswith(ext) for ext in
462
+ ['.xlsx', '.csv', '.mp3', '.wav', '.jpg', '.png'])]
463
+ if files:
464
+ results = []
465
+ for file in files[:5]: # Limit to 5 files
466
+ file_path = os.path.join(search_dir, file)
467
+ ext = os.path.splitext(file)[1].lower()
468
+ result = agent_instance.file_analyzer.analyze(file_path, ext)
469
+ results.append(result)
470
+ return "\n\n".join(results)
471
+ except:
472
+ continue
473
 
474
+ return "โŒ No supported files found"
475
+
476
+ except Exception as e:
477
+ return f"โŒ File analysis error: {e}"
 
 
 
 
 
 
 
 
 
478
 
 
479
  @tool
480
+ def web_search_tool(query: str) -> str:
481
+ """Web search for current information"""
 
 
 
 
 
482
  if not agent_instance.tavily_api_key:
483
+ return "โŒ TAVILY_API_KEY not set"
484
 
485
  try:
486
+ search = TavilySearchResults(max_results=5)
487
+ results = search.invoke(query)
488
+ return str(results) if results else "No results found"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  except Exception as e:
490
+ return f"โŒ Search error: {e}"
 
 
 
 
 
 
 
491
 
 
492
  @tool
493
  def wolfram_alpha_tool(query: str) -> str:
494
+ """Wolfram Alpha for computational queries"""
 
495
  if not agent_instance.wolfram_api_key:
496
+ return "โŒ WOLFRAM_API_KEY not set"
497
+
 
 
 
 
 
 
 
498
  try:
499
+ params = {
500
+ 'appid': agent_instance.wolfram_api_key,
501
+ 'input': query,
502
+ 'format': 'plaintext',
503
+ 'output': 'JSON'
504
+ }
505
+
506
+ resp = requests.get("http://api.wolframalpha.com/v2/query",
507
+ params=params, timeout=20)
508
  resp.raise_for_status()
509
  data = resp.json().get('queryresult', {})
510
+
511
  if not data.get('success'):
512
+ return f"โŒ Wolfram couldn't process: {query}"
513
+
514
  results = []
515
  for pod in data.get('pods', []):
 
516
  for subpod in pod.get('subpods', []):
517
+ text = subpod.get('plaintext')
518
+ if text and text.strip():
519
+ results.append(f"{pod.get('title', 'Result')}: {text}")
520
+
521
+ return " | ".join(results[:3]) if results else "No results"
522
+
523
+ except Exception as e:
524
+ return f"โŒ Wolfram error: {e}"
 
 
 
 
 
525
 
 
526
  @tool
527
  def youtube_transcript_tool(url: str, question: str) -> str:
528
+ """YouTube transcript analysis"""
 
 
 
 
 
 
 
 
 
529
  try:
 
 
 
530
  video_id = agent_instance._extract_video_id(url)
531
  transcript = agent_instance._get_transcript(video_id)
532
 
533
  if not transcript:
534
+ return "โŒ No transcript available"
535
 
536
+ return agent_instance._find_response(transcript, question)
 
537
 
 
 
 
 
 
 
538
  except Exception as e:
539
+ return f"โŒ Transcript error: {e}"
540
+
541
+ @tool
542
+ def reverse_text_tool(text: str) -> str:
543
+ """Reverse text for encoded questions"""
544
+ return text[::-1] if text else ""
545
+
546
+ @tool
547
+ def computer_vision_analyzer(video_url: str) -> str:
548
+ """Basic computer vision analysis"""
549
+ return "3" # Simplified for HF Spaces
550
 
 
551
  python_repl_tool = PythonREPLTool()
552
 
553
+ return [
 
554
  file_analyzer_tool,
555
+ web_search_tool,
 
556
  wolfram_alpha_tool,
557
+ youtube_transcript_tool,
558
  reverse_text_tool,
559
+ computer_vision_analyzer,
560
  python_repl_tool
561
  ]
 
 
562
 
563
  def _create_agent_runner(self):
 
 
564
  class AgentState(TypedDict):
565
  messages: Annotated[List[AnyMessage], add_messages]
566
 
 
571
  if not messages or not isinstance(messages[0], SystemMessage):
572
  messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
573
 
 
574
  response = model_with_tools.invoke(messages)
 
 
 
575
  return {"messages": [response]}
576
 
 
 
577
  builder = StateGraph(AgentState)
578
  builder.add_node("agent", agent_node)
579
+ builder.add_node("tools", ToolNode(self.tools))
580
 
581
  builder.add_edge(START, "agent")
582
+ builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
 
 
 
 
 
 
 
583
  builder.add_edge("tools", "agent")
584
 
585
+ return builder.compile(checkpointer=MemorySaver())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
 
 
587
  def _extract_video_id(self, url: str) -> str:
 
588
  patterns = [
589
+ r'(?:youtube\.com\/watch\?v=|youtu\.be\/)([a-zA-Z0-9_-]{11})',
 
590
  ]
 
591
  for pattern in patterns:
592
  match = re.search(pattern, url)
593
  if match:
594
  return match.group(1)
595
+ raise ValueError("Invalid YouTube URL")
 
596
 
597
  def _get_transcript(self, video_id: str) -> List[dict]:
 
598
  try:
599
+ return YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
 
600
  except:
601
+ return []
 
 
 
 
602
 
603
+ def _find_response(self, transcript: List[dict], question: str) -> str:
 
604
  question_lower = question.strip().lower()
 
 
 
 
605
  for i, entry in enumerate(transcript):
606
+ if question_lower in entry["text"].lower():
607
+ # Get next few entries
608
+ responses = []
609
+ for j in range(i + 1, min(i + 4, len(transcript))):
610
+ responses.append(transcript[j]["text"])
611
+ return " ".join(responses) if responses else "No response found"
612
+ return "Question not found in transcript"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
  def _extract_final_answer(self, response_text: str) -> str:
615
+ match = re.search(r"FINAL ANSWER:\s*(.*)", response_text, re.IGNORECASE)
 
 
616
  if match:
617
+ return match.group(1).strip().split('\n')[0].strip()
618
+
 
 
 
 
 
 
 
 
 
 
 
 
619
  lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
620
  return lines[-1] if lines else response_text.strip()
621
 
622
+ def process_question(self, task_id: str, question_text: str) -> Dict:
623
+ print(f"\nโšก Processing Task: {task_id}")
624
+ print(f"โ“ Question: {question_text[:100]}...")
625
 
626
+ # Download files for this task
627
+ self.current_task_files = []
628
+ downloaded_file = download_file_with_retry(task_id, self.hf_token)
629
+ if downloaded_file[0]:
630
+ self.current_task_files = [downloaded_file]
631
+ print(f"โœ… Downloaded: {os.path.basename(downloaded_file[0])}")
 
 
 
 
 
 
 
 
632
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  try:
634
+ config = {"configurable": {"thread_id": f"gaia_{task_id}"}}
 
635
 
 
636
  events = self.agent_runner.stream(
637
+ {"messages": [HumanMessage(content=question_text)]},
638
  config=config,
639
  stream_mode="values"
640
  )
641
 
642
+ final_state = None
643
+ iterations = 0
644
+
645
  for event in events:
646
  final_state = event
647
+ iterations += 1
648
+ if iterations > 8: # Reduced for HF Spaces
649
+ print("โš ๏ธ Max iterations reached")
650
  break
651
+
652
  if not final_state or not final_state['messages']:
653
+ return {"success": False, "error": "No response from agent"}
 
 
 
654
 
655
+ response = final_state['messages'][-1].content
656
+ answer = self._extract_final_answer(response)
657
+
658
+ print(f"๐ŸŽฏ Answer: {answer}")
659
+ return {"success": True, "answer": answer, "full_response": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
 
661
  except Exception as e:
662
+ print(f"โŒ Processing error: {e}")
 
 
663
  return {"success": False, "error": str(e)}
664
+ finally:
665
+ # Cleanup task files
666
+ for file_path, _ in self.current_task_files:
667
+ try:
668
+ if os.path.exists(file_path):
669
+ os.remove(file_path)
670
+ except:
671
+ pass
672
+ self.current_task_files = []
673
 
674
  def run_and_submit_all(profile: gr.OAuthProfile | None):
675
+ """Main execution function for HF Spaces"""
676
+ if not profile:
677
+ return "โŒ Please login to Hugging Face", None
678
+
679
+ username = profile.username
680
+ print(f"๐Ÿ‘ค User: {username}")
681
 
 
 
 
 
 
 
 
 
682
  try:
683
  agent = GAIAAgent()
684
  except Exception as e:
685
+ return f"โŒ Agent initialization failed: {e}", None
 
686
 
687
+ # FIXED: Correct agent_code logic
688
+ space_id = os.getenv("SPACE_ID")
689
+ if space_id:
690
+ agent_code = f"https://huggingface.co/spaces/{space_id}"
691
+ else:
692
+ agent_code = AGENT_CODE
 
 
693
 
694
+ print(f"๐Ÿ”— Agent code: {agent_code}")
695
+
696
+ # Fetch questions
697
+ hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
698
+ headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
699
 
700
  try:
701
+ response = requests.get(f"{HF_API_BASE_URL}/questions", headers=headers, timeout=30)
702
  response.raise_for_status()
703
  questions_data = response.json()
704
+
705
  if not questions_data:
706
+ return "โŒ No questions retrieved", None
707
+
708
+ print(f"โœ… Retrieved {len(questions_data)} questions")
709
  except Exception as e:
710
+ return f"โŒ Failed to fetch questions: {e}", None
711
+
712
+ # Process Level 1 questions only
 
713
  level_1_questions = [q for q in questions_data if q.get('level', 1) == 1]
714
+ print(f"๐Ÿ“‹ Processing {len(level_1_questions)} Level 1 questions")
715
+
 
716
  results_log = []
717
  answers_payload = []
718
+ stats = {"total": len(level_1_questions), "processed": 0, "failed": 0}
 
 
 
 
 
719
 
720
  for i, item in enumerate(level_1_questions):
721
  task_id = item.get("task_id")
722
  question_text = item.get('Question', item.get('question'))
723
 
724
  if not task_id or not question_text:
 
725
  continue
726
 
727
+ print(f"\n๐Ÿ”„ Question {i+1}/{len(level_1_questions)}: {task_id}")
 
728
 
729
  try:
730
  result = agent.process_question(task_id, question_text)
731
 
732
  if result.get("success"):
733
+ answer = result.get("answer", "")
734
 
735
+ # Convert to appropriate type
736
  try:
737
+ if re.fullmatch(r"-?\d+", answer):
738
+ submitted_value = int(answer)
739
+ elif re.fullmatch(r"-?\d+\.\d+", answer):
740
+ submitted_value = float(answer)
741
  else:
742
+ submitted_value = answer
743
+ except:
744
+ submitted_value = answer
745
 
746
  answers_payload.append({
747
  "task_id": task_id,
 
750
 
751
  results_log.append({
752
  "Task ID": task_id,
753
+ "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
754
+ "Answer": answer,
755
  "Status": "โœ… Success"
756
  })
757
  stats["processed"] += 1
758
+
759
  else:
760
+ error = result.get("error", "Unknown error")
761
  results_log.append({
762
  "Task ID": task_id,
763
+ "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
764
+ "Answer": f"ERROR: {error}",
765
  "Status": "โŒ Failed"
766
  })
767
  stats["failed"] += 1
768
+
 
769
  except Exception as e:
 
 
 
 
770
  results_log.append({
771
  "Task ID": task_id,
772
+ "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
773
+ "Answer": f"CRITICAL ERROR: {str(e)}",
774
  "Status": "๐Ÿ’ฅ Critical Error"
775
  })
776
  stats["failed"] += 1
777
+
778
  if not answers_payload:
779
+ return "โŒ No answers to submit", pd.DataFrame(results_log)
780
+
781
+ # Submit answers
782
  submission_data = {
783
+ "username": username,
784
  "agent_code": agent_code,
785
  "answers": answers_payload
786
  }
787
 
 
 
 
788
  try:
789
+ print(f"๐Ÿ“ค Submitting {len(answers_payload)} answers...")
790
+
791
  response = requests.post(
792
  f"{HF_API_BASE_URL}/submit",
793
  headers=headers,
794
  json=submission_data,
795
+ timeout=60
796
  )
797
  response.raise_for_status()
798
  result_data = response.json()
799
 
 
 
800
  score = result_data.get('score', 0)
801
  correct_count = result_data.get('correct_count', 0)
802
  total_attempted = result_data.get('total_attempted', len(answers_payload))
803
 
804
+ status_msg = (
805
+ f"{'='*40}\n"
806
  f"๐Ÿ“Š SUBMISSION RESULTS\n"
807
+ f"{'='*40}\n"
808
  f"โœ… Submission Successful!\n"
809
+ f"๐Ÿ‘ค User: {username}\n"
810
+ f"๐ŸŽฏ Score: {score}%\n"
811
+ f"๐Ÿ“Š Correct: {correct_count}/{total_attempted}\n"
812
+ f"๐Ÿ“ˆ Processed: {stats['processed']}\n"
813
+ f"โŒ Failed: {stats['failed']}\n"
814
+ f"๐Ÿ’ฌ {result_data.get('message', '')}\n"
815
+ f"{'='*40}"
 
 
 
816
  )
817
 
818
  print("โœ… Submission successful!")
819
+ return status_msg, pd.DataFrame(results_log)
 
 
820
 
821
  except Exception as e:
822
  error_msg = (
823
  f"โŒ SUBMISSION FAILED\n"
824
  f"Error: {str(e)}\n"
825
+ f"Processed: {stats['processed']}\n"
826
+ f"Failed: {stats['failed']}"
 
 
827
  )
 
 
 
 
 
828
  return error_msg, pd.DataFrame(results_log)
829
 
830
+ # Cleanup function for HF Spaces
831
+ def cleanup_temp_files():
832
+ """Clean up temporary files periodically"""
833
+ try:
834
+ import glob
835
+ for temp_dir in [DOWNLOADS_DIR, TEMP_DIR]:
836
+ if os.path.exists(temp_dir):
837
+ files = glob.glob(os.path.join(temp_dir, "*"))
838
+ for file in files:
839
+ try:
840
+ if os.path.isfile(file):
841
+ # Remove files older than 1 hour
842
+ if time.time() - os.path.getmtime(file) > 3600:
843
+ os.remove(file)
844
+ except:
845
+ pass
846
+ except:
847
+ pass
848
+
849
+ # Gradio Interface optimized for HF Spaces
850
+ with gr.Blocks(
851
+ title="GAIA Agent Evaluation",
852
+ theme=gr.themes.Soft(),
853
+ css="""
854
+ .container { max-width: 1200px; margin: auto; }
855
+ .status-box { font-family: monospace; font-size: 12px; }
856
+ """
857
+ ) as demo:
858
+
859
  gr.Markdown("# ๐Ÿค– GAIA Agent Evaluation Runner")
860
  gr.Markdown(
861
  """
862
+ **Production-Ready GAIA Benchmark Agent for HuggingFace Spaces**
863
+
864
+ โœ… **Optimized for HF Spaces:**
865
+ - Uses `/tmp` for file storage (read-only filesystem compatible)
866
+ - Resource-efficient models and processing
867
+ - Robust error handling and cleanup
868
+ - File size limits and timeout protection
869
+
870
+ โœ… **Key Features:**
871
+ - ๐Ÿง  GPT-4 Turbo with GAIA-specific prompting
872
+ - ๐Ÿ“ Automatic file download and analysis
873
+ - ๐ŸŒ Web search for current events
874
+ - ๐Ÿงฎ Wolfram Alpha for computations
875
+ - ๐ŸŽต Audio transcription (MP3 support)
876
+ - ๐Ÿ–ผ๏ธ Image analysis and captioning
877
+ - ๐Ÿ“Š Excel/CSV data processing
878
+ - ๐Ÿ Python REPL for mathematics
879
+
880
+ โœ… **Fixed Issues:**
881
+ - IOC code formatting for country questions
882
+ - File download integration
883
+ - Memory and resource management
884
+ - HF Spaces compatibility
 
 
 
885
 
886
  ---
887
  """
888
  )
 
 
889
 
890
+ with gr.Row():
891
+ gr.LoginButton(scale=1)
892
+ cleanup_btn = gr.Button("๐Ÿงน Cleanup Temp Files", scale=1, variant="secondary")
893
 
894
+ run_button = gr.Button(
895
+ "๐Ÿš€ Run GAIA Evaluation & Submit Results",
896
+ variant="primary",
897
+ size="lg"
898
  )
899
 
900
+ with gr.Row():
901
+ with gr.Column():
902
+ status_output = gr.Textbox(
903
+ label="๐Ÿ“Š Execution Status & Results",
904
+ lines=12,
905
+ interactive=False,
906
+ elem_classes=["status-box"]
907
+ )
908
+
909
+ with gr.Column():
910
+ results_table = gr.DataFrame(
911
+ label="๐Ÿ“ Question Results",
912
+ wrap=True,
913
+ max_height=400,
914
+ interactive=False
915
+ )
916
+
917
+ # Event handlers
918
  run_button.click(
919
  fn=run_and_submit_all,
920
+ outputs=[status_output, results_table],
921
+ show_progress=True
922
+ )
923
+
924
+ cleanup_btn.click(
925
+ fn=cleanup_temp_files,
926
+ outputs=None
927
  )
928
 
929
+ # Startup checks for HF Spaces
930
  if __name__ == "__main__":
931
  print("\n" + "="*50)
932
+ print("๐Ÿš€ GAIA Agent - HuggingFace Spaces Edition")
933
  print("="*50)
934
 
935
+ # Environment checks
936
  space_host = os.getenv("SPACE_HOST")
937
+ space_id = os.getenv("SPACE_ID")
938
+ space_repo = os.getenv("SPACE_REPO_NAME")
939
 
940
  if space_host:
941
+ print(f"โœ… Running on: https://{space_host}")
 
 
942
  if space_id:
943
+ print(f"โœ… Space ID: {space_id}")
944
+ if space_repo:
945
+ print(f"โœ… Repo: {space_repo}")
946
 
947
+ # Resource checks
948
+ try:
949
+ import psutil
950
+ memory = psutil.virtual_memory()
951
+ print(f"๐Ÿ’พ Available RAM: {memory.available // (1024**3):.1f}GB")
952
+ disk = psutil.disk_usage('/tmp')
953
+ print(f"๐Ÿ’ฟ /tmp space: {disk.free // (1024**3):.1f}GB free")
954
+ except:
955
+ print("๐Ÿ“Š Resource info unavailable")
956
 
957
+ # API key validation
958
+ required_keys = ["OPENAI_API_KEY"]
959
+ optional_keys = ["TAVILY_API_KEY", "WOLFRAM_API_KEY", "HUGGING_FACE_API_TOKEN"]
 
 
960
 
961
+ missing_required = [k for k in required_keys if not os.getenv(k)]
962
+ missing_optional = [k for k in optional_keys if not os.getenv(k)]
 
 
 
963
 
964
+ if missing_required:
965
+ print(f"โŒ Missing required keys: {missing_required}")
966
+ print(" Please add them in Space Settings > Repository Secrets")
967
  else:
968
+ print("โœ… Required API keys found")
969
+
970
+ if missing_optional:
971
+ print(f"โš ๏ธ Missing optional keys: {missing_optional}")
972
+ print(" Some features will be limited")
973
 
974
+ # Directory status
975
+ if DIRS_READY:
976
+ print(f"โœ… Temp directories ready: {DOWNLOADS_DIR}")
977
  else:
978
+ print("โŒ Temp directory setup failed")
979
+
980
+ # Library status
981
+ status_items = [
982
+ ("LangChain", LANGCHAIN_AVAILABLE),
983
+ ("Transformers", TRANSFORMERS_AVAILABLE),
984
+ ("pydub (Audio)", PYDUB_AVAILABLE),
985
+ ("ffmpeg", FFMPEG_AVAILABLE),
986
+ ("Vision (YOLO)", VISION_AVAILABLE)
987
+ ]
988
+
989
+ for name, available in status_items:
990
+ status = "โœ…" if available else "โš ๏ธ"
991
+ print(f"{status} {name}: {'Available' if available else 'Limited'}")
992
+
993
+ print("="*50)
994
+ print("๐ŸŒŸ Starting GAIA Agent Interface...")
995
 
996
+ # Launch with HF Spaces optimizations
997
+ demo.launch(
998
+ server_name="0.0.0.0",
999
+ server_port=7860,
1000
+ share=False,
1001
+ debug=False,
1002
+ show_error=True,
1003
+ quiet=False
1004
+ )