vissutagunawan committed on
Commit
4691df3
·
verified ·
1 Parent(s): 21886e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +415 -47
app.py CHANGED
@@ -204,35 +204,126 @@ def extract_numbers(text: str) -> str:
204
  return f"Error extracting numbers: {str(e)}"
205
 
206
  @tool
207
- def count_items(text: str, item_type: str = "words") -> str:
208
- """Counts different types of items in text.
209
 
210
  Args:
211
- text: The text to analyze
212
- item_type: What to count ("words", "characters", "lines", "sentences")
213
 
214
  Returns:
215
- The count as a string
216
  """
217
  try:
218
- if item_type == "words":
219
- words = text.split()
220
- return str(len(words))
221
- elif item_type == "characters":
222
- return str(len(text))
223
- elif item_type == "lines":
224
- lines = text.split('\n')
225
- return str(len(lines))
226
- elif item_type == "sentences":
227
- import re
228
- sentences = re.split(r'[.!?]+', text)
229
- sentences = [s.strip() for s in sentences if s.strip()]
230
- return str(len(sentences))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  else:
232
- return f"Unknown item type: {item_type}"
233
 
234
  except Exception as e:
235
- return f"Error counting items: {str(e)}"
236
 
237
  def setup_authentication():
238
  """Setup HuggingFace authentication for the app."""
@@ -300,7 +391,9 @@ class GAIAAgent:
300
  calculate_math,
301
  analyze_data,
302
  extract_numbers,
303
- count_items
 
 
304
  ]
305
 
306
  # Create the CodeAgent with enhanced capabilities
@@ -311,12 +404,13 @@ class GAIAAgent:
311
  add_base_tools=True, # Adds DuckDuckGoSearchTool and other base tools
312
  additional_authorized_imports=[
313
  'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
314
- 're', 'urllib.parse', 'base64', 'datetime', 'calendar'
 
315
  ],
316
- max_steps=10, # Allow multiple reasoning steps
317
  verbosity_level=1 # Reduce verbosity for cleaner output
318
  )
319
- print("โœ… GAIA Agent initialized successfully with enhanced tools and base toolkit")
320
  except Exception as e:
321
  print(f"โŒ Error initializing agent: {e}")
322
  raise e
@@ -326,49 +420,323 @@ class GAIAAgent:
326
  try:
327
  print(f"๐Ÿค– Processing question: {question[:100]}...")
328
 
329
- # Enhanced prompt with specific instructions for GAIA
330
- enhanced_prompt = f"""You are a helpful AI assistant designed to answer questions accurately and concisely.
331
-
332
- IMPORTANT INSTRUCTIONS:
333
- 1. Read the question carefully and understand what is being asked
334
- 2. Use the available tools when you need external information or calculations
335
- 3. For mathematical problems, use the calculate_math tool or write Python code
336
- 4. For web searches, use DuckDuckGoSearchTool and visit_webpage when needed
337
- 5. Break down complex problems into steps
338
- 6. Give ONLY the final answer - no explanations, no "FINAL ANSWER:" prefix
339
- 7. Be precise with numbers and dates
340
- 8. If the answer is a number, return just the number
341
- 9. If the answer is text, return just the text without quotes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
  Question: {question}
344
 
345
- Answer:"""
346
 
347
- # Run the agent with error handling for quota issues
348
  try:
349
  result = self.agent.run(enhanced_prompt)
350
  except Exception as api_error:
351
- if "402" in str(api_error) or "Payment Required" in str(api_error) or "exceeded" in str(api_error):
352
- print(f"โš ๏ธ API quota exceeded, trying simpler approach...")
353
- # Fallback: try to answer with basic tools only
354
- result = f"Unable to process due to API limits: {str(api_error)}"
355
  else:
356
  raise api_error
357
 
358
- # Clean up the result to ensure it's just the answer
359
  if isinstance(result, str):
360
- # Remove common prefixes and suffixes
361
  result = result.strip()
362
 
363
- # Remove "FINAL ANSWER:" if present
 
 
 
 
 
 
 
 
364
  result = re.sub(r'^(FINAL\s*ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
365
  result = re.sub(r'^(ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
366
  result = re.sub(r'^(RESULT\s*:?\s*)', '', result, flags=re.IGNORECASE)
 
367
 
368
- # Remove quotes if the entire answer is wrapped in quotes
369
  if (result.startswith('"') and result.endswith('"')) or (result.startswith("'") and result.endswith("'")):
370
  result = result[1:-1]
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  result = result.strip()
373
 
374
  print(f"โœ… Agent response: {result}")
 
204
  return f"Error extracting numbers: {str(e)}"
205
 
206
  @tool
207
def process_file_content(file_url: str) -> str:
    """Downloads and processes content from a file URL, supporting various formats.

    Text and CSV responses are returned verbatim, JSON responses are
    pretty-printed with a two-space indent, and any other content type is
    summarised by its size and content type. Failures never raise: they are
    reported as a string starting with "Error processing file".

    Args:
        file_url: URL to a file (PDF, CSV, TXT, etc.)

    Returns:
        The processed content of the file as text
    """
    try:
        # Every dependency is imported inside the function so the tool does
        # not rely on module-level imports being present (the original used
        # `json` without importing it here).
        import json

        import requests

        # Some hosts reject requests that do not look like a browser.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        response = requests.get(file_url, headers=headers, timeout=30)
        response.raise_for_status()

        # The server-declared content type decides how the body is rendered.
        content_type = response.headers.get('content-type', '').lower()

        if 'text/' in content_type or 'csv' in content_type:
            return response.text
        if 'json' in content_type:
            return json.dumps(response.json(), indent=2)

        # Anything else is treated as binary: describe it instead of dumping bytes.
        return f"Binary file detected. Size: {len(response.content)} bytes. Content-Type: {content_type}"

    except Exception as e:
        # Tool boundary: the agent expects a string result, never an exception.
        return f"Error processing file: {str(e)}"
242
+
243
+ @tool
244
def solve_equation(equation: str) -> str:
    """Solves mathematical equations and expressions symbolically.

    Input containing one standalone "=" (or "==") is treated as an equation
    and solved; any other input is simplified as an expression. "^" is
    accepted as exponentiation. The equation is solved for x when x occurs in
    it (or when it has no symbols at all), otherwise for the alphabetically
    first symbol. Failures never raise: they are reported as a string starting
    with "Error solving equation".

    Args:
        equation: Mathematical equation to solve (e.g., "x^2 + 2*x - 3 = 0")

    Returns:
        The solution to the equation
    """
    try:
        import re

        text = equation.strip()
        if not text:
            raise ValueError("empty equation")

        # Split on a standalone "=" or "==" while leaving "<=", ">=" and "!="
        # intact. The original blindly replaced "=" with "==", which corrupted
        # those operators and turned the equation into a Python comparison
        # that evaluates to a plain bool instead of something solvable.
        sides = re.split(r'(?<![<>!=])==?(?!=)', text)
        if len(sides) > 2:
            raise ValueError("expected at most one '=' sign")
        if any(not side.strip() for side in sides):
            raise ValueError("missing expression on one side of '='")

        import sympy as sp
        from sympy.parsing.sympy_parser import (
            convert_xor,
            parse_expr,
            standard_transformations,
        )

        # NOTE(security): parse_expr is safer than the raw eval() used before
        # (it only exposes the SymPy namespace) but it still evaluates the
        # string internally, so do not feed it untrusted input.
        # convert_xor makes "^" mean exponentiation, as in the docstring example.
        transformations = standard_transformations + (convert_xor,)
        parsed = [
            parse_expr(side.strip(), transformations=transformations)
            for side in sides
        ]

        if len(parsed) == 1:
            # No equals sign: treat the input as an expression to simplify.
            return str(sp.simplify(parsed[0]))

        lhs, rhs = parsed
        x = sp.Symbol('x')
        unknowns = lhs.free_symbols | rhs.free_symbols
        if x in unknowns or not unknowns:
            target = x
        else:
            # The original always solved for x, returning nothing useful for
            # equations written in another variable.
            target = min(unknowns, key=lambda symbol: symbol.name)

        # Solve lhs - rhs == 0 rather than building Eq(lhs, rhs), which can
        # collapse to a bare True/False for trivial inputs.
        return str(sp.solve(lhs - rhs, target))

    except Exception as e:
        # Tool boundary: the agent expects a string result, never an exception.
        return f"Error solving equation: {str(e)}"
286
+
287
+ @tool
288
def parse_structured_data(data: str, format_type: str = "auto") -> str:
    """Parses and analyzes structured data (CSV, JSON, etc.).

    With format_type "auto", text starting with "{" or "[" is treated as JSON
    and text containing both a comma and a newline is treated as CSV. JSON is
    returned pretty-printed; CSV is summarised by shape, column names, the
    first five rows and, when numeric columns exist, describe() statistics.
    Failures never raise: they are reported as a string starting with
    "Error parsing data".

    Args:
        data: The structured data as a string
        format_type: Format type ("csv", "json", "auto")

    Returns:
        Analysis of the structured data
    """
    try:
        import json
        from io import StringIO

        if format_type == "auto":
            # Cheap sniffing on the trimmed text; anything unrecognised stays
            # "auto" and is reported as unsupported below.
            data_clean = data.strip()
            if data_clean.startswith(('{', '[')):
                format_type = "json"
            elif ',' in data_clean and '\n' in data_clean:
                format_type = "csv"

        if format_type == "json":
            return json.dumps(json.loads(data), indent=2)

        if format_type == "csv":
            # Imported only on the CSV path so JSON parsing keeps working
            # even when pandas is unavailable.
            import pandas as pd

            df = pd.read_csv(StringIO(data))
            result = f"DataFrame shape: {df.shape}\n"
            result += f"Columns: {list(df.columns)}\n"
            result += f"First 5 rows:\n{df.head().to_string()}\n"

            # Check that numeric columns exist. The original `.columns.any()`
            # tested the truthiness of the column *labels* instead.
            numeric_columns = df.select_dtypes(include=['number']).columns
            if not numeric_columns.empty:
                result += f"Numerical summary:\n{df.describe().to_string()}"
            return result

        return f"Unsupported format: {format_type}"

    except Exception as e:
        # Tool boundary: the agent expects a string result, never an exception.
        return f"Error parsing data: {str(e)}"
327
 
328
  def setup_authentication():
329
  """Setup HuggingFace authentication for the app."""
 
391
  calculate_math,
392
  analyze_data,
393
  extract_numbers,
394
+ process_file_content,
395
+ solve_equation,
396
+ parse_structured_data
397
  ]
398
 
399
  # Create the CodeAgent with enhanced capabilities
 
404
  add_base_tools=True, # Adds DuckDuckGoSearchTool and other base tools
405
  additional_authorized_imports=[
406
  'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
407
+ 're', 'urllib.parse', 'base64', 'datetime', 'calendar',
408
+ 'pandas', 'numpy', 'sympy', 'scipy'
409
  ],
410
+ max_steps=15, # Increased for complex multi-step reasoning
411
  verbosity_level=1 # Reduce verbosity for cleaner output
412
  )
413
+ print("โœ… GAIA Agent initialized successfully with PRO model and enhanced tools")
414
  except Exception as e:
415
  print(f"โŒ Error initializing agent: {e}")
416
  raise e
 
420
  try:
421
  print(f"๐Ÿค– Processing question: {question[:100]}...")
422
 
423
+ # Enhanced GAIA-optimized prompt
424
+ enhanced_prompt = f"""You are an expert AI assistant designed to excel at the GAIA benchmark. You must answer questions with perfect accuracy using a systematic approach.
425
+
426
+ CRITICAL INSTRUCTIONS FOR GAIA SUCCESS:
427
+ 1. ANALYZE THE QUESTION: Read carefully and identify what type of question this is:
428
+ - Mathematical calculation or equation
429
+ - Information retrieval from web/files
430
+ - Data analysis or statistics
431
+ - Multi-step reasoning problem
432
+ - Factual lookup
433
+
434
+ 2. CHOOSE YOUR APPROACH:
435
+ - For math: Use calculate_math tool or solve_equation for complex equations
436
+ - For web info: Use DuckDuckGoSearchTool then visit_webpage for details
437
+ - For files: Use process_file_content to download and analyze
438
+ - For data: Use analyze_data or parse_structured_data
439
+ - For numbers in text: Use extract_numbers first
440
+
441
+ 3. BE SYSTEMATIC:
442
+ - Break complex questions into steps
443
+ - Use multiple tools if needed
444
+ - Verify your reasoning
445
+ - Double-check calculations
446
+
447
+ 4. ANSWER FORMAT:
448
+ - Give ONLY the final answer
449
+ - No explanations, no "FINAL ANSWER:" prefix
450
+ - For numbers: just the number (e.g., "42", not "42.0")
451
+ - For text: just the text without quotes
452
+ - Be precise with units, dates, and formatting
453
+
454
+ 5. ACCURACY IS PARAMOUNT:
455
+ - GAIA requires exact matches
456
+ - Round numbers appropriately
457
+ - Use proper case and spelling
458
+ - Include units when relevant
459
 
460
  Question: {question}
461
 
462
+ Think step by step, use the appropriate tools, and provide only the final answer:"""
463
 
464
+ # Run the agent with enhanced error handling
465
  try:
466
  result = self.agent.run(enhanced_prompt)
467
  except Exception as api_error:
468
+ if "402" in str(api_error) or "Payment Required" in str(api_error):
469
+ print(f"โš ๏ธ API quota issue (you have Pro, this shouldn't happen): {api_error}")
470
+ result = f"API Error: {str(api_error)}"
 
471
  else:
472
  raise api_error
473
 
474
+ # Enhanced answer cleaning for GAIA precision
475
  if isinstance(result, str):
 
476
  result = result.strip()
477
 
478
+ # Remove any explanatory text before the answer
479
+ lines = result.split('\n')
480
+ for i, line in enumerate(lines):
481
+ line = line.strip()
482
+ if line and not line.startswith(('Step', 'First', 'Next', 'Then', 'Finally', 'Therefore', 'So,', 'Thus')):
483
+ result = line
484
+ break
485
+
486
+ # Remove common prefixes
487
  result = re.sub(r'^(FINAL\s*ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
488
  result = re.sub(r'^(ANSWER\s*:?\s*)', '', result, flags=re.IGNORECASE)
489
  result = re.sub(r'^(RESULT\s*:?\s*)', '', result, flags=re.IGNORECASE)
490
+ result = re.sub(r'^(THE\s*ANSWER\s*IS\s*:?\s*)', '', result, flags=re.IGNORECASE)
491
 
492
+ # Remove quotes if the entire answer is wrapped
493
  if (result.startswith('"') and result.endswith('"')) or (result.startswith("'") and result.endswith("'")):
494
  result = result[1:-1]
495
 
496
+ # Clean up decimal numbers (e.g., "42.0" -> "42")
497
+ if re.match(r'^\d+\.0+
498
+
499
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
500
+ """
501
+ Fetches all questions, runs the GAIAAgent on them, submits all answers,
502
+ and displays the results.
503
+ """
504
+ # --- Determine HF Space Runtime URL and Repo URL ---
505
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
506
+
507
+ if profile:
508
+ username = f"{profile.username}"
509
+ print(f"User logged in: {username}")
510
+ else:
511
+ print("User not logged in.")
512
+ return "Please Login to Hugging Face with the button.", None
513
+
514
+ api_url = DEFAULT_API_URL
515
+ questions_url = f"{api_url}/questions"
516
+ submit_url = f"{api_url}/submit"
517
+
518
+ # 1. Instantiate Enhanced Agent
519
+ try:
520
+ print("๐Ÿš€ Initializing GAIA Agent with smolagents...")
521
+ agent = GAIAAgent()
522
+ print("โœ… Enhanced agent ready for GAIA benchmark!")
523
+ except Exception as e:
524
+ error_msg = f"Error initializing agent: {e}"
525
+ print(f"โŒ {error_msg}")
526
+ return error_msg, None
527
+
528
+ # In the case of an app running as a hugging Face space, this link points toward your codebase
529
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
530
+ print(f"Agent code link: {agent_code}")
531
+
532
+ # 2. Fetch Questions
533
+ print(f"๐Ÿ“ฅ Fetching questions from: {questions_url}")
534
+ try:
535
+ response = requests.get(questions_url, timeout=15)
536
+ response.raise_for_status()
537
+ questions_data = response.json()
538
+ if not questions_data:
539
+ print("Fetched questions list is empty.")
540
+ return "Fetched questions list is empty or invalid format.", None
541
+ print(f"โœ… Fetched {len(questions_data)} questions from GAIA benchmark.")
542
+ except requests.exceptions.RequestException as e:
543
+ print(f"โŒ Error fetching questions: {e}")
544
+ return f"Error fetching questions: {e}", None
545
+ except requests.exceptions.JSONDecodeError as e:
546
+ print(f"โŒ Error decoding JSON response from questions endpoint: {e}")
547
+ print(f"Response text: {response.text[:500]}")
548
+ return f"Error decoding server response for questions: {e}", None
549
+ except Exception as e:
550
+ print(f"โŒ An unexpected error occurred fetching questions: {e}")
551
+ return f"An unexpected error occurred fetching questions: {e}", None
552
+
553
+ # 3. Run Enhanced Agent
554
+ results_log = []
555
+ answers_payload = []
556
+ print(f"๐Ÿค– Running enhanced GAIA agent on {len(questions_data)} questions...")
557
+
558
+ for i, item in enumerate(questions_data, 1):
559
+ task_id = item.get("task_id")
560
+ question_text = item.get("question")
561
+ if not task_id or question_text is None:
562
+ print(f"โš ๏ธ Skipping item with missing task_id or question: {item}")
563
+ continue
564
+
565
+ print(f"\n๐Ÿ“ Processing question {i}/{len(questions_data)} (ID: {task_id})")
566
+ try:
567
+ submitted_answer = agent(question_text)
568
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
569
+ results_log.append({
570
+ "Task ID": task_id,
571
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
572
+ "Submitted Answer": submitted_answer
573
+ })
574
+ print(f"โœ… Answer for {task_id}: {submitted_answer}")
575
+ except Exception as e:
576
+ error_msg = f"AGENT ERROR: {e}"
577
+ print(f"โŒ Error running agent on task {task_id}: {e}")
578
+ answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
579
+ results_log.append({
580
+ "Task ID": task_id,
581
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
582
+ "Submitted Answer": error_msg
583
+ })
584
+
585
+ if not answers_payload:
586
+ print("โŒ Agent did not produce any answers to submit.")
587
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
588
+
589
+ # 4. Prepare Submission
590
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
591
+ status_update = f"๐Ÿš€ Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
592
+ print(status_update)
593
+
594
+ # 5. Submit
595
+ print(f"๐Ÿ“ค Submitting {len(answers_payload)} answers to: {submit_url}")
596
+ try:
597
+ response = requests.post(submit_url, json=submission_data, timeout=60)
598
+ response.raise_for_status()
599
+ result_data = response.json()
600
+
601
+ score = result_data.get('score', 'N/A')
602
+ correct_count = result_data.get('correct_count', '?')
603
+ total_attempted = result_data.get('total_attempted', '?')
604
+
605
+ final_status = (
606
+ f"๐ŸŽ‰ Submission Successful!\n"
607
+ f"๐Ÿ‘ค User: {result_data.get('username')}\n"
608
+ f"๐Ÿ“Š Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
609
+ f"๐ŸŽฏ Target: >30% for certification\n"
610
+ f"๐Ÿ’ฌ Message: {result_data.get('message', 'No message received.')}"
611
+ )
612
+
613
+ if isinstance(score, (int, float)) and score >= 30:
614
+ final_status += f"\n๐Ÿ† CONGRATULATIONS! You've achieved the target score of 30%!"
615
+ elif isinstance(score, (int, float)):
616
+ final_status += f"\n๐Ÿ“ˆ Keep improving! You need {30-score:.1f}% more to reach the target."
617
+
618
+ print("โœ… Submission successful!")
619
+ results_df = pd.DataFrame(results_log)
620
+ return final_status, results_df
621
+
622
+ except requests.exceptions.HTTPError as e:
623
+ error_detail = f"Server responded with status {e.response.status_code}."
624
+ try:
625
+ error_json = e.response.json()
626
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
627
+ except requests.exceptions.JSONDecodeError:
628
+ error_detail += f" Response: {e.response.text[:500]}"
629
+ status_message = f"โŒ Submission Failed: {error_detail}"
630
+ print(status_message)
631
+ results_df = pd.DataFrame(results_log)
632
+ return status_message, results_df
633
+ except requests.exceptions.Timeout:
634
+ status_message = "โŒ Submission Failed: The request timed out."
635
+ print(status_message)
636
+ results_df = pd.DataFrame(results_log)
637
+ return status_message, results_df
638
+ except requests.exceptions.RequestException as e:
639
+ status_message = f"โŒ Submission Failed: Network error - {e}"
640
+ print(status_message)
641
+ results_df = pd.DataFrame(results_log)
642
+ return status_message, results_df
643
+ except Exception as e:
644
+ status_message = f"โŒ An unexpected error occurred during submission: {e}"
645
+ print(status_message)
646
+ results_df = pd.DataFrame(results_log)
647
+ return status_message, results_df
648
+
649
+
650
+ # --- Build Gradio Interface using Blocks ---
651
+ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
652
+ gr.Markdown("# ๐Ÿค– Enhanced GAIA Agent Evaluation Runner")
653
+ gr.Markdown(
654
+ """
655
+ **Enhanced Agent for GAIA Benchmark Certification**
656
+
657
+ This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
658
+ - ๐Ÿ” **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
659
+ - ๐Ÿ **Python Interpreter**: Code execution capabilities (from base toolkit)
660
+ - ๐ŸŒ **Web Scraping**: Custom webpage visitor for content extraction
661
+ - ๐Ÿงฎ **Mathematics**: Advanced calculation capabilities
662
+ - ๐Ÿ“Š **Data Analysis**: Statistical analysis of numerical data
663
+ - ๐Ÿ”ข **Number Extraction**: Intelligent number parsing from text
664
+ - ๐Ÿ“ **Text Analysis**: Counting and text processing utilities
665
+ - ๐Ÿค– **LLM Model**: Llama-3.1-8B-Instruct for advanced reasoning
666
+
667
+ **Instructions:**
668
+ 1. ๐Ÿ”„ **Clone this space** and customize the agent as needed
669
+ 2. ๐Ÿ”‘ **Log in** to your Hugging Face account using the button below
670
+ 3. ๐Ÿš€ **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
671
+ 4. ๐ŸŽฏ **Target**: Score >30% for course certification
672
+
673
+ **Goal**: Answer GAIA level 1 validation questions with exact match precision.
674
+
675
+ ---
676
+ โš ๏ธ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
677
+ """
678
+ )
679
+
680
+ gr.LoginButton()
681
+
682
+ run_button = gr.Button("๐Ÿš€ Run Evaluation & Submit All Answers", variant="primary", size="lg")
683
+
684
+ status_output = gr.Textbox(
685
+ label="๐Ÿ“Š Evaluation Status & Results",
686
+ lines=8,
687
+ interactive=False,
688
+ placeholder="Click the button above to start the evaluation..."
689
+ )
690
+
691
+ results_table = gr.DataFrame(
692
+ label="๐Ÿ“‹ Questions and Agent Responses",
693
+ wrap=True,
694
+ headers=["Task ID", "Question", "Submitted Answer"]
695
+ )
696
+
697
+ run_button.click(
698
+ fn=run_and_submit_all,
699
+ outputs=[status_output, results_table]
700
+ )
701
+
702
+ if __name__ == "__main__":
703
+ print("\n" + "="*60)
704
+ print("๐Ÿค– ENHANCED GAIA AGENT STARTING UP")
705
+ print("="*60)
706
+
707
+ # Setup authentication
708
+ print("๐Ÿ” Setting up HuggingFace authentication...")
709
+ auth_success = setup_authentication()
710
+
711
+ # Check for SPACE_HOST and SPACE_ID at startup for information
712
+ space_host_startup = os.getenv("SPACE_HOST")
713
+ space_id_startup = os.getenv("SPACE_ID")
714
+
715
+ if space_host_startup:
716
+ print(f"โœ… SPACE_HOST found: {space_host_startup}")
717
+ print(f" ๐ŸŒ Runtime URL: https://{space_host_startup}.hf.space")
718
+ else:
719
+ print("โ„น๏ธ SPACE_HOST environment variable not found (running locally?).")
720
+ if not auth_success:
721
+ print("๐Ÿ’ก For local testing, you may need to run:")
722
+ print(" from huggingface_hub import notebook_login")
723
+ print(" notebook_login()")
724
+
725
+ if space_id_startup:
726
+ print(f"โœ… SPACE_ID found: {space_id_startup}")
727
+ print(f" ๐Ÿ“ Repo URL: https://huggingface.co/spaces/{space_id_startup}")
728
+ print(f" ๐Ÿ”— Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
729
+ else:
730
+ print("โ„น๏ธ SPACE_ID environment variable not found (running locally?).")
731
+
732
+ print("="*60)
733
+ print("๐Ÿš€ Launching Enhanced GAIA Agent Interface...")
734
+ print("๐ŸŽฏ Target: >30% score on GAIA benchmark")
735
+ print("="*60 + "\n")
736
+
737
+ demo.launch(debug=True, share=False), result):
738
+ result = str(int(float(result)))
739
+
740
  result = result.strip()
741
 
742
  print(f"โœ… Agent response: {result}")