vissutagunawan committed on
Commit
b72239b
·
verified ·
1 Parent(s): 4691df3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -243
app.py CHANGED
@@ -365,7 +365,7 @@ class GAIAAgent:
365
  # Initialize the model with fallback options
366
  try:
367
  # Try powerful model first - but use one that's more widely available
368
- model_id = "meta-llama/Llama-3.1-8B-Instruct"
369
  self.model = InferenceClientModel(model_id=model_id)
370
  print(f"✅ Model initialized successfully: {model_id}")
371
  except Exception as e:
@@ -494,247 +494,7 @@ Think step by step, use the appropriate tools, and provide only the final answer
494
  result = result[1:-1]
495
 
496
  # Clean up decimal numbers (e.g., "42.0" -> "42")
497
- if re.match(r'^\d+\.0+
498
-
499
- def run_and_submit_all(profile: gr.OAuthProfile | None):
500
- """
501
- Fetches all questions, runs the GAIAAgent on them, submits all answers,
502
- and displays the results.
503
- """
504
- # --- Determine HF Space Runtime URL and Repo URL ---
505
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
506
-
507
- if profile:
508
- username = f"{profile.username}"
509
- print(f"User logged in: {username}")
510
- else:
511
- print("User not logged in.")
512
- return "Please Login to Hugging Face with the button.", None
513
-
514
- api_url = DEFAULT_API_URL
515
- questions_url = f"{api_url}/questions"
516
- submit_url = f"{api_url}/submit"
517
-
518
- # 1. Instantiate Enhanced Agent
519
- try:
520
- print("🚀 Initializing GAIA Agent with smolagents...")
521
- agent = GAIAAgent()
522
- print("✅ Enhanced agent ready for GAIA benchmark!")
523
- except Exception as e:
524
- error_msg = f"Error initializing agent: {e}"
525
- print(f"❌ {error_msg}")
526
- return error_msg, None
527
-
528
- # In the case of an app running as a hugging Face space, this link points toward your codebase
529
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
530
- print(f"Agent code link: {agent_code}")
531
-
532
- # 2. Fetch Questions
533
- print(f"📥 Fetching questions from: {questions_url}")
534
- try:
535
- response = requests.get(questions_url, timeout=15)
536
- response.raise_for_status()
537
- questions_data = response.json()
538
- if not questions_data:
539
- print("Fetched questions list is empty.")
540
- return "Fetched questions list is empty or invalid format.", None
541
- print(f"✅ Fetched {len(questions_data)} questions from GAIA benchmark.")
542
- except requests.exceptions.RequestException as e:
543
- print(f"❌ Error fetching questions: {e}")
544
- return f"Error fetching questions: {e}", None
545
- except requests.exceptions.JSONDecodeError as e:
546
- print(f"❌ Error decoding JSON response from questions endpoint: {e}")
547
- print(f"Response text: {response.text[:500]}")
548
- return f"Error decoding server response for questions: {e}", None
549
- except Exception as e:
550
- print(f"❌ An unexpected error occurred fetching questions: {e}")
551
- return f"An unexpected error occurred fetching questions: {e}", None
552
-
553
- # 3. Run Enhanced Agent
554
- results_log = []
555
- answers_payload = []
556
- print(f"🤖 Running enhanced GAIA agent on {len(questions_data)} questions...")
557
-
558
- for i, item in enumerate(questions_data, 1):
559
- task_id = item.get("task_id")
560
- question_text = item.get("question")
561
- if not task_id or question_text is None:
562
- print(f"⚠️ Skipping item with missing task_id or question: {item}")
563
- continue
564
-
565
- print(f"\n📝 Processing question {i}/{len(questions_data)} (ID: {task_id})")
566
- try:
567
- submitted_answer = agent(question_text)
568
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
569
- results_log.append({
570
- "Task ID": task_id,
571
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
572
- "Submitted Answer": submitted_answer
573
- })
574
- print(f"✅ Answer for {task_id}: {submitted_answer}")
575
- except Exception as e:
576
- error_msg = f"AGENT ERROR: {e}"
577
- print(f"❌ Error running agent on task {task_id}: {e}")
578
- answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
579
- results_log.append({
580
- "Task ID": task_id,
581
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
582
- "Submitted Answer": error_msg
583
- })
584
-
585
- if not answers_payload:
586
- print("❌ Agent did not produce any answers to submit.")
587
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
588
-
589
- # 4. Prepare Submission
590
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
591
- status_update = f"🚀 Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
592
- print(status_update)
593
-
594
- # 5. Submit
595
- print(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
596
- try:
597
- response = requests.post(submit_url, json=submission_data, timeout=60)
598
- response.raise_for_status()
599
- result_data = response.json()
600
-
601
- score = result_data.get('score', 'N/A')
602
- correct_count = result_data.get('correct_count', '?')
603
- total_attempted = result_data.get('total_attempted', '?')
604
-
605
- final_status = (
606
- f"🎉 Submission Successful!\n"
607
- f"👤 User: {result_data.get('username')}\n"
608
- f"📊 Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
609
- f"🎯 Target: >30% for certification\n"
610
- f"💬 Message: {result_data.get('message', 'No message received.')}"
611
- )
612
-
613
- if isinstance(score, (int, float)) and score >= 30:
614
- final_status += f"\n🏆 CONGRATULATIONS! You've achieved the target score of 30%!"
615
- elif isinstance(score, (int, float)):
616
- final_status += f"\n📈 Keep improving! You need {30-score:.1f}% more to reach the target."
617
-
618
- print("✅ Submission successful!")
619
- results_df = pd.DataFrame(results_log)
620
- return final_status, results_df
621
-
622
- except requests.exceptions.HTTPError as e:
623
- error_detail = f"Server responded with status {e.response.status_code}."
624
- try:
625
- error_json = e.response.json()
626
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
627
- except requests.exceptions.JSONDecodeError:
628
- error_detail += f" Response: {e.response.text[:500]}"
629
- status_message = f"❌ Submission Failed: {error_detail}"
630
- print(status_message)
631
- results_df = pd.DataFrame(results_log)
632
- return status_message, results_df
633
- except requests.exceptions.Timeout:
634
- status_message = "❌ Submission Failed: The request timed out."
635
- print(status_message)
636
- results_df = pd.DataFrame(results_log)
637
- return status_message, results_df
638
- except requests.exceptions.RequestException as e:
639
- status_message = f"❌ Submission Failed: Network error - {e}"
640
- print(status_message)
641
- results_df = pd.DataFrame(results_log)
642
- return status_message, results_df
643
- except Exception as e:
644
- status_message = f"❌ An unexpected error occurred during submission: {e}"
645
- print(status_message)
646
- results_df = pd.DataFrame(results_log)
647
- return status_message, results_df
648
-
649
-
650
- # --- Build Gradio Interface using Blocks ---
651
- with gr.Blocks(title="GAIA Agent Evaluation") as demo:
652
- gr.Markdown("# 🤖 Enhanced GAIA Agent Evaluation Runner")
653
- gr.Markdown(
654
- """
655
- **Enhanced Agent for GAIA Benchmark Certification**
656
-
657
- This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
658
- - 🔍 **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
659
- - 🐍 **Python Interpreter**: Code execution capabilities (from base toolkit)
660
- - 🌐 **Web Scraping**: Custom webpage visitor for content extraction
661
- - 🧮 **Mathematics**: Advanced calculation capabilities
662
- - 📊 **Data Analysis**: Statistical analysis of numerical data
663
- - 🔢 **Number Extraction**: Intelligent number parsing from text
664
- - 📝 **Text Analysis**: Counting and text processing utilities
665
- - 🤖 **LLM Model**: Llama-3.1-8B-Instruct for advanced reasoning
666
-
667
- **Instructions:**
668
- 1. 🔄 **Clone this space** and customize the agent as needed
669
- 2. 🔑 **Log in** to your Hugging Face account using the button below
670
- 3. 🚀 **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
671
- 4. 🎯 **Target**: Score >30% for course certification
672
-
673
- **Goal**: Answer GAIA level 1 validation questions with exact match precision.
674
-
675
- ---
676
- ⚠️ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
677
- """
678
- )
679
-
680
- gr.LoginButton()
681
-
682
- run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")
683
-
684
- status_output = gr.Textbox(
685
- label="📊 Evaluation Status & Results",
686
- lines=8,
687
- interactive=False,
688
- placeholder="Click the button above to start the evaluation..."
689
- )
690
-
691
- results_table = gr.DataFrame(
692
- label="📋 Questions and Agent Responses",
693
- wrap=True,
694
- headers=["Task ID", "Question", "Submitted Answer"]
695
- )
696
-
697
- run_button.click(
698
- fn=run_and_submit_all,
699
- outputs=[status_output, results_table]
700
- )
701
-
702
- if __name__ == "__main__":
703
- print("\n" + "="*60)
704
- print("🤖 ENHANCED GAIA AGENT STARTING UP")
705
- print("="*60)
706
-
707
- # Setup authentication
708
- print("🔐 Setting up HuggingFace authentication...")
709
- auth_success = setup_authentication()
710
-
711
- # Check for SPACE_HOST and SPACE_ID at startup for information
712
- space_host_startup = os.getenv("SPACE_HOST")
713
- space_id_startup = os.getenv("SPACE_ID")
714
-
715
- if space_host_startup:
716
- print(f"✅ SPACE_HOST found: {space_host_startup}")
717
- print(f" 🌐 Runtime URL: https://{space_host_startup}.hf.space")
718
- else:
719
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
720
- if not auth_success:
721
- print("💡 For local testing, you may need to run:")
722
- print(" from huggingface_hub import notebook_login")
723
- print(" notebook_login()")
724
-
725
- if space_id_startup:
726
- print(f"✅ SPACE_ID found: {space_id_startup}")
727
- print(f" 📁 Repo URL: https://huggingface.co/spaces/{space_id_startup}")
728
- print(f" 🔗 Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
729
- else:
730
- print("ℹ️ SPACE_ID environment variable not found (running locally?).")
731
-
732
- print("="*60)
733
- print("🚀 Launching Enhanced GAIA Agent Interface...")
734
- print("🎯 Target: >30% score on GAIA benchmark")
735
- print("="*60 + "\n")
736
-
737
- demo.launch(debug=True, share=False), result):
738
  result = str(int(float(result)))
739
 
740
  result = result.strip()
@@ -916,7 +676,7 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
916
  - 📊 **Data Analysis**: Statistical analysis of numerical data
917
  - 🔢 **Number Extraction**: Intelligent number parsing from text
918
  - 📝 **Text Analysis**: Counting and text processing utilities
919
- - 🤖 **LLM Model**: Llama-3.1-8B-Instruct for advanced reasoning
920
 
921
  **Instructions:**
922
  1. 🔄 **Clone this space** and customize the agent as needed
 
365
  # Initialize the model with fallback options
366
  try:
367
  # Try powerful model first - but use one that's more widely available
368
+ model_id = "meta-llama/Llama-3.3-70B-Instruct"
369
  self.model = InferenceClientModel(model_id=model_id)
370
  print(f"✅ Model initialized successfully: {model_id}")
371
  except Exception as e:
 
494
  result = result[1:-1]
495
 
496
  # Clean up decimal numbers (e.g., "42.0" -> "42")
497
+ if re.match(r'^\d+\.0+$', result):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  result = str(int(float(result)))
499
 
500
  result = result.strip()
 
676
  - 📊 **Data Analysis**: Statistical analysis of numerical data
677
  - 🔢 **Number Extraction**: Intelligent number parsing from text
678
  - 📝 **Text Analysis**: Counting and text processing utilities
679
+ - 🤖 **LLM Model**: Llama-3.3-70B-Instruct for advanced reasoning
680
 
681
  **Instructions:**
682
  1. 🔄 **Clone this space** and customize the agent as needed