Chris committed on
Commit
83178da
·
1 Parent(s): 3a3e679

Final 5.8.3

Browse files
src/__pycache__/app.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
 
src/app.py CHANGED
@@ -13,6 +13,9 @@ import pandas as pd
13
  from typing import Optional, Tuple, Dict
14
  import tempfile
15
  from pathlib import Path
 
 
 
16
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO)
@@ -25,11 +28,207 @@ from models.qwen_client import QwenClient
25
  # Constants for Unit 4 API
26
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  class GAIAAgentApp:
29
  """Production GAIA Agent Application with Unit 4 API integration"""
30
 
31
  def __init__(self, hf_token: Optional[str] = None):
32
  """Initialize the application with optional HF token"""
 
 
 
 
 
33
  try:
34
  # Try main QwenClient first
35
  from models.qwen_client import QwenClient
@@ -39,23 +238,30 @@ class GAIAAgentApp:
39
  # Test if client is working
40
  test_result = self.llm_client.generate("Test", max_tokens=5)
41
  if not test_result.success:
42
- logger.warning("⚠️ Main client test failed, falling back to simple client")
43
  raise Exception("Main client not working")
44
 
45
  self.initialized = True
46
  logger.info("✅ GAIA Agent system initialized with main client")
47
 
48
  except Exception as e:
49
- logger.warning(f"⚠️ Main client failed ({e}), trying simple client...")
50
- try:
51
- # Fallback to simple client
52
- from models.simple_client import SimpleClient
53
- self.llm_client = SimpleClient(hf_token=hf_token)
54
- self.workflow = SimpleGAIAWorkflow(self.llm_client)
55
- self.initialized = True
56
- logger.info("✅ GAIA Agent system initialized with simple client fallback")
57
- except Exception as fallback_error:
58
- logger.error(f"❌ Both main and fallback clients failed: {fallback_error}")
 
 
 
 
 
 
 
59
  self.initialized = False
60
 
61
  @classmethod
@@ -92,7 +298,7 @@ class GAIAAgentApp:
92
  """
93
 
94
  if not self.initialized:
95
- return "❌ System not initialized", "Please check logs for errors", ""
96
 
97
  if not question.strip():
98
  return "❌ Please provide a question", "", ""
@@ -302,6 +508,32 @@ def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
302
 
303
  def format_auth_status(profile: gr.OAuthProfile | None) -> str:
304
  """Format authentication status for display in UI"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  if not profile:
306
  return """
307
  ### 🔐 Authentication Status: Not Logged In
@@ -311,6 +543,8 @@ Please log in to access GAIA evaluation features.
311
  **What you can do:**
312
  - ✅ Manual question testing (limited functionality)
313
  - ❌ Official GAIA benchmark evaluation (requires login)
 
 
314
  """
315
 
316
  username = profile.username
@@ -360,7 +594,7 @@ Please log in to access GAIA evaluation features.
360
  status_parts.extend([
361
  "",
362
  "💡 **Note**: Your OAuth token has limited scopes (common with Gradio OAuth).",
363
- "The system automatically uses reliable fallback methods to ensure functionality."
364
  ])
365
 
366
  return "\n".join(status_parts)
@@ -368,18 +602,32 @@ Please log in to access GAIA evaluation features.
368
  def run_and_submit_all(profile: gr.OAuthProfile | None):
369
  """
370
  Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
371
- and displays the results. Also returns updated authentication status.
372
  """
 
 
 
 
 
373
  # Get authentication status for display
374
  auth_status = format_auth_status(profile)
375
 
376
  # Get space info for code submission
377
  space_id = os.getenv("SPACE_ID")
378
 
379
- if profile:
 
 
 
 
 
 
 
 
 
380
  username = f"{profile.username}"
381
  oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
382
- logger.info(f"User logged in: {username}, Token available: {oauth_token is not None}")
383
 
384
  # Check if OAuth token has sufficient scopes
385
  if oauth_token:
@@ -397,29 +645,27 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
397
 
398
  except Exception as e:
399
  logger.warning(f"⚠️ Could not validate OAuth token: {e}")
400
-
401
  else:
402
- logger.info("User not logged in.")
403
- return "Please Login to Hugging Face with the button.", None, auth_status
 
 
 
404
 
405
  api_url = DEFAULT_API_URL
406
  questions_url = f"{api_url}/questions"
407
  submit_url = f"{api_url}/submit"
408
 
409
- # 1. Instantiate GAIA Agent with OAuth token or fallback
410
  try:
411
- if oauth_token:
412
- logger.info("Creating GAIA Agent with validated OAuth token")
413
- agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
414
- else:
415
- logger.info("Creating GAIA Agent with fallback authentication (limited OAuth scopes detected)")
416
- agent = GAIAAgentApp() # This will automatically fallback to SimpleClient
417
 
418
  if not agent.initialized:
419
- return "Error: GAIA Agent failed to initialize - using SimpleClient fallback for limited OAuth", None, auth_status
420
  except Exception as e:
421
  logger.error(f"Error instantiating agent: {e}")
422
- return f"Error initializing GAIA Agent: {e}", None, auth_status
423
 
424
  # Agent code URL
425
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
@@ -433,17 +679,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
433
  questions_data = response.json()
434
  if not questions_data:
435
  logger.error("Fetched questions list is empty.")
436
- return "Fetched questions list is empty or invalid format.", None, auth_status
437
  logger.info(f"Fetched {len(questions_data)} questions.")
438
  except requests.exceptions.RequestException as e:
439
  logger.error(f"Error fetching questions: {e}")
440
- return f"Error fetching questions: {e}", None, auth_status
441
  except requests.exceptions.JSONDecodeError as e:
442
  logger.error(f"Error decoding JSON response from questions endpoint: {e}")
443
- return f"Error decoding server response for questions: {e}", None, auth_status
444
  except Exception as e:
445
  logger.error(f"An unexpected error occurred fetching questions: {e}")
446
- return f"An unexpected error occurred fetching questions: {e}", None, auth_status
447
 
448
  # 3. Run GAIA Agent
449
  results_log = []
@@ -478,7 +724,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
478
 
479
  if not answers_payload:
480
  logger.error("GAIA Agent did not produce any answers to submit.")
481
- return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), auth_status
482
 
483
  # 4. Prepare Submission
484
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -491,16 +737,37 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
491
  response = requests.post(submit_url, json=submission_data, timeout=120)
492
  response.raise_for_status()
493
  result_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  final_status = (
495
  f"🎉 GAIA Agent Submission Successful!\n"
496
  f"User: {result_data.get('username')}\n"
497
  f"Overall Score: {result_data.get('score', 'N/A')}% "
498
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
499
- f"Message: {result_data.get('message', 'No message received.')}"
 
 
500
  )
501
  logger.info("Submission successful.")
502
  results_df = pd.DataFrame(results_log)
503
- return final_status, results_df, auth_status
504
  except requests.exceptions.HTTPError as e:
505
  error_detail = f"Server responded with status {e.response.status_code}."
506
  try:
@@ -511,22 +778,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
511
  status_message = f"Submission Failed: {error_detail}"
512
  logger.error(status_message)
513
  results_df = pd.DataFrame(results_log)
514
- return status_message, results_df, auth_status
515
  except requests.exceptions.Timeout:
516
  status_message = "Submission Failed: The request timed out."
517
  logger.error(status_message)
518
  results_df = pd.DataFrame(results_log)
519
- return status_message, results_df, auth_status
520
  except requests.exceptions.RequestException as e:
521
  status_message = f"Submission Failed: Network error - {e}"
522
  logger.error(status_message)
523
  results_df = pd.DataFrame(results_log)
524
- return status_message, results_df, auth_status
525
  except Exception as e:
526
  status_message = f"An unexpected error occurred during submission: {e}"
527
  logger.error(status_message)
528
  results_df = pd.DataFrame(results_log)
529
- return status_message, results_df, auth_status
530
 
531
  def create_interface():
532
  """Create the Gradio interface with both Unit 4 API and manual testing"""
@@ -864,6 +1131,29 @@ def create_interface():
864
  label="Questions and GAIA Agent Answers",
865
  wrap=True
866
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
 
868
  gr.Markdown("---")
869
 
@@ -932,9 +1222,22 @@ def create_interface():
932
  )
933
 
934
  # Event handlers for Unit 4 API
 
 
 
 
 
 
 
 
 
 
 
 
935
  unit4_run_button.click(
936
- fn=run_and_submit_all,
937
- outputs=[unit4_status_output, unit4_results_table, auth_status_display]
 
938
  )
939
 
940
  # Refresh authentication status
 
13
  from typing import Optional, Tuple, Dict
14
  import tempfile
15
  from pathlib import Path
16
+ import json
17
+ from datetime import datetime
18
+ import csv
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
 
28
  # Constants for Unit 4 API
29
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
30
 
31
class GAIAResultLogger:
    """
    Logger for GAIA evaluation results with export functionality.

    Writes one timestamped trio of files (CSV, detailed JSON, Markdown
    summary) per evaluation run into a local ``results/`` directory so
    runs can be downloaded, shared, and compared later.
    """

    def __init__(self):
        # Create the output directory once; exist_ok makes re-runs safe.
        self.results_dir = Path("results")
        self.results_dir.mkdir(exist_ok=True)

    def log_evaluation_results(self, username: str, questions_data: list, results_log: list,
                               final_result: dict, execution_time: float) -> dict:
        """
        Log complete evaluation results to multiple formats.

        Args:
            username: Name used in the output filenames.
            questions_data: Raw question dicts fetched from the Unit 4 API.
            results_log: Per-question rows (keys like "Task ID",
                "Submitted Answer") produced while running the agent.
            final_result: Parsed JSON response from the submit endpoint.
            execution_time: Total wall-clock run time in seconds.

        Returns:
            Dict mapping format name ("csv", "json", "summary") to the
            created file path. Logging is best-effort: on failure an
            "error" key carries the exception text instead of raising.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_filename = f"gaia_evaluation_{username}_{timestamp}"

        files_created = {}

        try:
            # 1. CSV export (for easy sharing)
            csv_path = self.results_dir / f"{base_filename}.csv"
            self._save_csv_results(csv_path, results_log, final_result)
            files_created["csv"] = str(csv_path)

            # 2. Detailed JSON export
            json_path = self.results_dir / f"{base_filename}.json"
            detailed_results = self._create_detailed_results(
                username, questions_data, results_log, final_result, execution_time, timestamp
            )
            self._save_json_results(json_path, detailed_results)
            files_created["json"] = str(json_path)

            # 3. Human-readable summary report
            summary_path = self.results_dir / f"{base_filename}_summary.md"
            self._save_summary_report(summary_path, detailed_results)
            files_created["summary"] = str(summary_path)

            logger.info(f"✅ Results logged to {len(files_created)} files: {list(files_created.keys())}")

        except Exception as e:
            logger.error(f"❌ Error logging results: {e}")
            files_created["error"] = str(e)

        return files_created

    def _save_csv_results(self, path: Path, results_log: list, final_result: dict):
        """Save results in CSV format for easy sharing."""
        # Guard *before* opening the file so an empty run does not leave a
        # zero-byte CSV behind (the original opened the file first).
        if not results_log:
            return

        fieldnames = list(results_log[0].keys()) + ['Correct', 'Score']
        score = final_result.get('score', 'N/A')
        correct_count = final_result.get('correct_count', 'N/A')
        total_attempted = final_result.get('total_attempted', len(results_log))

        with open(path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i, row in enumerate(results_log):
                row_data = row.copy()
                # The API only reports an aggregate score; per-row
                # correctness is unknown. The overall score is written on
                # the first data row only to avoid repetition.
                row_data['Correct'] = 'Unknown'
                row_data['Score'] = f"{score}% ({correct_count}/{total_attempted})" if i == 0 else ""
                writer.writerow(row_data)

    def _create_detailed_results(self, username: str, questions_data: list, results_log: list,
                                 final_result: dict, execution_time: float, timestamp: str) -> dict:
        """Create a comprehensive, JSON-serializable results dictionary."""
        # Build the task_id -> answer map once instead of scanning
        # results_log twice per question (was O(questions x results)).
        # Assumes task ids are unique per run — matches the Unit 4 API.
        answers_by_task = {r.get("Task ID"): r.get("Submitted Answer", "") for r in results_log}
        return {
            "metadata": {
                "username": username,
                "timestamp": timestamp,
                "execution_time_seconds": execution_time,
                "total_questions": len(questions_data),
                "total_processed": len(results_log),
                "system_info": {
                    "gradio_version": "4.44.0",
                    "python_version": "3.x",
                    "space_id": os.getenv("SPACE_ID", "local"),
                    "space_host": os.getenv("SPACE_HOST", "local")
                }
            },
            "evaluation_results": {
                "overall_score": final_result.get('score', 'N/A'),
                "correct_count": final_result.get('correct_count', 'N/A'),
                "total_attempted": final_result.get('total_attempted', len(results_log)),
                "success_rate": f"{final_result.get('score', 0)}%",
                "api_message": final_result.get('message', 'No message'),
                # A successful submission is identified by the presence of
                # a 'score' key in the API response.
                "submission_successful": 'score' in final_result
            },
            "question_details": [
                {
                    "index": i + 1,
                    "task_id": item.get("task_id"),
                    "question": item.get("question"),
                    "level": item.get("Level", "Unknown"),
                    "file_name": item.get("file_name", ""),
                    "submitted_answer": answers_by_task.get(item.get("task_id"), "No answer"),
                    "question_length": len(item.get("question", "")),
                    "answer_length": len(answers_by_task.get(item.get("task_id"), ""))
                }
                for i, item in enumerate(questions_data)
            ],
            "processing_summary": {
                "questions_by_level": self._analyze_questions_by_level(questions_data),
                "questions_with_files": len([q for q in questions_data if q.get("file_name")]),
                "average_question_length": sum(len(q.get("question", "")) for q in questions_data) / len(questions_data) if questions_data else 0,
                "average_answer_length": sum(len(r.get("Submitted Answer", "")) for r in results_log) / len(results_log) if results_log else 0,
                "processing_time_per_question": execution_time / len(results_log) if results_log else 0
            },
            "raw_results_log": results_log,
            "api_response": final_result
        }

    def _analyze_questions_by_level(self, questions_data: list) -> dict:
        """Return a {level: count} distribution of the fetched questions."""
        level_counts = {}
        for q in questions_data:
            level = q.get("Level", "Unknown")
            level_counts[level] = level_counts.get(level, 0) + 1
        return level_counts

    def _save_json_results(self, path: Path, detailed_results: dict):
        """Save detailed results in JSON format (UTF-8, human-readable)."""
        with open(path, 'w', encoding='utf-8') as jsonfile:
            json.dump(detailed_results, jsonfile, indent=2, ensure_ascii=False)

    def _save_summary_report(self, path: Path, detailed_results: dict):
        """Save a human-readable Markdown summary report."""
        metadata = detailed_results["metadata"]
        results = detailed_results["evaluation_results"]
        summary = detailed_results["processing_summary"]

        report = f"""# GAIA Agent Evaluation Report

## Summary
- **User**: {metadata['username']}
- **Date**: {metadata['timestamp']}
- **Overall Score**: {results['overall_score']}% ({results['correct_count']}/{results['total_attempted']} correct)
- **Execution Time**: {metadata['execution_time_seconds']:.2f} seconds
- **Submission Status**: {'✅ Success' if results['submission_successful'] else '❌ Failed'}

## Question Analysis
- **Total Questions**: {metadata['total_questions']}
- **Successfully Processed**: {metadata['total_processed']}
- **Questions with Files**: {summary['questions_with_files']}
- **Average Question Length**: {summary['average_question_length']:.0f} characters
- **Average Answer Length**: {summary['average_answer_length']:.0f} characters
- **Processing Time per Question**: {summary['processing_time_per_question']:.2f} seconds

## Questions by Level
"""

        for level, count in summary['questions_by_level'].items():
            report += f"- **Level {level}**: {count} questions\n"

        report += f"""
## API Response
{results['api_message']}

## System Information
- **Space ID**: {metadata['system_info']['space_id']}
- **Space Host**: {metadata['system_info']['space_host']}
- **Gradio Version**: {metadata['system_info']['gradio_version']}

---
*Report generated automatically by GAIA Agent System*
"""

        with open(path, 'w', encoding='utf-8') as f:
            f.write(report)

    def get_latest_results(self, username: Optional[str] = None) -> list:
        """Return the 10 most recently modified result files.

        Args:
            username: If given, restrict the listing to that user's files;
                otherwise list results for all users.
        """
        pattern = f"gaia_evaluation_{username}_*" if username else "gaia_evaluation_*"
        files = list(self.results_dir.glob(pattern))
        files.sort(key=lambda x: x.stat().st_mtime, reverse=True)
        return files[:10]
221
+
222
  class GAIAAgentApp:
223
  """Production GAIA Agent Application with Unit 4 API integration"""
224
 
225
  def __init__(self, hf_token: Optional[str] = None):
226
  """Initialize the application with optional HF token"""
227
+
228
+ # Priority order: 1) passed hf_token, 2) HF_TOKEN env var
229
+ if not hf_token:
230
+ hf_token = os.getenv("HF_TOKEN")
231
+
232
  try:
233
  # Try main QwenClient first
234
  from models.qwen_client import QwenClient
 
238
  # Test if client is working
239
  test_result = self.llm_client.generate("Test", max_tokens=5)
240
  if not test_result.success:
241
+ logger.error(f" Main client test failed: {test_result}")
242
  raise Exception("Main client not working")
243
 
244
  self.initialized = True
245
  logger.info("✅ GAIA Agent system initialized with main client")
246
 
247
  except Exception as e:
248
+ logger.error(f" Main client failed ({e})")
249
+
250
+ # Only fallback to simple client if no HF token is available
251
+ if not hf_token:
252
+ logger.warning("⚠️ No HF token available, trying simple client...")
253
+ try:
254
+ # Fallback to simple client
255
+ from models.simple_client import SimpleClient
256
+ self.llm_client = SimpleClient(hf_token=hf_token)
257
+ self.workflow = SimpleGAIAWorkflow(self.llm_client)
258
+ self.initialized = True
259
+ logger.info("✅ GAIA Agent system initialized with simple client fallback")
260
+ except Exception as fallback_error:
261
+ logger.error(f"❌ Both main and fallback clients failed: {fallback_error}")
262
+ self.initialized = False
263
+ else:
264
+ logger.error("❌ Main client failed despite having HF token - not falling back to simple client")
265
  self.initialized = False
266
 
267
  @classmethod
 
298
  """
299
 
300
  if not self.initialized:
301
+ return "❌ System not initialized", "", ""
302
 
303
  if not question.strip():
304
  return "❌ Please provide a question", "", ""
 
508
 
509
  def format_auth_status(profile: gr.OAuthProfile | None) -> str:
510
  """Format authentication status for display in UI"""
511
+
512
+ # Check for HF_TOKEN first
513
+ hf_token = os.getenv("HF_TOKEN")
514
+
515
+ if hf_token:
516
+ # HF_TOKEN is available - this is the best case scenario
517
+ return """
518
+ ### 🎯 Authentication Status: HF_TOKEN Environment Variable
519
+
520
+ **🚀 FULL SYSTEM CAPABILITIES ENABLED**
521
+
522
+ **Authentication Source**: HF_TOKEN environment variable
523
+ **Scopes**: read, inference (full access)
524
+
525
+ **Available Features:**
526
+ - ✅ **Advanced Model Access**: Full Qwen model capabilities (7B/32B/72B)
527
+ - ✅ **High Performance**: 30%+ expected GAIA score
528
+ - ✅ **Complete Pipeline**: All agents and tools fully functional
529
+ - ✅ **Web Research**: Full DuckDuckGo search capabilities
530
+ - ✅ **File Processing**: Complete multi-format file handling
531
+ - ✅ **Manual Testing**: Individual question processing
532
+ - ✅ **Official Evaluation**: GAIA benchmark submission
533
+
534
+ 💡 **Status**: Optimal configuration for GAIA benchmark performance.
535
+ """
536
+
537
  if not profile:
538
  return """
539
  ### 🔐 Authentication Status: Not Logged In
 
543
  **What you can do:**
544
  - ✅ Manual question testing (limited functionality)
545
  - ❌ Official GAIA benchmark evaluation (requires login)
546
+
547
+ **For Best Performance**: Set HF_TOKEN as a Space secret for full capabilities.
548
  """
549
 
550
  username = profile.username
 
594
  status_parts.extend([
595
  "",
596
  "💡 **Note**: Your OAuth token has limited scopes (common with Gradio OAuth).",
597
+ "For best performance, set HF_TOKEN as a Space secret for full model access."
598
  ])
599
 
600
  return "\n".join(status_parts)
 
602
  def run_and_submit_all(profile: gr.OAuthProfile | None):
603
  """
604
  Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
605
+ and displays the results. Also returns updated authentication status and downloadable files.
606
  """
607
+ start_time = time.time()
608
+
609
+ # Initialize result logger
610
+ result_logger = GAIAResultLogger()
611
+
612
  # Get authentication status for display
613
  auth_status = format_auth_status(profile)
614
 
615
  # Get space info for code submission
616
  space_id = os.getenv("SPACE_ID")
617
 
618
+ # Priority order for token: 1) HF_TOKEN env var, 2) OAuth token
619
+ hf_token = os.getenv("HF_TOKEN")
620
+ oauth_token = None
621
+ username = "unknown_user"
622
+
623
+ if hf_token:
624
+ logger.info("🎯 Using HF_TOKEN environment variable for authentication")
625
+ oauth_token = hf_token
626
+ username = "hf_token_user"
627
+ elif profile:
628
  username = f"{profile.username}"
629
  oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
630
+ logger.info(f"User logged in: {username}, OAuth token available: {oauth_token is not None}")
631
 
632
  # Check if OAuth token has sufficient scopes
633
  if oauth_token:
 
645
 
646
  except Exception as e:
647
  logger.warning(f"⚠️ Could not validate OAuth token: {e}")
 
648
  else:
649
+ logger.info("User not logged in and no HF_TOKEN available.")
650
+ return "Please either login to Hugging Face or set HF_TOKEN environment variable.", None, auth_status, None, None, None
651
+
652
+ if not oauth_token:
653
+ return "No valid authentication token available. Please login or set HF_TOKEN environment variable.", None, auth_status, None, None, None
654
 
655
  api_url = DEFAULT_API_URL
656
  questions_url = f"{api_url}/questions"
657
  submit_url = f"{api_url}/submit"
658
 
659
+ # 1. Instantiate GAIA Agent with token
660
  try:
661
+ logger.info("🚀 Creating GAIA Agent with authenticated token")
662
+ agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
 
 
 
 
663
 
664
  if not agent.initialized:
665
+ return "Error: GAIA Agent failed to initialize", None, auth_status, None, None, None
666
  except Exception as e:
667
  logger.error(f"Error instantiating agent: {e}")
668
+ return f"Error initializing GAIA Agent: {e}", None, auth_status, None, None, None
669
 
670
  # Agent code URL
671
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
 
679
  questions_data = response.json()
680
  if not questions_data:
681
  logger.error("Fetched questions list is empty.")
682
+ return "Fetched questions list is empty or invalid format.", None, auth_status, None, None, None
683
  logger.info(f"Fetched {len(questions_data)} questions.")
684
  except requests.exceptions.RequestException as e:
685
  logger.error(f"Error fetching questions: {e}")
686
+ return f"Error fetching questions: {e}", None, auth_status, None, None, None
687
  except requests.exceptions.JSONDecodeError as e:
688
  logger.error(f"Error decoding JSON response from questions endpoint: {e}")
689
+ return f"Error decoding server response for questions: {e}", None, auth_status, None, None, None
690
  except Exception as e:
691
  logger.error(f"An unexpected error occurred fetching questions: {e}")
692
+ return f"An unexpected error occurred fetching questions: {e}", None, auth_status, None, None, None
693
 
694
  # 3. Run GAIA Agent
695
  results_log = []
 
724
 
725
  if not answers_payload:
726
  logger.error("GAIA Agent did not produce any answers to submit.")
727
+ return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), auth_status, None, None, None
728
 
729
  # 4. Prepare Submission
730
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
737
  response = requests.post(submit_url, json=submission_data, timeout=120)
738
  response.raise_for_status()
739
  result_data = response.json()
740
+
741
+ # Calculate execution time
742
+ execution_time = time.time() - start_time
743
+
744
+ # 6. Log results to files
745
+ logger.info("📝 Logging evaluation results...")
746
+ logged_files = result_logger.log_evaluation_results(
747
+ username=username,
748
+ questions_data=questions_data,
749
+ results_log=results_log,
750
+ final_result=result_data,
751
+ execution_time=execution_time
752
+ )
753
+
754
+ # Prepare download files
755
+ csv_file = logged_files.get("csv")
756
+ json_file = logged_files.get("json")
757
+ summary_file = logged_files.get("summary")
758
+
759
  final_status = (
760
  f"🎉 GAIA Agent Submission Successful!\n"
761
  f"User: {result_data.get('username')}\n"
762
  f"Overall Score: {result_data.get('score', 'N/A')}% "
763
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
764
+ f"Execution Time: {execution_time:.2f} seconds\n"
765
+ f"Message: {result_data.get('message', 'No message received.')}\n\n"
766
+ f"📁 Results saved to {len([f for f in [csv_file, json_file, summary_file] if f])} files for sharing."
767
  )
768
  logger.info("Submission successful.")
769
  results_df = pd.DataFrame(results_log)
770
+ return final_status, results_df, auth_status, csv_file, json_file, summary_file
771
  except requests.exceptions.HTTPError as e:
772
  error_detail = f"Server responded with status {e.response.status_code}."
773
  try:
 
778
  status_message = f"Submission Failed: {error_detail}"
779
  logger.error(status_message)
780
  results_df = pd.DataFrame(results_log)
781
+ return status_message, results_df, auth_status, None, None, None
782
  except requests.exceptions.Timeout:
783
  status_message = "Submission Failed: The request timed out."
784
  logger.error(status_message)
785
  results_df = pd.DataFrame(results_log)
786
+ return status_message, results_df, auth_status, None, None, None
787
  except requests.exceptions.RequestException as e:
788
  status_message = f"Submission Failed: Network error - {e}"
789
  logger.error(status_message)
790
  results_df = pd.DataFrame(results_log)
791
+ return status_message, results_df, auth_status, None, None, None
792
  except Exception as e:
793
  status_message = f"An unexpected error occurred during submission: {e}"
794
  logger.error(status_message)
795
  results_df = pd.DataFrame(results_log)
796
+ return status_message, results_df, auth_status, None, None, None
797
 
798
  def create_interface():
799
  """Create the Gradio interface with both Unit 4 API and manual testing"""
 
1131
  label="Questions and GAIA Agent Answers",
1132
  wrap=True
1133
  )
1134
+
1135
+ # Download section
1136
+ gr.Markdown("### 📁 Download Results")
1137
+ gr.Markdown("After evaluation completes, download your results in different formats:")
1138
+
1139
+ with gr.Row():
1140
+ csv_download = gr.File(
1141
+ label="📊 CSV Results",
1142
+ visible=False,
1143
+ interactive=False
1144
+ )
1145
+
1146
+ json_download = gr.File(
1147
+ label="🔍 Detailed JSON",
1148
+ visible=False,
1149
+ interactive=False
1150
+ )
1151
+
1152
+ summary_download = gr.File(
1153
+ label="📋 Summary Report",
1154
+ visible=False,
1155
+ interactive=False
1156
+ )
1157
 
1158
  gr.Markdown("---")
1159
 
 
1222
  )
1223
 
1224
  # Event handlers for Unit 4 API
1225
def handle_evaluation_results(profile):
    """Run the full evaluation and surface the result files in the UI.

    Thin wrapper around run_and_submit_all() that converts the three
    returned file paths into Gradio component updates, so each download
    widget is shown only when its file was actually produced.
    """
    status, table, auth_status, csv_file, json_file, summary_file = run_and_submit_all(profile)

    def _as_download(path):
        # Hide the widget entirely when no file exists for this format.
        return gr.update(value=path, visible=path is not None)

    return (
        status,
        table,
        auth_status,
        _as_download(csv_file),
        _as_download(json_file),
        _as_download(summary_file),
    )
1236
+
1237
  unit4_run_button.click(
1238
+ fn=handle_evaluation_results,
1239
+ outputs=[unit4_status_output, unit4_results_table, auth_status_display,
1240
+ csv_download, json_download, summary_download]
1241
  )
1242
 
1243
  # Refresh authentication status
src/production_deployment_guide.md CHANGED
@@ -143,18 +143,41 @@ suggested_storage: "small"
143
 
144
  ### 4. Environment Variables (Space Secrets)
145
 
146
- **⚠️ IMPORTANT**: Do NOT set `HF_TOKEN` as a Space secret!
147
- The system uses OAuth authentication in production.
148
 
149
- Optional environment variables:
150
 
151
  ```bash
152
- # Only set these if needed for specific features
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  LANGCHAIN_TRACING_V2=true # Optional: LangSmith tracing
154
  LANGCHAIN_API_KEY=your_key_here # Optional: LangSmith API key
155
  LANGCHAIN_PROJECT=gaia-agent # Optional: LangSmith project
156
  ```
157
 
 
 
158
  ### 5. Authentication Flow in Production
159
 
160
  ```python
 
143
 
144
  ### 4. Environment Variables (Space Secrets)
145
 
146
+ **🎯 CRITICAL: Set HF_TOKEN for Full Model Access**
 
147
 
148
+ To get the **real GAIA Agent performance** (not SimpleClient fallback), you **MUST** set `HF_TOKEN` as a Space secret:
149
 
150
  ```bash
151
+ # Required for full model access and GAIA performance
152
+ HF_TOKEN=hf_your_token_here # REQUIRED: Your HuggingFace token
153
+ ```
154
+
155
+ **How to set HF_TOKEN:**
156
+ 1. Go to your Space settings in HuggingFace
157
+ 2. Navigate to "Repository secrets"
158
+ 3. Add new secret:
159
+ - **Name**: `HF_TOKEN`
160
+ - **Value**: Your HuggingFace token (from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens))
161
+
162
+ ⚠️ **IMPORTANT**: Do NOT set `HF_TOKEN` as a regular environment variable - use Space secrets for security.
163
+
164
+ **Token Requirements:**
165
+ - Token must have **`read`** and **`inference`** scopes
166
+ - Generate token at: https://huggingface.co/settings/tokens
167
+ - Select "Fine-grained" token type
168
+ - Enable both scopes for full functionality
169
+
170
+ **Optional environment variables:**
171
+
172
+ ```bash
173
+ # Optional: LangSmith tracing (if you want observability)
174
  LANGCHAIN_TRACING_V2=true # Optional: LangSmith tracing
175
  LANGCHAIN_API_KEY=your_key_here # Optional: LangSmith API key
176
  LANGCHAIN_PROJECT=gaia-agent # Optional: LangSmith project
177
  ```
178
 
179
+ **⚠️ DO NOT SET**: The system automatically handles OAuth in production when HF_TOKEN is available.
180
+
181
  ### 5. Authentication Flow in Production
182
 
183
  ```python
src/tools/__pycache__/web_search_tool.cpython-310.pyc CHANGED
Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ
 
src/tools/web_search_tool.py CHANGED
@@ -88,17 +88,21 @@ class WebSearchTool(BaseTool):
88
 
89
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
90
  """
91
- Search the web using DuckDuckGo with retry mechanisms
92
  """
93
- max_retries = 3
94
- retry_delay = 1.0
95
 
96
- for attempt in range(max_retries):
97
  try:
98
- logger.info(f"Searching web for: {query} (attempt {attempt + 1}/{max_retries})")
 
 
 
 
 
 
99
 
100
- # Perform DuckDuckGo search with timeout
101
  with DDGS() as ddgs:
 
102
  search_results = list(ddgs.text(
103
  keywords=query,
104
  max_results=limit,
@@ -107,10 +111,8 @@ class WebSearchTool(BaseTool):
107
  ))
108
 
109
  if not search_results:
110
- if attempt < max_retries - 1:
111
  logger.warning(f"No results on attempt {attempt + 1}, retrying...")
112
- time.sleep(retry_delay)
113
- retry_delay *= 2 # Exponential backoff
114
  continue
115
  else:
116
  return {
@@ -154,30 +156,74 @@ class WebSearchTool(BaseTool):
154
  "total_results": len(results),
155
  "message": f"Found {len(results)} web search results"
156
  }
157
-
158
  except Exception as e:
159
- logger.warning(f"Web search attempt {attempt + 1} failed: {str(e)}")
160
- if attempt < max_retries - 1:
161
- time.sleep(retry_delay)
162
- retry_delay *= 2 # Exponential backoff
163
- continue
164
  else:
165
- # Final attempt failed, but don't raise exception
166
- logger.error(f"Web search failed after {max_retries} attempts: {str(e)}")
167
- return {
168
- "query": query,
169
- "found": False,
170
- "message": f"Web search failed after retries: {str(e)}",
171
- "results": [],
172
- "error_type": "search_failure"
173
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # Should not reach here, but just in case
176
  return {
177
  "query": query,
178
  "found": False,
179
- "message": "Unexpected search failure",
180
- "results": []
 
181
  }
182
 
183
  def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
 
88
 
89
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
90
  """
91
+ Search the web using DuckDuckGo with enhanced rate limiting handling
92
  """
 
 
93
 
94
+ for attempt in range(3):
95
  try:
96
+ logger.info(f"Searching web for: {query} (attempt {attempt + 1}/3)")
97
+
98
+ # Progressive delays to handle rate limiting
99
+ if attempt > 0:
100
+ delay = 5 * (2 ** (attempt - 1)) # 5s, 10s delays
101
+ logger.info(f"Waiting {delay}s before retry due to rate limiting...")
102
+ time.sleep(delay)
103
 
 
104
  with DDGS() as ddgs:
105
+ # Use DuckDuckGo search with proper parameters
106
  search_results = list(ddgs.text(
107
  keywords=query,
108
  max_results=limit,
 
111
  ))
112
 
113
  if not search_results:
114
+ if attempt < 2:
115
  logger.warning(f"No results on attempt {attempt + 1}, retrying...")
 
 
116
  continue
117
  else:
118
  return {
 
156
  "total_results": len(results),
157
  "message": f"Found {len(results)} web search results"
158
  }
159
+
160
  except Exception as e:
161
+ error_msg = str(e)
162
+ if "ratelimit" in error_msg.lower() or "rate limit" in error_msg.lower() or "403" in error_msg or "202" in error_msg or "429" in error_msg:
163
+ logger.warning(f"Web search attempt {attempt + 1} failed: {error_msg}")
164
+ if attempt < 2:
165
+ continue
166
  else:
167
+ logger.error(f"Web search attempt {attempt + 1} failed with non-rate-limit error: {error_msg}")
168
+ if attempt < 2:
169
+ continue
170
+
171
+ # If all attempts failed, try fallback search strategy
172
+ logger.warning("All DuckDuckGo attempts failed, trying fallback search strategy...")
173
+ return self._fallback_search(query)
174
+
175
+ def _fallback_search(self, query: str) -> Dict[str, Any]:
176
+ """
177
+ Fallback search strategy when DuckDuckGo is completely unavailable
178
+ """
179
+ try:
180
+ # Try a simple Wikipedia search as fallback
181
+ import wikipedia
182
+ wikipedia.set_lang("en")
183
+
184
+ # Extract key terms from query for Wikipedia search
185
+ search_terms = query.replace("site:", "").strip()
186
+
187
+ try:
188
+ # Search Wikipedia pages
189
+ wiki_results = wikipedia.search(search_terms, results=3)
190
+ if wiki_results:
191
+ fallback_results = []
192
+ for i, page_title in enumerate(wiki_results[:2], 1):
193
+ try:
194
+ page = wikipedia.page(page_title)
195
+ summary = page.summary[:200] + "..." if len(page.summary) > 200 else page.summary
196
+
197
+ web_result = WebSearchResult(
198
+ title=f"{page_title} (Wikipedia)",
199
+ url=page.url,
200
+ snippet=summary
201
+ )
202
+ fallback_results.append(web_result.to_dict())
203
+ except:
204
+ continue
205
+
206
+ if fallback_results:
207
+ return {
208
+ "query": query,
209
+ "found": True,
210
+ "results": fallback_results,
211
+ "total_results": len(fallback_results),
212
+ "message": f"Using Wikipedia fallback search. Found {len(fallback_results)} results"
213
+ }
214
+ except:
215
+ pass
216
+
217
+ except ImportError:
218
+ pass
219
 
220
+ # Last resort: return a helpful message
221
  return {
222
  "query": query,
223
  "found": False,
224
+ "message": " Web search failed due to rate limiting. Please try again later or provide the information directly.",
225
+ "results": [],
226
+ "error_type": "search_failure"
227
  }
228
 
229
  def _extract_content_from_url(self, url: str) -> Dict[str, Any]: