Chris committed on
Commit
6f7648f
·
1 Parent(s): f798fcd

Final 6.5.3

Browse files
Files changed (1) hide show
  1. src/app.py +207 -307
src/app.py CHANGED
@@ -1282,32 +1282,28 @@ def create_interface():
1282
  """
1283
  ### πŸ” Authentication Status: Not Logged In
1284
 
1285
- Please log in to access GAIA evaluation features with full inference access.
1286
 
1287
- **What you can do:**
1288
- - βœ… Manual question testing (limited functionality)
1289
- - ❌ Official GAIA benchmark evaluation (requires login)
 
1290
 
1291
- **πŸ”‘ OAuth Configuration**: Login now requests both `read` and `inference` scopes for optimal performance.
1292
- **πŸ“ˆ Expected Performance**: 30%+ GAIA score with full inference access.
1293
  """,
1294
  elem_classes=["oauth-login"]
1295
  )
1296
 
 
 
 
1297
  with gr.Row():
1298
- login_button = gr.LoginButton(
1299
- value="πŸ”‘ Login with Full Inference Access",
1300
- # Note: Gradio 4.44.0 may not support scopes parameter directly
1301
- # The scopes will be configured at the interface level
1302
- )
1303
  refresh_auth_button = gr.Button("πŸ”„ Refresh Auth Status", variant="secondary", scale=1)
1304
- debug_auth_button = gr.Button("πŸ” Debug OAuth", variant="secondary", scale=1)
1305
 
1306
  unit4_run_button = gr.Button(
1307
- "πŸ”’ Login Required for GAIA Evaluation",
1308
  variant="primary",
1309
- scale=2,
1310
- interactive=False # Disabled until login
1311
  )
1312
 
1313
  unit4_status_output = gr.Textbox(
@@ -1426,344 +1422,242 @@ Please log in to access GAIA evaluation features with full inference access.
1426
  elem_classes=["reasoning-box"]
1427
  )
1428
 
1429
- # Event handlers for Unit 4 API
1430
- def handle_evaluation_results(request: gr.Request):
1431
- """Handle evaluation and update download visibility"""
1432
- # Use the same OAuth profile extraction logic that works in other functions
1433
- profile = None
1434
- oauth_token = None
1435
- username = None
 
 
 
 
 
 
 
 
 
 
1436
 
 
 
 
 
 
 
 
 
 
 
 
1437
  try:
1438
- # Try to get OAuth info from request using multiple methods
1439
- if hasattr(request, 'user') and request.user:
1440
- username = getattr(request.user, 'username', None)
1441
- if username:
1442
- # Create a profile-like object if we have user info
1443
- class GradioProfile:
1444
- def __init__(self, username):
1445
- self.username = username
1446
- self.oauth_token = None
1447
- profile = GradioProfile(username)
1448
- logger.info(f"πŸ”‘ Found user via request.user: {username}")
1449
-
1450
- # Try additional Gradio OAuth attributes
1451
- if not profile:
1452
- # Check for standard Gradio OAuth profile
1453
- for attr in ['oauth_profile', 'profile', 'user_profile']:
1454
- if hasattr(request, attr):
1455
- oauth_profile = getattr(request, attr)
1456
- logger.info(f"πŸ” DEBUG: Found request.{attr} = {type(oauth_profile)}")
1457
- if oauth_profile and hasattr(oauth_profile, 'username'):
1458
- profile = oauth_profile
1459
- username = oauth_profile.username
1460
- logger.info(f"πŸ”‘ Found profile via request.{attr}: {username}")
1461
- break
1462
-
1463
- # Alternative: Check session or headers for OAuth token
1464
- if hasattr(request, 'session'):
1465
- session = request.session
1466
- logger.info(f"πŸ” DEBUG: Session available, keys: {list(session.keys()) if hasattr(session, 'keys') else 'no keys method'}")
1467
- oauth_token = session.get('oauth_token') or session.get('access_token')
1468
- if oauth_token:
1469
- logger.info("πŸ”‘ Found OAuth token in session")
1470
-
1471
- # Check request headers for authorization
1472
- if hasattr(request, 'headers'):
1473
- auth_header = request.headers.get('authorization', '')
1474
- logger.info(f"πŸ” DEBUG: Authorization header present: {bool(auth_header)}")
1475
- if auth_header.startswith('Bearer '):
1476
- oauth_token = auth_header[7:]
1477
- logger.info("πŸ”‘ Found OAuth token in headers")
1478
-
1479
- # Try to extract token from profile if we have one
1480
- if profile and not oauth_token:
1481
- profile_attrs = [attr for attr in dir(profile) if not attr.startswith('_')]
1482
- logger.info(f"πŸ” DEBUG: Profile attributes: {profile_attrs}")
1483
- for token_attr in ['oauth_token', 'token', 'access_token', 'id_token', 'bearer_token']:
1484
- if hasattr(profile, token_attr):
1485
- token = getattr(profile, token_attr)
1486
- if token:
1487
- oauth_token = token
1488
- logger.info(f"πŸ”‘ Found OAuth token via profile.{token_attr}")
1489
- break
1490
-
1491
- # If we found a token, add it to the profile
1492
- if oauth_token and profile:
1493
- profile.oauth_token = oauth_token
1494
- logger.info(f"βœ… OAuth profile created: user={username}, token=present")
1495
- elif profile and not oauth_token:
1496
- logger.info(f"βœ… OAuth profile created: user={username}, token=missing")
1497
- elif not profile and not oauth_token:
1498
- logger.warning("⚠️ No OAuth profile or token found in request")
1499
 
 
 
 
 
 
 
 
 
1500
  except Exception as e:
1501
- logger.error(f"❌ Error extracting OAuth profile: {e}")
1502
- profile = None
1503
-
1504
- results = run_and_submit_all(profile)
1505
- status, table, auth_status, csv_file, json_file, summary_file = results
1506
-
1507
- # Update download file visibility and values
1508
- csv_update = gr.update(value=csv_file, visible=csv_file is not None)
1509
- json_update = gr.update(value=json_file, visible=json_file is not None)
1510
- summary_update = gr.update(value=summary_file, visible=summary_file is not None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1511
 
1512
- return status, table, auth_status, csv_update, json_update, summary_update
1513
-
1514
- def refresh_auth_status(request: gr.Request):
1515
- """Refresh authentication status display with enhanced debugging"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1516
  try:
1517
- # Use Gradio's built-in OAuth support
1518
- # In newer Gradio versions with HF Spaces, OAuth info should be accessible
1519
- profile = None
1520
- oauth_token = None
1521
- username = None
1522
 
1523
- # Try to get OAuth info from request
1524
- if hasattr(request, 'user') and request.user:
1525
- username = getattr(request.user, 'username', None)
1526
- if username:
1527
- # Create a profile-like object if we have user info
1528
- class GradioProfile:
1529
- def __init__(self, username):
1530
- self.username = username
1531
- self.oauth_token = None
1532
- profile = GradioProfile(username)
1533
-
1534
- # Alternative: Check session or headers for OAuth token
1535
- if hasattr(request, 'session'):
1536
- session = request.session
1537
- oauth_token = session.get('oauth_token') or session.get('access_token')
1538
 
1539
- # Check request headers for authorization
1540
- if hasattr(request, 'headers'):
1541
- auth_header = request.headers.get('authorization', '')
1542
- if auth_header.startswith('Bearer '):
1543
- oauth_token = auth_header[7:]
 
 
 
 
1544
 
1545
- # If we found a token, add it to the profile
1546
- if oauth_token and profile:
1547
- profile.oauth_token = oauth_token
1548
-
1549
- logger.info(f"πŸ” OAuth Debug - Profile: {profile is not None}, Username: {username}, Token: {oauth_token is not None}")
1550
 
1551
- return format_auth_status(profile)
 
 
 
 
 
 
 
 
 
 
1552
 
1553
- except Exception as e:
1554
- logger.error(f"❌ Error in refresh_auth_status: {e}")
 
 
1555
 
1556
- # Fallback: Check environment variables and provide helpful info
1557
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1558
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1559
 
1560
- if oauth_client_id and oauth_scopes:
1561
- return f"""
1562
- ### πŸ” OAuth Configuration Detected
1563
-
1564
- **🏠 Space OAuth**: βœ… Configured with scopes: {oauth_scopes}
1565
-
1566
- **⚠️ Authentication Detection Issue**: {str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1567
 
1568
- **πŸ”§ Gradio OAuth Integration**: The Space has OAuth enabled, but we're having trouble accessing your authentication status through the Gradio interface.
1569
 
1570
- **πŸ’‘ This is likely a Gradio version compatibility issue**. Your login should still work for the GAIA evaluation.
 
 
 
1571
 
1572
- **🎯 Try This**: Click "πŸš€ Run GAIA Evaluation & Submit All Answers" button - it may work even if the status display has issues.
1573
  """
1574
- else:
1575
- return f"### ❌ Authentication Error\n\nError checking auth status: {str(e)}"
1576
-
1577
- def check_login_state(request: gr.Request):
1578
- """Check if user is logged in and update UI accordingly with enhanced detection"""
1579
- try:
1580
- # Simplified approach - just try to determine if user is logged in
1581
- # without accessing request.user directly
1582
-
1583
- # Check if OAuth environment is configured
1584
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1585
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1586
-
1587
- if oauth_client_id and oauth_scopes:
1588
- # OAuth is configured, assume user can log in
1589
- # Enable the button and let the actual authentication happen at runtime
1590
- auth_status = f"""
1591
- ### 🏠 OAuth Configured Space
1592
 
1593
- **πŸ”‘ OAuth Status**: Space is configured with OAuth scopes: {oauth_scopes}
1594
 
1595
- **🎯 Ready for GAIA Evaluation**: Click the button below to start evaluation with your HuggingFace login.
 
 
 
1596
 
1597
- **πŸ’‘ Note**: Authentication happens when you click "Run GAIA Evaluation" - you'll be prompted to login if needed.
1598
  """
1599
- button_update = gr.update(interactive=True, value="πŸš€ Run GAIA Evaluation & Submit All Answers")
1600
- logger.info("βœ… OAuth environment detected, enabling GAIA evaluation")
1601
- return auth_status, button_update
1602
- else:
1603
- # No OAuth configured
1604
- auth_status = format_auth_status(None)
1605
- button_update = gr.update(interactive=False, value="πŸ”’ OAuth Not Configured")
1606
- logger.info("ℹ️ No OAuth environment detected")
1607
- return auth_status, button_update
1608
-
1609
- except Exception as e:
1610
- logger.error(f"❌ Error in check_login_state: {e}")
1611
- # Return safe defaults
1612
- auth_status = f"### ❌ Error\n\nError checking login state: {str(e)}"
1613
- button_update = gr.update(interactive=False, value="πŸ”’ Login Error")
1614
- return auth_status, button_update
1615
 
1616
  # Set up automatic login state checking
1617
  interface.load(
1618
- fn=check_login_state,
1619
- outputs=[auth_status_display, unit4_run_button]
1620
  )
1621
 
1622
  unit4_run_button.click(
1623
- fn=handle_evaluation_results,
1624
- inputs=[], # No inputs needed - profile comes from session
1625
- outputs=[unit4_status_output, unit4_results_table, auth_status_display,
1626
- csv_download, json_download, summary_download]
1627
  )
1628
 
1629
  # Refresh authentication status manually
1630
  refresh_auth_button.click(
1631
- fn=refresh_auth_status,
1632
- outputs=[auth_status_display]
1633
- )
1634
-
1635
- # Debug OAuth information
1636
- def debug_oauth_info(request: gr.Request):
1637
- """Debug function to show OAuth information"""
1638
- try:
1639
- debug_info = []
1640
- debug_info.append("# πŸ” OAuth Debug Information\n")
1641
-
1642
- # Check HuggingFace Spaces OAuth Environment Variables
1643
- debug_info.append("## 🏠 HuggingFace Spaces OAuth Environment")
1644
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1645
- oauth_client_secret = os.getenv("OAUTH_CLIENT_SECRET")
1646
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1647
- openid_provider_url = os.getenv("OPENID_PROVIDER_URL")
1648
-
1649
- debug_info.append(f"**OAUTH_CLIENT_ID**: {oauth_client_id is not None}")
1650
- debug_info.append(f"**OAUTH_CLIENT_SECRET**: {oauth_client_secret is not None}")
1651
- debug_info.append(f"**OAUTH_SCOPES**: {oauth_scopes}")
1652
- debug_info.append(f"**OPENID_PROVIDER_URL**: {openid_provider_url}")
1653
-
1654
- if oauth_scopes:
1655
- scopes_list = oauth_scopes.split()
1656
- debug_info.append(f"**Available Scopes**: {', '.join(scopes_list)}")
1657
- # Check for both 'inference-api' and 'inference' as valid inference scopes
1658
- has_inference = 'inference-api' in scopes_list or 'inference' in scopes_list
1659
- debug_info.append(f"**Has inference scope**: {has_inference}")
1660
- else:
1661
- debug_info.append("**⚠️ No OAuth scopes configured**")
1662
-
1663
- # Check README.md OAuth configuration
1664
- debug_info.append("\n## πŸ“„ README.md OAuth Configuration")
1665
- try:
1666
- with open('README.md', 'r') as f:
1667
- readme_content = f.read()
1668
- has_oauth = 'hf_oauth: true' in readme_content
1669
- has_scopes = 'hf_oauth_scopes:' in readme_content
1670
- has_inference = 'inference-api' in readme_content
1671
-
1672
- debug_info.append(f"**hf_oauth: true**: {has_oauth}")
1673
- debug_info.append(f"**hf_oauth_scopes defined**: {has_scopes}")
1674
- debug_info.append(f"**inference-api scope**: {has_inference}")
1675
- except Exception as readme_error:
1676
- debug_info.append(f"**README.md check error**: {readme_error}")
1677
-
1678
- # Environment Variables
1679
- debug_info.append("\n## πŸ”§ Environment Variables")
1680
- hf_token = os.getenv("HF_TOKEN")
1681
- debug_info.append(f"**HF_TOKEN Available**: {hf_token is not None}")
1682
- if hf_token:
1683
- debug_info.append(f"**HF_TOKEN Length**: {len(hf_token)} chars")
1684
-
1685
- space_host = os.getenv("SPACE_HOST")
1686
- space_id = os.getenv("SPACE_ID")
1687
- debug_info.append(f"**SPACE_HOST**: {space_host}")
1688
- debug_info.append(f"**SPACE_ID**: {space_id}")
1689
-
1690
- # Gradio-specific OAuth checks
1691
- debug_info.append("\n## 🎨 Gradio OAuth Integration")
1692
- try:
1693
- import gradio as gr
1694
- debug_info.append(f"**Gradio Version**: {gr.__version__}")
1695
- debug_info.append(f"**OAuth Profile Support**: Gradio should handle OAuth automatically in HF Spaces")
1696
-
1697
- except Exception as gradio_error:
1698
- debug_info.append(f"**Gradio OAuth Error**: {gradio_error}")
1699
-
1700
- # Authentication Test
1701
- debug_info.append("\n## πŸ§ͺ Authentication Test")
1702
-
1703
- if oauth_client_id and oauth_scopes:
1704
- debug_info.append("**βœ… OAuth Environment**: Properly configured")
1705
-
1706
- # Check for both scope formats
1707
- has_inference_scope = "inference-api" in oauth_scopes or "inference" in oauth_scopes
1708
- if has_inference_scope:
1709
- debug_info.append("**βœ… inference-api Scope**: Available for Qwen model access")
1710
- debug_info.append("**🎯 Expected Behavior**: Login should provide Qwen model access")
1711
- else:
1712
- debug_info.append("**❌ inference-api Scope**: Missing - Qwen models won't work")
1713
- debug_info.append("**πŸ”§ Fix**: Add 'inference-api' to hf_oauth_scopes in README.md")
1714
- else:
1715
- debug_info.append("**❌ OAuth Environment**: Not properly configured")
1716
-
1717
- # Success Indicators
1718
- debug_info.append("\n## βœ… Success Indicators")
1719
-
1720
- if oauth_client_id:
1721
- debug_info.append("- βœ… OAuth is enabled for this Space")
1722
- else:
1723
- debug_info.append("- ❌ OAuth is not enabled (missing OAUTH_CLIENT_ID)")
1724
-
1725
- # Check for both scope formats in success indicators
1726
- inference_available = oauth_scopes and ("inference-api" in oauth_scopes or "inference" in oauth_scopes)
1727
- if inference_available:
1728
- debug_info.append("- βœ… inference-api scope is configured")
1729
- debug_info.append("- βœ… Should have Qwen model access when logged in")
1730
- else:
1731
- debug_info.append("- ❌ inference-api scope is missing")
1732
- debug_info.append("- ❌ Will not have Qwen model access")
1733
-
1734
- # Login status detection (avoid AuthenticationMiddleware error)
1735
- debug_info.append("\n## πŸ‘€ Login Status")
1736
- debug_info.append("**Note**: Due to Gradio OAuth integration, login status is detected at runtime")
1737
- debug_info.append("**Current Status**: Check by clicking 'Run GAIA Evaluation' - you'll be prompted to login if needed")
1738
-
1739
- return "\n".join(debug_info)
1740
-
1741
- except Exception as e:
1742
- return f"# ❌ Debug Error\n\nError during OAuth debug: {str(e)}"
1743
-
1744
- debug_auth_button.click(
1745
- fn=debug_oauth_info,
1746
  outputs=[auth_status_display]
1747
  )
1748
 
1749
  # Event handlers for manual testing
1750
- def process_and_update(question, file_input, show_reasoning):
1751
  """Process question with authentication check"""
1752
 
1753
  if not question.strip():
1754
  return "❌ Please provide a question", "", "", gr.update(visible=False)
1755
 
1756
- # Check for authentication
1757
  hf_token = os.getenv("HF_TOKEN")
1758
 
1759
- if not hf_token:
1760
  error_msg = """
1761
  ## ❌ Authentication Required
1762
 
1763
  **This system requires authentication to access Qwen models and LangGraph workflow.**
1764
 
1765
  **How to authenticate:**
1766
- 1. πŸ”‘ **Set HF_TOKEN**: Add your HuggingFace token as an environment variable
1767
  2. 🌐 **Use Official Evaluation**: Login via the GAIA Benchmark section above
1768
  3. πŸ“ **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
1769
 
@@ -1771,9 +1665,15 @@ Please log in to access GAIA evaluation features with full inference access.
1771
  """
1772
  return error_msg, "", "", gr.update(visible=False)
1773
 
 
 
 
 
 
 
1774
  try:
1775
  # Create authenticated app instance for this request
1776
- app = GAIAAgentApp(hf_token=hf_token)
1777
 
1778
  # Process the question
1779
  answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
@@ -1808,7 +1708,7 @@ Please log in to access GAIA evaluation features with full inference access.
1808
 
1809
  {str(ve)}
1810
 
1811
- **Solution**: Please ensure your HF_TOKEN has `inference` permissions.
1812
  """
1813
  return error_msg, "", "", gr.update(visible=False)
1814
 
 
1282
  """
1283
  ### πŸ” Authentication Status: Not Logged In
1284
 
1285
+ Please log in to access GAIA evaluation with Qwen models and LangGraph workflow.
1286
 
1287
+ **What you need:**
1288
+ - πŸ”‘ HuggingFace login with `read` and `inference` permissions
1289
+ - πŸ€– Access to Qwen 2.5 models via HF Inference API
1290
+ - 🧠 LangGraph multi-agent system capabilities
1291
 
1292
+ **Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
 
1293
  """,
1294
  elem_classes=["oauth-login"]
1295
  )
1296
 
1297
+ # Add Gradio's built-in OAuth login button
1298
+ gr.LoginButton()
1299
+
1300
  with gr.Row():
 
 
 
 
 
1301
  refresh_auth_button = gr.Button("πŸ”„ Refresh Auth Status", variant="secondary", scale=1)
 
1302
 
1303
  unit4_run_button = gr.Button(
1304
+ "πŸš€ Run GAIA Evaluation & Submit All Answers",
1305
  variant="primary",
1306
+ scale=2
 
1307
  )
1308
 
1309
  unit4_status_output = gr.Textbox(
 
1422
  elem_classes=["reasoning-box"]
1423
  )
1424
 
1425
+ # Event handlers for Unit 4 API - Using Gradio's built-in OAuth
1426
+ def run_gaia_evaluation(oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
1427
+ """Run GAIA evaluation using Gradio's built-in OAuth"""
1428
+ start_time = time.time()
1429
+
1430
+ # Initialize result logger
1431
+ result_logger = GAIAResultLogger()
1432
+
1433
+ # Check authentication using Gradio's OAuth parameters
1434
+ if oauth_token is None or profile is None:
1435
+ return "❌ Authentication Required: Please login with HuggingFace to access GAIA evaluation.", None, None, None, None, None
1436
+
1437
+ username = profile.username if profile else "unknown_user"
1438
+ hf_token = oauth_token.token if oauth_token else None
1439
+
1440
+ if not hf_token:
1441
+ return "❌ OAuth Token Missing: Could not extract authentication token. Please logout and login again.", None, None, None, None, None
1442
 
1443
+ logger.info(f"βœ… Starting GAIA evaluation for user: {username}")
1444
+
1445
+ # Rest of the function exactly as in run_and_submit_all but using oauth_token.token
1446
+ api_url = DEFAULT_API_URL
1447
+ questions_url = f"{api_url}/questions"
1448
+ submit_url = f"{api_url}/submit"
1449
+
1450
+ # Get space info for code submission
1451
+ space_id = os.getenv("SPACE_ID")
1452
+
1453
+ # 1. Instantiate GAIA Agent with LangGraph workflow
1454
  try:
1455
+ logger.info("πŸš€ Creating GAIA Agent with LangGraph workflow and Qwen models")
1456
+ agent = GAIAAgentApp.create_with_oauth_token(hf_token)
1457
+
1458
+ if not agent.initialized:
1459
+ return "❌ System Error: GAIA Agent failed to initialize with LangGraph workflow", None, None, None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1460
 
1461
+ logger.info("βœ… GAIA Agent initialized successfully")
1462
+
1463
+ except ValueError as ve:
1464
+ logger.error(f"Authentication error: {ve}")
1465
+ return f"❌ Authentication Error: {ve}", None, None, None, None, None
1466
+ except RuntimeError as re:
1467
+ logger.error(f"System initialization error: {re}")
1468
+ return f"❌ System Error: {re}", None, None, None, None, None
1469
  except Exception as e:
1470
+ logger.error(f"Unexpected error initializing agent: {e}")
1471
+ return f"❌ Unexpected Error: {e}. Please check your authentication and try again.", None, None, None, None, None
1472
+
1473
+ # Agent code URL
1474
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
1475
+ logger.info(f"Agent code URL: {agent_code}")
1476
+
1477
+ # 2. Fetch Questions
1478
+ logger.info(f"Fetching questions from: {questions_url}")
1479
+ try:
1480
+ response = requests.get(questions_url, timeout=15)
1481
+ response.raise_for_status()
1482
+ questions_data = response.json()
1483
+ if not questions_data:
1484
+ logger.error("Fetched questions list is empty.")
1485
+ return "❌ Fetched questions list is empty or invalid format.", None, None, None, None, None
1486
+ logger.info(f"βœ… Fetched {len(questions_data)} questions.")
1487
+ except requests.exceptions.RequestException as e:
1488
+ logger.error(f"Error fetching questions: {e}")
1489
+ return f"❌ Error fetching questions: {e}", None, None, None, None, None
1490
+ except Exception as e:
1491
+ logger.error(f"An unexpected error occurred fetching questions: {e}")
1492
+ return f"❌ An unexpected error occurred fetching questions: {e}", None, None, None, None, None
1493
+
1494
+ # 3. Run GAIA Agent on questions
1495
+ results_log = []
1496
+ answers_payload = []
1497
+ logger.info(f"πŸ€– Running GAIA Agent on {len(questions_data)} questions with LangGraph workflow...")
1498
 
1499
+ for i, item in enumerate(questions_data, 1):
1500
+ task_id = item.get("task_id")
1501
+ question_text = item.get("question")
1502
+ if not task_id or question_text is None:
1503
+ logger.warning(f"Skipping item with missing task_id or question: {item}")
1504
+ continue
1505
+
1506
+ logger.info(f"Processing question {i}/{len(questions_data)}: {task_id}")
1507
+ try:
1508
+ submitted_answer = agent(question_text)
1509
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1510
+ results_log.append({
1511
+ "Task ID": task_id,
1512
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1513
+ "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
1514
+ })
1515
+ logger.info(f"βœ… Question {i} processed successfully")
1516
+ except Exception as e:
1517
+ logger.error(f"Error running GAIA agent on task {task_id}: {e}")
1518
+ error_answer = f"AGENT ERROR: {str(e)}"
1519
+ answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
1520
+ results_log.append({
1521
+ "Task ID": task_id,
1522
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1523
+ "Submitted Answer": error_answer
1524
+ })
1525
+
1526
+ if not answers_payload:
1527
+ logger.error("GAIA Agent did not produce any answers to submit.")
1528
+ return "❌ GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), None, None, None, None
1529
+
1530
+ # 4. Prepare and submit results
1531
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
1532
+ status_update = f"πŸš€ GAIA Agent finished processing {len(answers_payload)} questions. Submitting results for user '{username}'..."
1533
+ logger.info(status_update)
1534
+
1535
+ # 5. Submit to Unit 4 API
1536
+ logger.info(f"πŸ“€ Submitting {len(answers_payload)} answers to: {submit_url}")
1537
  try:
1538
+ response = requests.post(submit_url, json=submission_data, timeout=120)
1539
+ response.raise_for_status()
1540
+ result_data = response.json()
 
 
1541
 
1542
+ # Calculate execution time
1543
+ execution_time = time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
1544
 
1545
+ # 6. Log results to files
1546
+ logger.info("πŸ“ Logging evaluation results...")
1547
+ logged_files = result_logger.log_evaluation_results(
1548
+ username=username,
1549
+ questions_data=questions_data,
1550
+ results_log=results_log,
1551
+ final_result=result_data,
1552
+ execution_time=execution_time
1553
+ )
1554
 
1555
+ # Prepare download files
1556
+ csv_file = logged_files.get("csv")
1557
+ json_file = logged_files.get("json")
1558
+ summary_file = logged_files.get("summary")
 
1559
 
1560
+ final_status = (
1561
+ f"πŸŽ‰ GAIA Agent Evaluation Complete!\n"
1562
+ f"πŸ‘€ User: {result_data.get('username')}\n"
1563
+ f"πŸ† Overall Score: {result_data.get('score', 'N/A')}% "
1564
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
1565
+ f"⏱️ Execution Time: {execution_time:.2f} seconds\n"
1566
+ f"πŸ’¬ API Response: {result_data.get('message', 'No message received.')}\n\n"
1567
+ f"πŸ“ Results saved to {len([f for f in [csv_file, json_file, summary_file] if f])} files for download."
1568
+ )
1569
+ logger.info("βœ… GAIA evaluation completed successfully")
1570
+ results_df = pd.DataFrame(results_log)
1571
 
1572
+ # Update download file visibility and values
1573
+ csv_update = gr.update(value=csv_file, visible=csv_file is not None)
1574
+ json_update = gr.update(value=json_file, visible=json_file is not None)
1575
+ summary_update = gr.update(value=summary_file, visible=summary_file is not None)
1576
 
1577
+ return final_status, results_df, csv_update, json_update, summary_update
 
 
1578
 
1579
+ except requests.exceptions.HTTPError as e:
1580
+ error_detail = f"Server responded with status {e.response.status_code}."
1581
+ try:
1582
+ error_json = e.response.json()
1583
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
1584
+ except requests.exceptions.JSONDecodeError:
1585
+ error_detail += f" Response: {e.response.text[:500]}"
1586
+ status_message = f"❌ Submission Failed: {error_detail}"
1587
+ logger.error(status_message)
1588
+ results_df = pd.DataFrame(results_log)
1589
+ return status_message, results_df, None, None, None
1590
+ except Exception as e:
1591
+ status_message = f"❌ An unexpected error occurred during submission: {e}"
1592
+ logger.error(status_message)
1593
+ results_df = pd.DataFrame(results_log)
1594
+ return status_message, results_df, None, None, None
1595
+
1596
+ def update_auth_status(profile: gr.OAuthProfile | None):
1597
+ """Update authentication status display using Gradio's OAuth"""
1598
+ if profile is None:
1599
+ return """
1600
+ ### πŸ” Authentication Status: Not Logged In
1601
 
1602
+ Please click the "Sign in with Hugging Face" button above to access GAIA evaluation.
1603
 
1604
+ **What you need:**
1605
+ - πŸ”‘ HuggingFace login with `read` and `inference` permissions
1606
+ - πŸ€– Access to Qwen 2.5 models via HF Inference API
1607
+ - 🧠 LangGraph multi-agent system capabilities
1608
 
1609
+ **Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
1610
  """
1611
+ else:
1612
+ return f"""
1613
+ ### πŸ” Authentication Status: βœ… Logged In as {profile.username}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
 
1615
+ **βœ… Ready for GAIA Evaluation!**
1616
 
1617
+ - βœ… **OAuth Profile**: {profile.name or profile.username}
1618
+ - βœ… **Qwen Model Access**: Available via HF Inference API
1619
+ - βœ… **LangGraph Workflow**: Multi-agent orchestration ready
1620
+ - βœ… **Official Evaluation**: Click "Run GAIA Evaluation" to start
1621
 
1622
+ 🎯 **Expected Results**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
1623
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1624
 
1625
  # Set up automatic login state checking
1626
  interface.load(
1627
+ fn=update_auth_status,
1628
+ outputs=[auth_status_display]
1629
  )
1630
 
1631
  unit4_run_button.click(
1632
+ fn=run_gaia_evaluation,
1633
+ inputs=[], # Gradio automatically injects OAuth parameters
1634
+ outputs=[unit4_status_output, unit4_results_table, csv_download, json_download, summary_download]
 
1635
  )
1636
 
1637
  # Refresh authentication status manually
1638
  refresh_auth_button.click(
1639
+ fn=update_auth_status,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1640
  outputs=[auth_status_display]
1641
  )
1642
 
1643
  # Event handlers for manual testing
1644
+ def process_and_update(question, file_input, show_reasoning, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
1645
  """Process question with authentication check"""
1646
 
1647
  if not question.strip():
1648
  return "❌ Please provide a question", "", "", gr.update(visible=False)
1649
 
1650
+ # Check for authentication - prioritize HF_TOKEN, then OAuth
1651
  hf_token = os.getenv("HF_TOKEN")
1652
 
1653
+ if not hf_token and (oauth_token is None or profile is None):
1654
  error_msg = """
1655
  ## ❌ Authentication Required
1656
 
1657
  **This system requires authentication to access Qwen models and LangGraph workflow.**
1658
 
1659
  **How to authenticate:**
1660
+ 1. πŸ”‘ **Login with HuggingFace**: Use the "Sign in with Hugging Face" button above
1661
  2. 🌐 **Use Official Evaluation**: Login via the GAIA Benchmark section above
1662
  3. πŸ“ **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
1663
 
 
1665
  """
1666
  return error_msg, "", "", gr.update(visible=False)
1667
 
1668
+ # Use HF_TOKEN if available, otherwise use OAuth token
1669
+ auth_token = hf_token if hf_token else (oauth_token.token if oauth_token else None)
1670
+
1671
+ if not auth_token:
1672
+ return "❌ No valid authentication token found", "", "", gr.update(visible=False)
1673
+
1674
  try:
1675
  # Create authenticated app instance for this request
1676
+ app = GAIAAgentApp(hf_token=auth_token)
1677
 
1678
  # Process the question
1679
  answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
 
1708
 
1709
  {str(ve)}
1710
 
1711
+ **Solution**: Please ensure your authentication has `inference` permissions.
1712
  """
1713
  return error_msg, "", "", gr.update(visible=False)
1714