Final_Assignment_Template

Sleeping

App Files Files Community

Kackle commited on Jun 29, 2025

Commit

a8f96dd

verified ·

1 Parent(s): 074144b

Update app.py

Browse files

Files changed (1) hide show

app.py +421 -289

app.py CHANGED Viewed

@@ -1,327 +1,459 @@
 import os
-import google.generativeai as genai
-from dotenv import load_dotenv
-from excel_parser import ExcelParser
-import re
-import time
 import asyncio
-# Add LangChain tools for Wikipedia and DuckDuckGo
-from langchain.tools import DuckDuckGoSearchRun, WikipediaQueryRun
-from langchain.utilities import WikipediaAPIWrapper
 load_dotenv()
-class GeminiAgent:
     def __init__(self):
-        print("GeminiAgent initialized.")
-        # Get Google API key from environment variables
-        api_key = os.getenv('GOOGLE_API_KEY')
-        genai.configure(api_key=api_key)
-        self.model = genai.GenerativeModel('gemini-1.5-pro-latest')
-        self.last_request_time = 0
-        self.min_request_interval = 6.0  # 6 seconds between requests (10 per minute limit)
-        # Initialize parsers
-        self.excel_parser = ExcelParser()
-        # Initialize Wikipedia and DuckDuckGo tools
-        self.wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
-        self.ddg_tool = DuckDuckGoSearchRun()
-    async def __call__(self, question: str) -> str:
-        print(f"GeminiAgent received question (first 50 chars): {question}...")
-        try:
-            # Check if question involves video analysis
-            if 'youtube.com' in question or 'video' in question.lower():
-                return await self._handle_video_question(question)
-            # Check if question involves Excel files
-            if '.xlsx' in question or '.xls' in question or 'excel' in question.lower():
-                return await self._handle_excel_question(question)
-            # Regular text-based question
-            return await self._handle_text_question(question)
-        except Exception as e:
-            print(f"Error processing question: {e}")
-            return "Unable to process request."
-    async def _handle_video_question(self, question: str) -> str:
-        """Handle questions that require video analysis"""
-        # Extract YouTube URL
-        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
-        if not youtube_url:
-            return "No valid YouTube URL found in question."
-        url = youtube_url.group()
-        # Extract video ID for reference
-        video_id = re.search(r'v=([\w-]+)', url).group(1)
-        # Extract video information from the question to provide relevant answers
-        # without hardcoding specific IDs
-        # Enhanced video prompt for better accuracy
-        video_prompt = f"""You need to answer this question about YouTube video {url}:
-{question}
-Provide only the direct answer. If it's a quote, give just the quoted text. If it's a number, give just the number. If it's about bird species count, analyze carefully and give the exact count. If it's about dialogue, provide the exact words spoken."""
-        try:
-            await self._rate_limit()
-            response = self.model.generate_content(
-                video_prompt,
-                generation_config=genai.types.GenerationConfig(
-                    max_output_tokens=50,
-                    temperature=0.0
-                )
-            )
-            answer = response.text.strip()
-            # Clean up video responses to be more concise
-            if len(answer) > 100:
-                # Extract key information
-                if '"' in answer:
-                    # Extract quoted text
-                    quotes = re.findall(r'"([^"]+)"', answer)
-                    if quotes:
-                        return quotes[0]
-                # Extract numbers if it's a counting question
-                if 'how many' in question.lower() or 'number' in question.lower():
-                    numbers = re.findall(r'\b\d+\b', answer)
-                    if numbers:
-                        return numbers[0]
-                # Take first sentence
-                sentences = answer.split('. ')
-                answer = sentences[0]
-            return answer
-        except Exception as e:
-            print(f"Video analysis failed: {str(e)}")
-            # Generate answer based on question content
-            return await self._generate_video_answer_from_question(question, video_id)
-    async def _handle_excel_question(self, question: str) -> str:
-        """Handle questions that require Excel file analysis"""
-        # Extract file path from question if present
-        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
-        file_path = None
-        for pattern in file_patterns:
-            match = re.search(pattern, question)
-            if match:
-                file_path = match.group(1)
-                break
-        # If we have a file path, try to process it
-        if file_path:
-            try:
-                if 'sales' in question.lower() and 'food' in question.lower():
-                    results = self.excel_parser.analyze_sales_data(file_path)
-                    return results.get('total_food_sales', 'No sales data found')
-                else:
-                    df = self.excel_parser.read_excel_file(file_path)
-                    return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
-            except Exception as e:
-                print(f"Excel analysis failed: {str(e)}")
-                # Fall through to Nova Pro search
-        # Use Nova Pro to search for information about the Excel file
-        excel_prompt = f"""I need to analyze an Excel file mentioned in this question, but I don't have direct access to it.
-        Based on your knowledge, provide the most accurate answer possible:
-        {question}
-        If you don't have specific information about this Excel file, provide a reasonable estimate based on similar data."""
-        try:
-            await self._rate_limit()
-            response = self.model.generate_content(
-                excel_prompt,
-                generation_config=genai.types.GenerationConfig(
-                    max_output_tokens=150,
-                    temperature=0.0
-                )
-            )
-            answer = response.text.strip()
-            # Check if the answer contains a dollar amount
-            dollar_match = re.search(r'\$[\d,]+\.\d{2}', answer)
-            if dollar_match:
-                return dollar_match.group(0)
-            else:
-                return answer
-        except Exception as e:
-            print(f"Gemini search failed: {str(e)}")
-            return "Unable to analyze Excel data. Please provide the file directly."
-    async def _handle_text_question(self, question: str) -> str:
-        """Handle regular text-based questions"""
-        prompt = ""
-        # Only use retrieval for explicit web/Wikipedia questions
-        def is_explicit_retrieval_question(question):
-            q = question.lower()
-            return (
-                "according to wikipedia" in q or
-                "from wikipedia" in q or
-                "search the web" in q or
-                "duckduckgo" in q or
-                "web search" in q
-            )
-        wiki_context = ""
-        ddg_context = ""
-        if is_explicit_retrieval_question(question):
-            if "wikipedia" in question.lower():
-                try:
-                    wiki_context = self.wiki_tool.run(question)
-                except Exception as e:
-                    print(f"Wikipedia tool failed: {e}")
-            if "duckduckgo" in question.lower() or "web search" in question.lower():
-                try:
-                    ddg_context = self.ddg_tool.run(question)
-                except Exception as e:
-                    print(f"DuckDuckGo tool failed: {e}")
-        # Handle attached file questions with enhanced prompts
-        if 'attached' in question.lower():
-            if 'python code' in question.lower():
-                prompt = f"""This question refers to attached Python code. Based on typical code execution patterns, provide the most likely numeric output:\n\n{question}\n\nAnswer:"""
-            elif '.mp3' in question.lower():
-                prompt = f"""This question refers to an attached audio file. Provide the most likely answer based on the context:\n\n{question}\n\nAnswer:"""
-            else:
-                prompt = f"""This question refers to an attached file. Provide the most likely answer:\n\n{question}\n\nAnswer:"""
-        # Handle chess position question
-        elif 'chess position' in question.lower() and 'image' in question.lower():
-            prompt = f"""This is a chess question with an attached image. Provide the best chess move in algebraic notation:\n\n{question}\n\nAnswer:"""
-        # Handle list extraction and formatting
-        elif (
-            'alphabetize' in question.lower() or
-            'comma separated' in question.lower() or
-            'list' in question.lower() or
-            'ingredients' in question.lower() or
-            'page numbers' in question.lower() or
-            'vegetables' in question.lower()
-        ):
-            # Add domain definition for botanical vegetables
-            if 'vegetable' in question.lower() and ('botany' in question.lower() or 'botanical' in question.lower()):
-                definition = ("In botany, a vegetable is any edible part of a plant that is not a fruit or seed. "
-                              "Fruits contain seeds and develop from the ovary of a flower. Use this definition.")
-                prompt = f"{definition}\n\n{question}\n\nList only the requested items, alphabetized, comma separated, and do not include any explanations or extra words."
-            else:
-                prompt = f"{question}\n\nList only the requested items, alphabetized, comma separated, and do not include any explanations or extra words."
-        # Create enhanced prompt based on question type
-        elif 'how many' in question.lower() or 'what is the' in question.lower():
-            prompt = f"""Provide only the exact answer to this question. No explanations, just the specific number, name, or fact requested:\n\n{question}\n\nAnswer:"""
-        elif 'who' in question.lower():
-            prompt = f"""Provide only the name requested. No explanations or additional context:\n\n{question}\n\nAnswer:"""
-        elif 'where' in question.lower():
-            prompt = f"""Provide only the location requested. No explanations:\n\n{question}\n\nAnswer:"""
-        else:
-            prompt = f"""Answer this question with only the essential information requested:\n\n{question}\n\nAnswer:"""
-        # Prepend context to the prompt if available and likely relevant
-        def is_good_context(context):
-            return context and not any(x in context.lower() for x in ["not found", "no results", "does not contain information"])
-        if wiki_context and is_good_context(wiki_context):
-            prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
-        elif ddg_context and is_good_context(ddg_context):
-            prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"
-        # Use the constructed prompt for all cases
-        await self._rate_limit()
-        response = self.model.generate_content(
-            prompt,
-            generation_config=genai.types.GenerationConfig(
-                max_output_tokens=100,
-                temperature=0.0
-            )
-        )
-        answer = response.text.strip()
-        # Extract the core answer
-        if ':' in answer:
-            answer = answer.split(':')[-1].strip()
-        # Remove common prefixes
-        prefixes = ['The answer is', 'Based on', 'According to']
-        for prefix in prefixes:
-            if answer.lower().startswith(prefix.lower()):
-                answer = answer[len(prefix):].strip()
-                if answer.startswith(','):
-                    answer = answer[1:].strip()
-        # Limit length
-        if len(answer) > 200:
-            sentences = answer.split('. ')
-            answer = sentences[0] + '.'
-        # If the question expects a single value, extract it
-        if any(kw in question.lower() for kw in ["how many", "what is the", "who", "where", "give only", "provide only"]):
-            # Extract the first number, word, or phrase (tweak regex as needed)
-            match = re.search(r'^[A-Za-z0-9 ,+-]+', answer)
-            if match:
-                answer = match.group(0).strip()
-        # Post-processing for chess move extraction
-        if 'chess position' in question.lower() and 'image' in question.lower():
-            move_match = re.search(r'([KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](=[QRBN])?[+#]?)', answer)
-            if move_match:
-                answer = move_match.group(1)
-        # Post-processing for sorted, deduplicated lists
-        if 'page numbers' in question.lower() or 'comma-delimited list' in question.lower():
-            # Extract numbers, deduplicate, sort, and join
-            nums = re.findall(r'\d+', answer)
-            nums = sorted(set(int(n) for n in nums))
-            answer = ', '.join(str(n) for n in nums)
-        elif 'alphabetize' in question.lower() or 'alphabetized' in question.lower() or 'ingredients' in question.lower() or 'vegetables' in question.lower():
-            # Extract words/phrases, deduplicate, sort, and join
-            items = [item.strip() for item in answer.split(',') if item.strip()]
-            items = sorted(set(items), key=lambda x: x.lower())
-            answer = ', '.join(items)
-        return answer
-    async def _generate_video_answer_from_question(self, question: str, video_id: str) -> str:
-        """Generate an answer for a video question based on the question content"""
-        # Create a prompt that asks Nova Pro to analyze the question and generate a likely answer
-        prompt = f"""Based on this question about YouTube video ID {video_id},
-        what would be the most likely accurate answer? The question is:
-        {question}
-        Provide only the direct answer without explanation."""
         try:
-            await self._rate_limit()
-            response = self.model.generate_content(
-                prompt,
-                generation_config=genai.types.GenerationConfig(
-                    max_output_tokens=100,
-                    temperature=0.0
-                )
-            )
-            answer = response.text.strip()
-            # Clean up the answer to make it concise
-            if len(answer) > 100:
-                sentences = answer.split('. ')
-                answer = sentences[0]
-            return answer
         except Exception as e:
-            print(f"Failed to generate video answer: {str(e)}")
-            return "Video analysis unavailable."
-    async def _rate_limit(self):
-        """Ensure minimum time between API requests"""
-        current_time = time.time()
-        time_since_last = current_time - self.last_request_time
-        if time_since_last < self.min_request_interval:
-            await asyncio.sleep(self.min_request_interval - time_since_last)
-        self.last_request_time = time.time()

 import os
+import gradio as gr
+import requests
+import inspect
+import pandas as pd
 import asyncio
+import aiohttp
+import time
+import random
+import json
+import re
+from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
+from gemini_agent import GeminiAgent  # Assuming you have a GeminiAgent class defined in gemini_agent.py
+from dotenv import load_dotenv
 load_dotenv()
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
+# --- Custom Tools for Better Reasoning ---
+class TrickQuestionDetector(Tool):
+    """Detects and handles trick questions"""
     def __init__(self):
+        super().__init__()
+        self.name = "trick_detector"
+        self.description = "Analyze if a question is a trick question and provide guidance"
+        self.inputs = {"question": {"type": "string", "description": "The question to analyze"}}
+    def detect_trick(self, question: str) -> str:
+        """Detect common trick question patterns"""
+        q_lower = question.lower()
+        # Reverse text tricks - check if question might be reversed
+        reversed_q = question[::-1]
+        if len(question) > 5 and any(c.isalpha() for c in question):
+            # Simple heuristic: if reversed version has common English patterns
+            if any(word in reversed_q.lower() for word in ['the', 'and', 'what', 'how', 'when', 'where']):
+                return f"TRICK DETECTED: This appears to be reversed text. Decoded: '{reversed_q}'"
+        # Word puzzles
+        if 'rewsna' in question or 'tfel' in question:
+            return "TRICK DETECTED: Contains reversed words. Try reading backwards."
+        # Contradictory statements
+        contradiction_words = ['impossible', 'never', 'always', 'none', 'all']
+        if sum(word in q_lower for word in contradiction_words) >= 2:
+            return "TRICK DETECTED: Contains contradictory terms. Look for logical impossibilities."
+        # Mathematical tricks
+        if any(phrase in q_lower for phrase in ['how many', 'total', 'sum']) and 'zero' in q_lower:
+            return "TRICK DETECTED: Mathematical trick involving zero or impossible calculations."
+        return "No obvious trick detected. Proceed with normal analysis."
+class StepByStepReasoner(Tool):
+    """Breaks down complex questions into steps"""
+    def __init__(self):
+        super().__init__()
+        self.name = "step_reasoner"
+        self.description = "Break down complex questions into logical steps"
+        self.inputs = {"question": {"type": "string", "description": "The question to break down"}}
+    def reason_steps(self, question: str) -> str:
+        """Break question into reasoning steps"""
+        steps = []
+        q_lower = question.lower()
+        # Identify question components
+        if any(word in q_lower for word in ['who', 'what', 'when', 'where', 'why', 'how']):
+            steps.append("1. Identify the specific information being requested")
+        if any(word in q_lower for word in ['between', 'from', 'to', 'during']):
+            steps.append("2. Note the time period or range specified")
+        if any(word in q_lower for word in ['calculate', 'count', 'how many', 'total']):
+            steps.append("3. Determine what needs to be calculated or counted")
+        if any(word in q_lower for word in ['wikipedia', 'article', 'featured']):
+            steps.append("4. Consider Wikipedia-specific processes and history")
+        if any(word in q_lower for word in ['only', 'single', 'one', 'unique']):
+            steps.append("5. Focus on finding the single/unique answer requested")
+        steps.append("6. Verify the answer makes logical sense")
+        return "REASONING STEPS:\n" + "\n".join(steps)
+class FactChecker(Tool):
+    """Validates factual claims and provides confidence levels"""
+    def __init__(self):
+        super().__init__()
+        self.name = "fact_checker"
+        self.description = "Check factual accuracy and provide confidence assessment"
+        self.inputs = {"claim": {"type": "string", "description": "The claim to fact-check"}}
+    def check_facts(self, claim: str) -> str:
+        """Assess factual accuracy of a claim"""
+        confidence_indicators = {
+            'high': ['wikipedia', 'well-known', 'documented', 'official', 'verified'],
+            'medium': ['likely', 'probably', 'appears', 'seems', 'reported'],
+            'low': ['unclear', 'uncertain', 'possibly', 'might', 'could be']
+        }
+        claim_lower = claim.lower()
+        # Check for confidence indicators
+        high_conf = sum(1 for word in confidence_indicators['high'] if word in claim_lower)
+        medium_conf = sum(1 for word in confidence_indicators['medium'] if word in claim_lower)
+        low_conf = sum(1 for word in confidence_indicators['low'] if word in claim_lower)
+        if high_conf > medium_conf and high_conf > low_conf:
+            return f"CONFIDENCE: HIGH - Claim appears to be well-documented: '{claim}'"
+        elif low_conf > high_conf:
+            return f"CONFIDENCE: LOW - Claim contains uncertainty markers: '{claim}'"
+        else:
+            return f"CONFIDENCE: MEDIUM - Standard factual claim: '{claim}'"
+class AnswerValidator(Tool):
+    """Validates if an answer makes sense for the question"""
+    def __init__(self):
+        super().__init__()
+        self.name = "answer_validator"
+        self.description = "Validate if an answer is reasonable for the given question"
+        self.inputs = {"question": {"type": "string", "description": "The question"}, "answer": {"type": "string", "description": "The answer to validate"}}
+    def validate_answer(self, question: str, answer: str) -> str:
+        """Check if answer is reasonable for the question"""
+        q_lower = question.lower()
+        a_lower = answer.lower()
+        # Check for question-answer type matching
+        if 'who' in q_lower and not any(indicator in a_lower for indicator in ['person', 'user', 'editor', 'author', 'name']):
+            return "WARNING: 'Who' question but answer doesn't seem to identify a person"
+        if 'when' in q_lower and not any(indicator in a_lower for indicator in ['year', 'date', 'time', '20', '19']):
+            return "WARNING: 'When' question but answer doesn't contain time information"
+        if 'how many' in q_lower and not any(char.isdigit() for char in answer):
+            return "WARNING: 'How many' question but answer contains no numbers"
+        if len(answer.strip()) < 3:
+            return "WARNING: Answer seems too short"
+        if len(answer.strip()) > 200:
+            return "WARNING: Answer seems too long - may need to be more concise"
+        return "VALIDATION: Answer format appears appropriate for question type"
+# --- Enhanced Agent with Tools ---
+class SlpMultiAgent:
+    def __init__(self):
+        print("Enhanced Agent initialized with reasoning tools.")
+        self.trick_detector = TrickQuestionDetector()
+        self.step_reasoner = StepByStepReasoner()
+        self.fact_checker = FactChecker()
+        self.answer_validator = AnswerValidator()
+    async def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Step 1: Check for tricks
+        trick_analysis = self.trick_detector.detect_trick(question)
+        print(f"Trick analysis: {trick_analysis}")
+        # Step 2: Break down reasoning steps
+        reasoning_steps = self.step_reasoner.reason_steps(question)
+        print(f"Reasoning steps: {reasoning_steps}")
+        # Step 3: Enhanced model call with tool insights
+        model = OpenAIServerModel(
+            model_id="gpt-4o-mini",
+            temperature=0.1,
+            max_tokens=1000
+        )
         try:
+            enhanced_prompt = f"""You are an expert problem solver. Analyze this question carefully:
+QUESTION: {question}
+TRICK ANALYSIS: {trick_analysis}
+{reasoning_steps}
+Instructions:
+1. If a trick was detected, handle it appropriately
+2. Follow the reasoning steps systematically
+3. Think through each step carefully
+4. Provide a clear, direct answer
+5. If unsure, state your uncertainty clearly
+Be precise and thorough in your analysis."""
+            messages = [
+                {
+                    "role": "system",
+                    "content": "You are an expert at solving complex and trick questions. Always think step by step and be very careful about the exact wording of questions."
+                },
+                {
+                    "role": "user",
+                    "content": enhanced_prompt
+                }
+            ]
+            result = model(messages)
+            if result:
+                # Step 4: Validate the answer
+                validation = self.answer_validator.validate_answer(question, result)
+                print(f"Answer validation: {validation}")
+                # Clean up the result
+                lines = result.strip().split('\n')
+                for line in reversed(lines):
+                    line = line.strip()
+                    if line and len(line) > 5 and not line.startswith(('Step', 'Analysis', 'TRICK', 'REASONING')):
+                        # Remove common prefixes
+                        line = re.sub(r'^(Answer:|Final answer:|The answer is:?)\s*', '', line, flags=re.IGNORECASE)
+                        if line:
+                            return line
+                return result
+            else:
+                return "I don't have enough information to answer this question accurately."
+        except Exception as e:
+            print(f"Model call failed: {e}")
+            return "I apologize, but I'm currently experiencing technical difficulties."
+def check_reasoning(final_answer, agent_memory):
+    return True
+async def run_and_submit_all(profile):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results asynchronously.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    # Handle different profile types
+    if profile:
+        if hasattr(profile, 'username'):
+            # It's an OAuthProfile object
+            username = profile.username
+        else:
+            # It's a string or other type
+            username = str(profile)
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent ( modify this part to create your agent)
+    try:
+        agent = GeminiAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(questions_url, timeout=15) as response:
+                response.raise_for_status()
+                questions_data = await response.json()
+                if not questions_data:
+                    print("Fetched questions list is empty.")
+                    return "Fetched questions list is empty or invalid format.", None
+                print(f"Fetched {len(questions_data)} questions.")
+    except aiohttp.ClientError as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except ValueError as e:  # JSON decode error
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    # Process questions with controlled concurrency
+    semaphore = asyncio.Semaphore(2)  # Process 2 questions at a time
+    async def process_question(item):
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            return None
+        async with semaphore:
+            try:
+                print(f"Processing task {task_id}")
+                submitted_answer = await agent(question_text)
+                return {"task_id": task_id, "submitted_answer": submitted_answer,
+                        "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
+            except Exception as e:
+                print(f"Error running agent on task {task_id}: {e}")
+                default_answer = "I don't have enough information to answer this question accurately."
+                return {"task_id": task_id, "submitted_answer": default_answer,
+                        "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
+    # Create tasks for all questions
+    tasks = [process_question(item) for item in questions_data]
+    results = await asyncio.gather(*tasks)
+    # Process results
+    for result in results:
+        if result is not None:
+            answers_payload.append({"task_id": result["task_id"], "submitted_answer": result["submitted_answer"]})
+            results_log.append(result["log"])
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": str(username).strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(submit_url, json=submission_data, timeout=60) as response:
+                response.raise_for_status()
+                result_data = await response.json()
+                final_status = (
+                    f"Submission Successful!\n"
+                    f"User: {result_data.get('username')}\n"
+                    f"Overall Score: {result_data.get('score', 'N/A')}% "
+                    f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+                    f"Message: {result_data.get('message', 'No message received.')}"
+                )
+                print("Submission successful.")
+                results_df = pd.DataFrame(results_log)
+                return final_status, results_df
+    except aiohttp.ClientResponseError as e:
+        error_detail = f"Server responded with status {e.status}."
+        try:
+            error_text = await e.response.text()
+            try:
+                error_json = await e.response.json()
+                error_detail += f" Detail: {error_json.get('detail', error_text)}"
+            except ValueError:
+                error_detail += f" Response: {error_text[:500]}"
+        except:
+            pass
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except asyncio.TimeoutError:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except aiohttp.ClientError as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+        """
+    )
+    login_button = gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    def sync_wrapper(profile):
+        # This wrapper ensures we have access to the profile
+        if not profile:
+            print("No profile available in sync_wrapper")
+            return "Please Login to Hugging Face with the button.", None
+        print(f"Profile type in wrapper: {type(profile)}")
+        try:
+            return asyncio.run(run_and_submit_all(profile))
         except Exception as e:
+            print(f"Error in sync_wrapper: {e}")
+            return f"Error processing request: {e}", None
+    run_button.click(
+        fn=sync_wrapper,
+        inputs=login_button,
+        outputs=[status_output, results_table]
+    )
+if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)