Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 29, 2025

Commit

dfcd4f6

1 Parent(s): c913a81

Fixing

Browse files

Files changed (2) hide show

app.py +409 -297
requirements.txt +10 -10

app.py CHANGED Viewed

@@ -5,268 +5,367 @@ import inspect
 import pandas as pd
 import json
 import re
-from typing import Dict, List, Any, Optional
-import urllib.parse
 from datetime import datetime
-import math
-# Transformers and torch imports
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-class EnhancedGAIAAgent:
-    def __init__(self):
-        print("Initializing Enhanced GAIA Agent with Mistral-7B...")
-        # Initialize Mistral model
         try:
-            print("Loading Mistral-7B-Instruct model...")
-            self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
-            self.model = AutoModelForCausalLM.from_pretrained(
-                "mistralai/Mistral-7B-Instruct-v0.3",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto" if torch.cuda.is_available() else None
-            )
-            # Create pipeline for easier use
-            self.pipe = pipeline(
-                "text-generation",
-                model=self.model,
-                tokenizer=self.tokenizer,
-                max_new_tokens=512,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id
-            )
-            print("✅ Mistral model loaded successfully!")
-        except Exception as e:
-            print(f"❌ Error loading Mistral model: {e}")
-            print("Falling back to basic responses...")
-            self.pipe = None
-        # Tool functions for GAIA tasks
-        self.tools = {
-            "calculate": self._calculate,
-            "search_web": self._search_web,
-            "parse_data": self._parse_data,
-            "analyze_text": self._analyze_text,
-            "solve_math": self._solve_math
-        }
-    def _calculate(self, expression: str) -> str:
-        """Safe calculator for mathematical expressions"""
-        try:
-            # Clean and validate expression
-            expression = re.sub(r'[^0-9+\-*/().\s]', '', expression)
-            result = eval(expression)
-            return str(result)
-        except Exception as e:
-            return f"Calculation error: {e}"
-    def _search_web(self, query: str) -> str:
-        """Simulate web search (placeholder - you'd integrate real search API)"""
-        # This is a placeholder - integrate with actual search API
-        return f"Search results for '{query}': [This would contain real search results]"
-    def _parse_data(self, data: str) -> str:
-        """Parse and analyze structured data"""
-        try:
-            # Try to parse as JSON
-            if data.strip().startswith('{') or data.strip().startswith('['):
-                parsed = json.loads(data)
-                return f"Parsed data structure with {len(parsed) if isinstance(parsed, (list, dict)) else 1} elements"
-            else:
-                # Basic text analysis
-                lines = data.split('\n')
-                return f"Text data with {len(lines)} lines, {len(data.split())} words"
         except Exception as e:
-            return f"Data parsing error: {e}"
-    def _analyze_text(self, text: str) -> str:
-        """Analyze text content"""
-        words = text.split()
-        sentences = text.split('.')
-        return f"Text analysis: {len(words)} words, {len(sentences)} sentences"
-    def _solve_math(self, problem: str) -> str:
-        """Enhanced math problem solver"""
         try:
-            # Extract numbers and operations
-            numbers = re.findall(r'-?\d+\.?\d*', problem)
-            # Handle common math patterns
-            if "percent" in problem.lower() or "%" in problem:
-                if len(numbers) >= 2:
-                    base = float(numbers[0])
-                    percent = float(numbers[1])
-                    result = base * (percent / 100)
-                    return str(result)
-            if "average" in problem.lower() or "mean" in problem.lower():
-                if numbers:
-                    nums = [float(n) for n in numbers]
-                    return str(sum(nums) / len(nums))
-            # Default calculation
-            return self._calculate(" ".join(numbers))
         except Exception as e:
-            return f"Math solving error: {e}"
-    def _generate_response(self, prompt: str) -> str:
-        """Generate response using Mistral model"""
-        if not self.pipe:
-            return "Model not available - using fallback response."
-        try:
-            messages = [
-                {"role": "user", "content": prompt}
-            ]
-            response = self.pipe(messages, max_new_tokens=512, temperature=0.7)
-            # Extract the generated text
-            if response and len(response) > 0:
-                generated_text = response[0]['generated_text']
-                # Get only the assistant's response (after the user message)
-                if isinstance(generated_text, list):
-                    # Find the assistant's response
-                    for msg in generated_text:
-                        if msg.get('role') == 'assistant':
-                            return msg.get('content', '')
-                elif isinstance(generated_text, str):
-                    return generated_text
-                else:
-                    return str(generated_text)
-            return "No response generated."
         except Exception as e:
-            print(f"Error generating response: {e}")
-            return f"Error in response generation: {e}"
-    def _detect_task_type(self, question: str) -> str:
-        """Detect the type of task to apply appropriate strategy"""
         question_lower = question.lower()
-        if any(word in question_lower for word in ["calculate", "compute", "math", "+", "-", "*", "/", "="]):
-            return "calculation"
-        elif any(word in question_lower for word in ["search", "find", "lookup", "google"]):
-            return "search"
-        elif any(word in question_lower for word in ["data", "csv", "json", "table", "parse"]):
-            return "data_analysis"
-        elif any(word in question_lower for word in ["percent", "%", "average", "mean", "sum"]):
-            return "math_word_problem"
         else:
-            return "general_reasoning"
-    def __call__(self, question: str) -> str:
-        print(f"Agent processing question (first 100 chars): {question[:100]}...")
-        # Detect task type
-        task_type = self._detect_task_type(question)
-        print(f"Detected task type: {task_type}")
-        # Build enhanced prompt based on task type
-        if task_type == "calculation":
-            enhanced_prompt = f"""
-You are a precise mathematical assistant. Solve this step-by-step:
-Question: {question}
-Provide a clear, accurate answer. If calculation is needed, show your work.
-Answer:"""
-        elif task_type == "math_word_problem":
-            enhanced_prompt = f"""
-You are solving a math word problem. Break it down step by step:
-Question: {question}
-Steps:
-1. Identify what is being asked
-2. Extract the relevant numbers
-3. Determine the operation needed
-4. Calculate the result
-5. Provide the final answer
-Answer:"""
-        elif task_type == "data_analysis":
-            enhanced_prompt = f"""
-You are analyzing data. Approach this systematically:
-Question: {question}
-Consider:
-- What type of data is involved?
-- What analysis is needed?
-- What tools or methods should be used?
-Provide a clear, structured answer.
-Answer:"""
-        else:
-            enhanced_prompt = f"""
-You are a helpful assistant that provides accurate, well-reasoned answers.
-Question: {question}
-Think through this step-by-step and provide a clear, comprehensive answer.
-Answer:"""
-        # Generate response using the model
         try:
-            response = self._generate_response(enhanced_prompt)
-            # Post-process response for specific task types
-            if task_type in ["calculation", "math_word_problem"]:
-                # Try to extract and verify any calculations
-                numbers_in_response = re.findall(r'-?\d+\.?\d*', response)
-                if numbers_in_response:
-                    # Attempt to verify calculation if simple enough
-                    pass
-            print(f"Agent returning response (first 100 chars): {response[:100]}...")
-            return response.strip()
         except Exception as e:
-            print(f"Error in agent processing: {e}")
-            fallback_response = self._handle_fallback(question, task_type)
-            return fallback_response
-    def _handle_fallback(self, question: str, task_type: str) -> str:
-        """Provide fallback responses when the main model fails"""
-        if task_type == "calculation":
-            # Try to extract and calculate simple expressions
-            try:
-                numbers = re.findall(r'-?\d+\.?\d*', question)
-                if len(numbers) >= 2:
-                    if "+" in question:
-                        result = sum(float(n) for n in numbers)
-                        return f"The sum is {result}"
-                    elif "*" in question or "multiply" in question.lower():
-                        result = 1
-                        for n in numbers:
-                            result *= float(n)
-                        return f"The product is {result}"
-            except:
-                pass
-        return f"I understand you're asking about: {question}. This appears to be a {task_type} task. Let me provide my best analysis based on the available information."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the EnhancedGAIAAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
@@ -279,17 +378,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Enhanced Agent
     try:
-        print("Initializing Enhanced GAIA Agent...")
-        agent = EnhancedGAIAAgent()
-        print("✅ Agent initialized successfully!")
     except Exception as e:
-        print(f"❌ Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
@@ -300,83 +400,80 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         if not questions_data:
             print("Fetched questions list is empty.")
             return "Fetched questions list is empty or invalid format.", None
-        print(f"✅ Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
-        print(f"❌ Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"❌ Error decoding JSON response from questions endpoint: {e}")
         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"❌ An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run Enhanced Agent
     results_log = []
     answers_payload = []
-    print(f"🚀 Running enhanced agent on {len(questions_data)} questions...")
-    for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"⚠️  Skipping item with missing task_id or question: {item}")
             continue
-        print(f"📝 Processing question {i}/{len(questions_data)} (ID: {task_id})")
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
-                "Question": question_text[:200] + "..." if len(question_text) > 200 else question_text,
-                "Submitted Answer": submitted_answer[:300] + "..." if len(submitted_answer) > 300 else submitted_answer
             })
-            print(f"✅ Completed question {i}")
         except Exception as e:
-            print(f"❌ Error running agent on task {task_id}: {e}")
-            error_response = f"AGENT ERROR: {e}"
-            answers_payload.append({"task_id": task_id, "submitted_answer": error_response})
             results_log.append({
                 "Task ID": task_id,
-                "Question": question_text[:200] + "..." if len(question_text) > 200 else question_text,
-                "Submitted Answer": error_response
             })
     if not answers_payload:
-        print("❌ Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
-    print(f"📤 Submitting {len(answers_payload)} answers for user '{username}'...")
     # 5. Submit
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
         response.raise_for_status()
         result_data = response.json()
         final_status = (
-            f"🎉 Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("✅ Submission successful!")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
@@ -384,56 +481,65 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"❌ Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
-        status_message = f"❌ An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
-with gr.Blocks(title="Enhanced GAIA Agent") as demo:
-    gr.Markdown("# 🚀 Enhanced GAIA Agent with Mistral-7B")
     gr.Markdown(
         """
-        **Enhanced Features:**
-        - 🧠 **Mistral-7B-Instruct** for advanced reasoning
-        - 🔧 **Tool Integration** for calculations and data processing
-        - 📊 **Task Type Detection** for optimized responses
-        - 🎯 **GAIA-Optimized** prompting strategies
         **Instructions:**
-        1. Clone this space and ensure you have access to Mistral-7B-Instruct
         2. Log in to your Hugging Face account using the button below
-        3. Click 'Run Enhanced Evaluation' to process all questions with the enhanced agent
-        **Note:** The enhanced agent uses Mistral-7B which requires significant computational resources.
-        Processing may take several minutes depending on the number of questions.
         """
     )
-    with gr.Row():
-        gr.LoginButton()
-    with gr.Row():
-        run_button = gr.Button("🚀 Run Enhanced Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(
-        label="📊 Run Status / Submission Result",
-        lines=8,
-        interactive=False
-    )
-    results_table = gr.DataFrame(
-        label="📝 Questions and Agent Answers",
-        wrap=True,
-        height=400
-    )
     run_button.click(
         fn=run_and_submit_all,
@@ -441,33 +547,39 @@ with gr.Blocks(title="Enhanced GAIA Agent") as demo:
     )
 if __name__ == "__main__":
-    print("\n" + "="*50)
-    print("🚀 ENHANCED GAIA AGENT STARTING")
-    print("="*50)
     # Environment check
     space_host = os.getenv("SPACE_HOST")
     space_id = os.getenv("SPACE_ID")
     if space_host:
         print(f"✅ SPACE_HOST: {space_host}")
-        print(f"🌐 Runtime URL: https://{space_host}.hf.space")
     else:
-        print("ℹ️  Running locally - SPACE_HOST not found")
     if space_id:
         print(f"✅ SPACE_ID: {space_id}")
-        print(f"📁 Repo URL: https://huggingface.co/spaces/{space_id}")
     else:
         print("ℹ️  SPACE_ID not found")
-    # GPU/CPU check
-    if torch.cuda.is_available():
-        print(f"🎮 GPU Available: {torch.cuda.get_device_name()}")
-        print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
     else:
-        print("💻 Running on CPU (GPU not available)")
-    print("="*50)
-    print("🚀 Launching Enhanced GAIA Agent Interface...")
     demo.launch(debug=True, share=False)

 import pandas as pd
 import json
 import re
+import time
+from typing import List, Dict, Any, Optional
 from datetime import datetime
+import threading
+import queue
+from ctransformers import AutoModelForCausalLM
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+class WebSearchTool:
+    """Web search tool using Serper API for real-time information retrieval"""
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.base_url = "https://google.serper.dev/search"
+    def search(self, query: str, num_results: int = 5) -> Dict[str, Any]:
+        """Perform web search and return structured results"""
         try:
+            headers = {
+                'X-API-KEY': self.api_key,
+                'Content-Type': 'application/json'
+            }
+            payload = {
+                'q': query,
+                'num': num_results,
+                'gl': 'us',
+                'hl': 'en'
+            }
+            response = requests.post(self.base_url, json=payload, headers=headers, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            # Extract and format results
+            results = []
+            if 'organic' in data:
+                for item in data['organic'][:num_results]:
+                    results.append({
+                        'title': item.get('title', ''),
+                        'snippet': item.get('snippet', ''),
+                        'link': item.get('link', ''),
+                        'position': item.get('position', 0)
+                    })
+            return {
+                'success': True,
+                'results': results,
+                'query': query,
+                'total_results': len(results)
+            }
         except Exception as e:
+            logger.error(f"Web search error: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'results': [],
+                'query': query,
+                'total_results': 0
+            }
+class CalculatorTool:
+    """Enhanced calculator tool for mathematical operations"""
+    def calculate(self, expression: str) -> Dict[str, Any]:
+        """Safely evaluate mathematical expressions"""
         try:
+            # Clean the expression
+            expression = expression.strip()
+            # Replace common mathematical functions
+            expression = expression.replace('^', '**')  # Power operator
+            expression = re.sub(r'\b(\d+)x(\d+)\b', r'\1*\2', expression)  # Handle multiplication like 5x3
+            # Allow only safe mathematical operations
+            allowed_chars = set('0123456789+-*/().,eE pi')
+            allowed_funcs = ['abs', 'round', 'min', 'max', 'sum', 'pow', 'sqrt']
+            # Basic safety check
+            if any(char.isalpha() and char not in 'pie' for char in expression):
+                # Check if it contains allowed function names
+                import math
+                safe_dict = {
+                    "__builtins__": {},
+                    "abs": abs, "round": round, "min": min, "max": max,
+                    "sum": sum, "pow": pow, "sqrt": math.sqrt,
+                    "pi": math.pi, "e": math.e,
+                    "sin": math.sin, "cos": math.cos, "tan": math.tan,
+                    "log": math.log, "log10": math.log10,
+                    "exp": math.exp, "floor": math.floor, "ceil": math.ceil
+                }
+                result = eval(expression, safe_dict)
+            else:
+                result = eval(expression)
+            return {
+                'success': True,
+                'result': result,
+                'expression': expression
+            }
         except Exception as e:
+            logger.error(f"Calculator error: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'expression': expression,
+                'result': None
+            }
+class LocalLLMManager:
+    """Manages local quantized LLM for reasoning"""
+    def __init__(self):
+        self.model = None
+        self.model_loaded = False
+        self.load_lock = threading.Lock()
+    def load_model(self):
+        """Load quantized model optimized for CPU inference"""
+        with self.load_lock:
+            if self.model_loaded:
+                return
+            try:
+                logger.info("Loading quantized model...")
+                # Use Phi-3-mini for better performance on CPU with limited resources
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    "microsoft/Phi-3-mini-4k-instruct-gguf",
+                    model_file="Phi-3-mini-4k-instruct-q4.gguf",
+                    model_type="phi3",
+                    gpu_layers=0,  # CPU only
+                    context_length=3072,  # Reduced context to save memory
+                    max_new_tokens=512,
+                    temperature=0.1,
+                    top_p=0.9,
+                    repetition_penalty=1.1
+                )
+                self.model_loaded = True
+                logger.info("Model loaded successfully")
+            except Exception as e:
+                logger.error(f"Error loading model: {e}")
+                # Fallback to a smaller model if Phi-3 fails
+                try:
+                    logger.info("Trying fallback model...")
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+                        model_file="tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
+                        model_type="llama",
+                        gpu_layers=0,
+                        context_length=2048,
+                        max_new_tokens=256
+                    )
+                    self.model_loaded = True
+                    logger.info("Fallback model loaded successfully")
+                except Exception as e2:
+                    logger.error(f"Fallback model also failed: {e2}")
+                    raise
+    def generate(self, prompt: str, max_tokens: int = 256) -> str:
+        """Generate response from local model"""
+        if not self.model_loaded:
+            self.load_model()
+        if not self.model:
+            return "Error: Model not available"
+        try:
+            # Format prompt for Phi-3
+            formatted_prompt = f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
+            response = self.model(
+                formatted_prompt,
+                max_new_tokens=min(max_tokens, 256),  # Limit tokens for speed
+                temperature=0.1,
+                stop=["<|end|>", "<|user|>"]
+            )
+            # Clean response
+            response = response.replace(formatted_prompt, "").strip()
+            if "<|end|>" in response:
+                response = response.split("<|end|>")[0].strip()
+            return response
         except Exception as e:
+            logger.error(f"Generation error: {e}")
+            return f"Error generating response: {e}"
+class GAIAAgent:
+    """Advanced GAIA agent with reasoning, tools, and multi-step problem solving"""
+    def __init__(self):
+        # Initialize tools
+        self.serper_api_key = os.getenv("SERPER_API_KEY")
+        if not self.serper_api_key:
+            logger.warning("SERPER_API_KEY not found. Web search will be disabled.")
+            self.web_search = None
+        else:
+            self.web_search = WebSearchTool(self.serper_api_key)
+        self.calculator = CalculatorTool()
+        self.llm = LocalLLMManager()
+        # Agent configuration
+        self.max_iterations = 5
+        self.max_reasoning_length = 1000
+        logger.info("GAIA Agent initialized")
+    def _identify_question_type(self, question: str) -> str:
+        """Identify the type of question to determine approach"""
         question_lower = question.lower()
+        if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', '=', 'sum', 'multiply', 'divide']):
+            return 'mathematical'
+        elif any(word in question_lower for word in ['current', 'latest', 'recent', 'today', 'now', '2024', '2025']):
+            return 'current_info'
+        elif any(word in question_lower for word in ['who', 'what', 'where', 'when', 'why', 'how']):
+            return 'factual'
+        elif any(word in question_lower for word in ['analyze', 'compare', 'explain', 'reason']):
+            return 'analytical'
         else:
+            return 'general'
+    def _use_web_search(self, query: str) -> str:
+        """Use web search tool and format results"""
+        if not self.web_search:
+            return "Web search not available (API key missing)"
+        results = self.web_search.search(query, num_results=3)
+        if not results['success']:
+            return f"Search failed: {results.get('error', 'Unknown error')}"
+        if not results['results']:
+            return "No search results found"
+        formatted_results = f"Search results for '{query}':\n"
+        for i, result in enumerate(results['results'], 1):
+            formatted_results += f"{i}. {result['title']}\n   {result['snippet']}\n\n"
+        return formatted_results
+    def _use_calculator(self, expression: str) -> str:
+        """Use calculator tool and format result"""
+        result = self.calculator.calculate(expression)
+        if result['success']:
+            return f"Calculation: {result['expression']} = {result['result']}"
+        else:
+            return f"Calculation error: {result['error']}"
+    def _generate_reasoning(self, question: str, context: str = "") -> str:
+        """Generate reasoning step using local LLM"""
+        reasoning_prompt = f"""Question: {question}
+Context: {context}
+Think step by step about this question. Consider:
+1. What information do I need?
+2. What tools might help?
+3. How should I approach this problem?
+Provide a clear reasoning step:"""
+        try:
+            reasoning = self.llm.generate(reasoning_prompt, max_tokens=200)
+            return reasoning
+        except Exception as e:
+            logger.error(f"Reasoning generation error: {e}")
+            return "Unable to generate reasoning step"
+    def _generate_final_answer(self, question: str, context: str, reasoning_steps: List[str]) -> str:
+        """Generate final answer using all available information"""
+        all_reasoning = "\n".join([f"Step {i+1}: {step}" for i, step in enumerate(reasoning_steps)])
+        answer_prompt = f"""Question: {question}
+Context and Information:
+{context}
+Reasoning Steps:
+{all_reasoning}
+Based on all the information and reasoning above, provide a clear, concise, and accurate final answer to the question:"""
+        try:
+            answer = self.llm.generate(answer_prompt, max_tokens=200)
+            return answer.strip()
+        except Exception as e:
+            logger.error(f"Answer generation error: {e}")
+            return "Unable to generate final answer"
+    def __call__(self, question: str) -> str:
+        """Main agent execution method"""
+        logger.info(f"Processing question: {question[:100]}...")
         try:
+            # Initialize
+            context = ""
+            reasoning_steps = []
+            question_type = self._identify_question_type(question)
+            logger.info(f"Question type identified: {question_type}")
+            # Step 1: Initial reasoning
+            initial_reasoning = self._generate_reasoning(question)
+            reasoning_steps.append(initial_reasoning)
+            context += f"Initial reasoning: {initial_reasoning}\n\n"
+            # Step 2: Apply tools based on question type
+            if question_type == 'mathematical':
+                # Try to extract mathematical expressions
+                math_matches = re.findall(r'[\d\+\-\*/\(\)\.\s\^]+', question)
+                for match in math_matches:
+                    if len(match.strip()) > 3:  # Avoid single digits
+                        calc_result = self._use_calculator(match.strip())
+                        context += f"Calculation: {calc_result}\n"
+            elif question_type in ['current_info', 'factual']:
+                # Use web search for factual or current information
+                search_result = self._use_web_search(question)
+                context += f"Web search results: {search_result}\n"
+            # Step 3: Additional reasoning with context
+            if context:
+                additional_reasoning = self._generate_reasoning(question, context)
+                reasoning_steps.append(additional_reasoning)
+                context += f"Additional reasoning: {additional_reasoning}\n\n"
+            # Step 4: Generate final answer
+            final_answer = self._generate_final_answer(question, context, reasoning_steps)
+            logger.info(f"Generated answer: {final_answer[:100]}...")
+            return final_answer
         except Exception as e:
+            logger.error(f"Agent execution error: {e}")
+            return f"Error processing question: {str(e)}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the GAIA Agent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
         username = f"{profile.username}"
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        print("Initializing GAIA Agent...")
+        agent = GAIAAgent()
+        print("GAIA Agent initialized successfully")
     except Exception as e:
+        print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # Agent code link
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code: {agent_code}")
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
         if not questions_data:
             print("Fetched questions list is empty.")
             return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run GAIA Agent
     results_log = []
     answers_payload = []
+    print(f"Running GAIA agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
             continue
+        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
+            start_time = time.time()
             submitted_answer = agent(question_text)
+            processing_time = time.time() - start_time
+            print(f"Question {task_id} processed in {processing_time:.2f}s")
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer,
+                "Processing Time (s)": f"{processing_time:.2f}"
             })
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": f"AGENT ERROR: {e}",
+                "Processing Time (s)": "Error"
             })
     if not answers_payload:
+        print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
     # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
+            f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
+with gr.Blocks(title="GAIA Agent Evaluation") as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Advanced GAIA Agent Features:**
+        - 🧠 Local quantized LLM for reasoning (Phi-3-mini optimized for CPU)
+        - 🔍 Web search capabilities via Serper API
+        - 🧮 Mathematical calculation tools
+        - 🎯 Multi-step problem solving approach
+        - 🚀 Optimized for 16GB RAM / 2 vCPU constraints
         **Instructions:**
+        1. Ensure your SERPER_API_KEY environment variable is set for web search
         2. Log in to your Hugging Face account using the button below
+        3. Click 'Run GAIA Evaluation' to start the comprehensive evaluation
+        **Note:** Initial model loading may take 1-2 minutes. Subsequent questions will be processed faster.
         """
     )
+    gr.LoginButton()
+    run_button = gr.Button("🚀 Run GAIA Evaluation & Submit All Answers", variant="primary")
+    status_output = gr.Textbox(label="📊 Evaluation Status & Results", lines=8, interactive=False)
+    results_table = gr.DataFrame(label="📋 Detailed Question Results", wrap=True)
+    # Add system info
+    with gr.Accordion("🔧 System Information", open=False):
+        gr.Markdown(f"""
+        - **Environment**: Hugging Face Space
+        - **Resources**: 16GB RAM, 2 vCPU
+        - **Model**: Phi-3-mini-4k-instruct (quantized)
+        - **Web Search**: {'✅ Enabled' if os.getenv('SERPER_API_KEY') else '❌ Disabled (no API key)'}
+        - **Calculator**: ✅ Enabled
+        - **Timestamp**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}
+        """)
     run_button.click(
         fn=run_and_submit_all,
     )
 if __name__ == "__main__":
     # Script entry point: print a startup banner, report which optional
     # environment variables are present, then launch the Gradio interface.
     print("\n" + "="*70)
     print("🚀 GAIA AGENT EVALUATION SYSTEM STARTING")
     print("="*70)
     # Environment check (values are only read here for the startup report)
     space_host = os.getenv("SPACE_HOST")
     space_id = os.getenv("SPACE_ID")
     serper_key = os.getenv("SERPER_API_KEY")
     if space_host:
         print(f"✅ SPACE_HOST: {space_host}")
         # Hugging Face documents SPACE_HOST as the full host name, e.g.
         # "user-space.hf.space". Strip the suffix before re-adding it so the
         # printed URL carries ".hf.space" exactly once in either form.
         runtime_host = space_host.removesuffix(".hf.space")
         print(f"   🌐 Runtime URL: https://{runtime_host}.hf.space")
     else:
         print("ℹ️  Running locally (SPACE_HOST not found)")
     if space_id:
         print(f"✅ SPACE_ID: {space_id}")
         print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id}")
     else:
         print("ℹ️  SPACE_ID not found")
     if serper_key:
         # Only report presence; the key itself is never printed.
         print("✅ SERPER_API_KEY: Configured")
     else:
         print("⚠️  SERPER_API_KEY: Not found - Web search will be disabled")
     print("="*70)
     print("📚 GAIA Agent Features:")
     print("  🧠 Local LLM reasoning")
     print("  🔍 Web search integration")
     print("  🧮 Mathematical calculations")
     print("  🎯 Multi-step problem solving")
     print("="*70 + "\n")
     print("🎯 Launching GAIA Agent Evaluation Interface...")
     # `demo` is the gr.Blocks app built at module level above this block.
     demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,13 +1,13 @@
-torch>=2.0.0
 transformers>=4.35.0
-requests>=2.25.0
-pandas>=1.3.0
-numpy>=1.21.0
-duckduckgo-search>=3.8.0
-pdfminer.six>=20220524
-beautifulsoup4>=4.9.0
-html2text>=2020.1.16
-numexpr>=2.8.0
-python-dotenv>=0.19.0
 accelerate>=0.20.0
 sentencepiece>=0.1.99

+gradio>=4.0.0
 transformers>=4.35.0
+torch>=2.0.0
+pandas>=1.5.0
+requests>=2.28.0
+beautifulsoup4>=4.11.0
+wikipedia>=1.4.0
+smolagents>=0.1.0
 accelerate>=0.20.0
 sentencepiece>=0.1.99
+openpyxl
+PyPDF2
+pillow