Kackle committed on
Commit
7837d72
·
verified ·
1 Parent(s): 1d0f146

trying a different approach with original idea

Browse files
Files changed (1) hide show
  1. app.py +277 -210
app.py CHANGED
@@ -8,9 +8,9 @@ import aiohttp
8
  import time
9
  import random
10
  import json
11
- import re
12
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
13
- from gemini_agent import GeminiAgent # Assuming you have a GeminiAgent class defined in gemini_agent.py
14
 
15
  from dotenv import load_dotenv
16
 
@@ -21,223 +21,276 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
 
23
  OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
24
-
25
- # --- Custom Tools for Better Reasoning ---
26
-
27
- class TrickQuestionDetector(Tool):
28
- """Detects and handles trick questions"""
 
 
 
 
 
 
 
 
 
29
 
30
  def __init__(self):
31
  super().__init__()
32
- self.name = "trick_detector"
33
- self.description = "Analyze if a question is a trick question and provide guidance"
34
- self.inputs = {"question": {"type": "string", "description": "The question to analyze"}}
35
-
36
- def detect_trick(self, question: str) -> str:
37
- """Detect common trick question patterns"""
38
- q_lower = question.lower()
39
-
40
- # Reverse text tricks - check if question might be reversed
41
- reversed_q = question[::-1]
42
- if len(question) > 5 and any(c.isalpha() for c in question):
43
- # Simple heuristic: if reversed version has common English patterns
44
- if any(word in reversed_q.lower() for word in ['the', 'and', 'what', 'how', 'when', 'where']):
45
- return f"TRICK DETECTED: This appears to be reversed text. Decoded: '{reversed_q}'"
46
 
47
- # Word puzzles
48
- if 'rewsna' in question or 'tfel' in question:
49
- return "TRICK DETECTED: Contains reversed words. Try reading backwards."
50
-
51
- # Contradictory statements
52
- contradiction_words = ['impossible', 'never', 'always', 'none', 'all']
53
- if sum(word in q_lower for word in contradiction_words) >= 2:
54
- return "TRICK DETECTED: Contains contradictory terms. Look for logical impossibilities."
55
-
56
- # Mathematical tricks
57
- if any(phrase in q_lower for phrase in ['how many', 'total', 'sum']) and 'zero' in q_lower:
58
- return "TRICK DETECTED: Mathematical trick involving zero or impossible calculations."
59
-
60
- return "No obvious trick detected. Proceed with normal analysis."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- class StepByStepReasoner(Tool):
63
- """Breaks down complex questions into steps"""
 
 
 
64
 
65
  def __init__(self):
66
  super().__init__()
67
- self.name = "step_reasoner"
68
- self.description = "Break down complex questions into logical steps"
69
- self.inputs = {"question": {"type": "string", "description": "The question to break down"}}
 
 
 
 
 
 
 
70
 
71
- def reason_steps(self, question: str) -> str:
72
- """Break question into reasoning steps"""
73
- steps = []
74
- q_lower = question.lower()
75
-
76
- # Identify question components
77
- if any(word in q_lower for word in ['who', 'what', 'when', 'where', 'why', 'how']):
78
- steps.append("1. Identify the specific information being requested")
79
-
80
- if any(word in q_lower for word in ['between', 'from', 'to', 'during']):
81
- steps.append("2. Note the time period or range specified")
82
-
83
- if any(word in q_lower for word in ['calculate', 'count', 'how many', 'total']):
84
- steps.append("3. Determine what needs to be calculated or counted")
85
-
86
- if any(word in q_lower for word in ['wikipedia', 'article', 'featured']):
87
- steps.append("4. Consider Wikipedia-specific processes and history")
88
-
89
- if any(word in q_lower for word in ['only', 'single', 'one', 'unique']):
90
- steps.append("5. Focus on finding the single/unique answer requested")
91
-
92
- steps.append("6. Verify the answer makes logical sense")
93
-
94
- return "REASONING STEPS:\n" + "\n".join(steps)
95
-
96
- class FactChecker(Tool):
97
- """Validates factual claims and provides confidence levels"""
98
 
99
  def __init__(self):
100
  super().__init__()
101
- self.name = "fact_checker"
102
- self.description = "Check factual accuracy and provide confidence assessment"
103
- self.inputs = {"claim": {"type": "string", "description": "The claim to fact-check"}}
104
-
105
- def check_facts(self, claim: str) -> str:
106
- """Assess factual accuracy of a claim"""
107
- confidence_indicators = {
108
- 'high': ['wikipedia', 'well-known', 'documented', 'official', 'verified'],
109
- 'medium': ['likely', 'probably', 'appears', 'seems', 'reported'],
110
- 'low': ['unclear', 'uncertain', 'possibly', 'might', 'could be']
111
- }
112
 
113
- claim_lower = claim.lower()
114
-
115
- # Check for confidence indicators
116
- high_conf = sum(1 for word in confidence_indicators['high'] if word in claim_lower)
117
- medium_conf = sum(1 for word in confidence_indicators['medium'] if word in claim_lower)
118
- low_conf = sum(1 for word in confidence_indicators['low'] if word in claim_lower)
 
 
 
 
 
 
119
 
120
- if high_conf > medium_conf and high_conf > low_conf:
121
- return f"CONFIDENCE: HIGH - Claim appears to be well-documented: '{claim}'"
122
- elif low_conf > high_conf:
123
- return f"CONFIDENCE: LOW - Claim contains uncertainty markers: '{claim}'"
 
 
 
 
 
 
124
  else:
125
- return f"CONFIDENCE: MEDIUM - Standard factual claim: '{claim}'"
126
-
127
- class AnswerValidator(Tool):
128
- """Validates if an answer makes sense for the question"""
129
 
130
- def __init__(self):
131
- super().__init__()
132
- self.name = "answer_validator"
133
- self.description = "Validate if an answer is reasonable for the given question"
134
- self.inputs = {"question": {"type": "string", "description": "The question"}, "answer": {"type": "string", "description": "The answer to validate"}}
135
-
136
- def validate_answer(self, question: str, answer: str) -> str:
137
- """Check if answer is reasonable for the question"""
138
- q_lower = question.lower()
139
- a_lower = answer.lower()
140
-
141
- # Check for question-answer type matching
142
- if 'who' in q_lower and not any(indicator in a_lower for indicator in ['person', 'user', 'editor', 'author', 'name']):
143
- return "WARNING: 'Who' question but answer doesn't seem to identify a person"
144
-
145
- if 'when' in q_lower and not any(indicator in a_lower for indicator in ['year', 'date', 'time', '20', '19']):
146
- return "WARNING: 'When' question but answer doesn't contain time information"
147
-
148
- if 'how many' in q_lower and not any(char.isdigit() for char in answer):
149
- return "WARNING: 'How many' question but answer contains no numbers"
150
-
151
- if len(answer.strip()) < 3:
152
- return "WARNING: Answer seems too short"
153
-
154
- if len(answer.strip()) > 200:
155
- return "WARNING: Answer seems too long - may need to be more concise"
156
-
157
- return "VALIDATION: Answer format appears appropriate for question type"
158
 
159
- # --- Enhanced Agent with Tools ---
 
160
  class SlpMultiAgent:
161
  def __init__(self):
162
- print("Enhanced Agent initialized with reasoning tools.")
163
- self.trick_detector = TrickQuestionDetector()
164
- self.step_reasoner = StepByStepReasoner()
165
- self.fact_checker = FactChecker()
166
- self.answer_validator = AnswerValidator()
167
 
168
  async def __call__(self, question: str) -> str:
169
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
170
 
171
- # Step 1: Check for tricks
172
- trick_analysis = self.trick_detector.detect_trick(question)
173
- print(f"Trick analysis: {trick_analysis}")
174
 
175
- # Step 2: Break down reasoning steps
176
- reasoning_steps = self.step_reasoner.reason_steps(question)
177
- print(f"Reasoning steps: {reasoning_steps}")
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Step 3: Enhanced model call with tool insights
180
- model = OpenAIServerModel(
181
- model_id="gpt-4o-mini",
182
- temperature=0.1,
183
- max_tokens=1000
 
 
 
 
184
  )
185
 
186
- try:
187
- enhanced_prompt = f"""You are an expert problem solver. Analyze this question carefully:
188
-
189
- QUESTION: {question}
190
-
191
- TRICK ANALYSIS: {trick_analysis}
192
-
193
- {reasoning_steps}
194
-
195
- Instructions:
196
- 1. If a trick was detected, handle it appropriately
197
- 2. Follow the reasoning steps systematically
198
- 3. Think through each step carefully
199
- 4. Provide a clear, direct answer
200
- 5. If unsure, state your uncertainty clearly
201
-
202
- Be precise and thorough in your analysis."""
203
 
204
- messages = [
205
- {
206
- "role": "system",
207
- "content": "You are an expert at solving complex and trick questions. Always think step by step and be very careful about the exact wording of questions."
208
- },
209
- {
210
- "role": "user",
211
- "content": enhanced_prompt
212
- }
213
- ]
214
-
215
- result = model(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- if result:
218
- # Step 4: Validate the answer
219
- validation = self.answer_validator.validate_answer(question, result)
220
- print(f"Answer validation: {validation}")
221
-
222
- # Clean up the result
223
- lines = result.strip().split('\n')
224
- for line in reversed(lines):
225
- line = line.strip()
226
- if line and len(line) > 5 and not line.startswith(('Step', 'Analysis', 'TRICK', 'REASONING')):
227
- # Remove common prefixes
228
- line = re.sub(r'^(Answer:|Final answer:|The answer is:?)\s*', '', line, flags=re.IGNORECASE)
229
- if line:
230
- return line
231
-
232
- return result
233
- else:
234
- return "I don't have enough information to answer this question accurately."
235
-
236
- except Exception as e:
237
- print(f"Model call failed: {e}")
238
- return "I apologize, but I'm currently experiencing technical difficulties."
239
-
240
  def check_reasoning(final_answer, agent_memory):
 
241
  return True
242
 
243
 
@@ -268,7 +321,7 @@ async def run_and_submit_all(profile):
268
 
269
  # 1. Instantiate Agent ( modify this part to create your agent)
270
  try:
271
- agent = GeminiAgent()
272
  except Exception as e:
273
  print(f"Error instantiating agent: {e}")
274
  return f"Error initializing agent: {e}", None
@@ -302,8 +355,8 @@ async def run_and_submit_all(profile):
302
  answers_payload = []
303
  print(f"Running agent on {len(questions_data)} questions...")
304
 
305
- # Process questions with controlled concurrency
306
- semaphore = asyncio.Semaphore(2) # Process 2 questions at a time
307
 
308
  async def process_question(item):
309
  task_id = item.get("task_id")
@@ -313,16 +366,27 @@ async def run_and_submit_all(profile):
313
  return None
314
 
315
  async with semaphore:
316
- try:
317
- print(f"Processing task {task_id}")
318
- submitted_answer = await agent(question_text)
319
- return {"task_id": task_id, "submitted_answer": submitted_answer,
320
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
321
- except Exception as e:
322
- print(f"Error running agent on task {task_id}: {e}")
323
- default_answer = "I don't have enough information to answer this question accurately."
324
- return {"task_id": task_id, "submitted_answer": default_answer,
325
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
 
 
 
 
 
 
 
 
 
 
 
326
 
327
  # Create tasks for all questions
328
  tasks = [process_question(item) for item in questions_data]
@@ -394,17 +458,20 @@ async def run_and_submit_all(profile):
394
 
395
  # --- Build Gradio Interface using Blocks ---
396
  with gr.Blocks() as demo:
397
- gr.Markdown("# Basic Agent Evaluation Runner")
398
  gr.Markdown(
399
  """
 
 
 
 
 
400
  **Instructions:**
401
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
402
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
403
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
404
  ---
405
- **Disclaimers:**
406
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
407
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
408
  """
409
  )
410
 
@@ -455,5 +522,5 @@ if __name__ == "__main__":
455
 
456
  print("-"*(60 + len(" App Starting ")) + "\n")
457
 
458
- print("Launching Gradio Interface for Basic Agent Evaluation...")
459
  demo.launch(debug=True, share=False)
 
8
  import time
9
  import random
10
  import json
11
+ import google.generativeai as genai
12
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
13
+
14
 
15
  from dotenv import load_dotenv
16
 
 
21
 
22
 
23
# API credentials are read from the environment (.env is loaded above via load_dotenv).
OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
GOOGLE_SEARCH_API_KEY = os.getenv('GOOGLE_SEARCH_API_KEY')  # Custom Search JSON API key
GOOGLE_SEARCH_CX = os.getenv('GOOGLE_SEARCH_CX')  # Custom Search engine ID (cx)

# Configure Gemini only when a key is present; GeminiModel below handles the
# missing-key case by falling back to OpenAI.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
31
+
32
+ # --- Custom Tools ---
33
class GoogleSearchTool(Tool):
    """Search tool backed by the Google Custom Search JSON API.

    Returns a formatted, length-limited string of results, or a descriptive
    message when the API is unconfigured, the request fails, or no results
    are found — it never raises to the caller.
    """

    name = "google_search"
    description = "Search Google for current information and facts"
    inputs = {"query": {"type": "string", "description": "The search query for Google"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True
        # Credentials come from the module-level environment constants; they
        # may be None, in which case forward() degrades gracefully.
        self.google_search_api_key = GOOGLE_SEARCH_API_KEY
        self.google_search_cx = GOOGLE_SEARCH_CX

    def forward(self, query: str) -> str:
        """Perform a Google search using the Custom Search API."""
        if not self.google_search_api_key or not self.google_search_cx:
            return f"Google Search API not configured. Query was: {query}"

        try:
            # BUG FIX: `requests` was used here without ever being imported
            # (it is not a module-level import — see WikipediaSearchTool,
            # which also imports it locally), causing a NameError at runtime.
            import requests

            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.google_search_api_key,
                'cx': self.google_search_cx,
                'q': query,
                'num': 5
            }

            response = requests.get(url, params=params, timeout=10)
            if response.status_code != 200:
                return f"Google Search failed with status {response.status_code}"

            results = response.json()
            if 'items' not in results:
                return f"No search results found for: {query}"

            # Format search results as "title: snippet" bullet points.
            formatted_results = f"Google search results for '{query}':\n\n"
            for item in results['items']:
                title = item.get('title', 'No title')
                snippet = item.get('snippet', 'No description')
                formatted_results += f"• {title}: {snippet}\n"

            return formatted_results[:1000]  # Limit length

        except Exception as e:
            # Best-effort tool: report the failure as text instead of raising.
            return f"Google Search error for '{query}': {str(e)}"
78
 
79
class KnowledgeBaseTool(Tool):
    """Tiny in-memory lookup of canned context strings for common topics."""

    name = "knowledge_base"
    description = "Access structured knowledge for common topics"
    inputs = {"topic": {"type": "string", "description": "The topic to look up"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True
        # Static topic -> summary mapping, matched by substring in forward().
        self.knowledge = {
            "olympics": "Olympic Games data: Countries, athletes, years, sports",
            "countries": "Country codes: ISO, IOC, FIFA codes and country information",
            "sports": "Sports history, rules, famous athletes and events",
            "science": "Scientific facts, formulas, discoveries, and researchers",
            "history": "Historical events, dates, people, and places",
            "geography": "Countries, capitals, populations, and geographical features",
        }

    def forward(self, topic: str) -> str:
        """Return canned context for the first knowledge key contained in *topic*."""
        needle = topic.lower()
        # First key (in insertion order) whose name appears in the query wins.
        hit = next(
            (info for key, info in self.knowledge.items() if key in needle),
            None,
        )
        if hit is not None:
            return f"Knowledge base: {hit}. Use this context to answer questions about {topic}."
        return f"No specific knowledge base entry for '{topic}'. Use general reasoning."
104
+
105
class WikipediaSearchTool(Tool):
    """Look up a topic summary via Wikipedia's REST page-summary endpoint."""

    name = "wikipedia_search"
    description = "Search Wikipedia for information"
    inputs = {"query": {"type": "string", "description": "The search query for Wikipedia"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True

    def forward(self, query: str) -> str:
        """Return a truncated Wikipedia extract, or a fallback message on any failure."""
        try:
            import requests  # local import: requests is not a module-level dependency

            endpoint = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
            resp = requests.get(endpoint, timeout=2)
            if resp.status_code == 200:
                payload = resp.json()
                extract = payload.get('extract')
                if extract:
                    return f"Wikipedia: {extract[:500]}"  # Limit length
        except Exception as e:
            # Network/parse failures are non-fatal; log and fall through.
            print(f"Wikipedia search failed: {e}")

        return f"Wikipedia search unavailable for '{query}'. Use your knowledge to answer."
129
+
130
+ # --- Gemini Model Wrapper ---
131
class GeminiModel:
    """Thin wrapper around google.generativeai with a stored generation config."""

    def __init__(self, model_name="gemini-2.0-flash", temperature=0.0, max_tokens=500):
        self.model_name = model_name
        self.temperature = temperature
        self.max_tokens = max_tokens
        # Only build the underlying client when an API key was configured;
        # otherwise generate_content() reports unavailability.
        if GOOGLE_API_KEY:
            self.model = genai.GenerativeModel(model_name)
        else:
            self.model = None
            print("Warning: Google API key not found, falling back to OpenAI")

    def generate_content(self, prompt):
        """Generate text for *prompt*; returns an error string instead of raising."""
        if not self.model:
            return "Gemini model not available"
        try:
            config = genai.types.GenerationConfig(
                max_output_tokens=self.max_tokens,
                temperature=self.temperature,
            )
            reply = self.model.generate_content(prompt, generation_config=config)
            return reply.text
        except Exception as e:
            print(f"Gemini API error: {e}")
            return f"Error generating response: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ # --- Basic Agent Definition ---
160
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
161
class SlpMultiAgent:
    """Multi-agent question answerer: a manager CodeAgent coordinating a
    research agent and a solver agent, all with Google Search access.

    Called as ``await agent(question)``; always returns a string (an answer
    or an apology message), never raises to the caller.
    """

    def __init__(self):
        print("BasicAgent initialized with Gemini and Google Search.")

    async def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # BUG FIX: removed dead `fixed_answer` assignment and its misleading
        # "Agent returning fixed answer" log line — that value was never returned.

        # Truncation is deliberately disabled; kept for easy re-enabling.
        MAX_QUESTION_LENGTH = 1000
        short_question = question  # [:MAX_QUESTION_LENGTH]

        # Use Gemini as primary model, fallback to OpenAI.
        # NOTE(review): when GeminiModel is selected, every CodeAgent below
        # still substitutes an OpenAIServerModel (GeminiModel does not
        # implement smolagents' model interface) — confirm this is intended.
        if GOOGLE_API_KEY:
            model = GeminiModel(
                model_name="gemini-2.0-flash",
                temperature=0.0,
                max_tokens=400
            )
            print("Using Gemini model")
        else:
            model = OpenAIServerModel(
                model_id="gpt-3.5-turbo",
                temperature=0.0,
                max_tokens=400
            )
            print("Using OpenAI model (Gemini not available)")

        # Create only essential agents with Google Search as first option.
        research_agent = CodeAgent(
            tools=[GoogleSearchTool(), KnowledgeBaseTool()],  # Google Search first
            model=model if not isinstance(model, GeminiModel) else OpenAIServerModel(model_id="gpt-3.5-turbo", temperature=0.0, max_tokens=400),
            additional_authorized_imports=["re", "datetime"],
            max_steps=2,
            name="ResearchAgent",
            verbosity_level=0,
            description="Research agent with Google Search and knowledge lookup."
        )

        solver_agent = CodeAgent(
            tools=[GoogleSearchTool()],  # Add Google Search to solver too
            model=model if not isinstance(model, GeminiModel) else OpenAIServerModel(model_id="gpt-3.5-turbo", temperature=0.0, max_tokens=400),
            additional_authorized_imports=["math", "re", "collections", "itertools"],
            max_steps=2,
            name="SolverAgent",
            verbosity_level=0,
            description="Problem solving with Google Search capability."
        )

        manager_agent = CodeAgent(
            model=OpenAIServerModel(
                model_id="gpt-3.5-turbo",
                temperature=0.0,
                max_tokens=500
            ),
            tools=[GoogleSearchTool(), KnowledgeBaseTool()],  # Google Search first
            managed_agents=[research_agent, solver_agent],
            name="ManagerAgent",
            description="Manager with Google Search and agent coordination.",
            additional_authorized_imports=["re", "math"],
            planning_interval=1,
            verbosity_level=0,
            max_steps=3,
            final_answer_checks=[check_reasoning]
        )

        # Run the manager in an executor (it is synchronous) with a retry
        # mechanism for rate limits.
        max_retries = 3
        result = None

        for attempt in range(max_retries):
            try:
                loop = asyncio.get_event_loop()
                result = await loop.run_in_executor(
                    None,
                    lambda: manager_agent.run(f"""
Question: {short_question}

You have google_search() as your PRIMARY tool, plus knowledge_base() and two agents:
- ResearchAgent: For factual questions (has Google Search)
- SolverAgent: For calculations and logic (has Google Search)

ALWAYS try google_search() FIRST for factual questions before using other tools.

IMPORTANT: Always end with exactly this format:
<code>
final_answer("your direct answer")
</code>

Be concise and direct.
""")
                )
                break  # Success, exit retry loop
            except Exception as e:
                print(f"Attempt {attempt+1}/{max_retries} failed: {e}")
                if "rate limit" in str(e).lower() and attempt < max_retries - 1:
                    # Add jitter to avoid synchronized retries
                    wait_time = (attempt + 1) * 10 + random.uniform(0, 5)
                    print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
                    await asyncio.sleep(wait_time)
                elif attempt < max_retries - 1:
                    await asyncio.sleep(5)  # Wait before general retry
                else:
                    print(f"All attempts failed. Returning default answer.")
                    return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."

        # If we couldn't get a result after all retries
        if result is None:
            return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."

        # Extract a clean answer from a string result.
        if result and isinstance(result, str):
            import re
            # BUG FIX: the old pattern's closing class ["\'\)] allowed a bare
            # ')' to terminate a quoted answer; require the closing quote.
            final_answer_match = re.search(r'final_answer\(["\']([^"\']*)["\']', result)
            if final_answer_match:
                return final_answer_match.group(1)

            # Otherwise take the last short, non-heading line.
            # (The redundant startswith('###') test was subsumed by '#'.)
            for line in reversed(result.strip().split('\n')):
                line = line.strip()
                if line and not line.startswith('#') and len(line) < 200:
                    return line

        # Return the result from the agent
        return result if result else "Unable to determine answer."
291
+
 
 
 
 
 
 
 
 
 
 
 
 
 
292
def check_reasoning(final_answer, agent_memory):
    """Final-answer hook for the manager CodeAgent; accepts every answer.

    Expensive validation is deliberately skipped to save API costs, so this
    unconditionally signals success regardless of *final_answer* or
    *agent_memory*.
    """
    return True
295
 
296
 
 
321
 
322
  # 1. Instantiate Agent ( modify this part to create your agent)
323
  try:
324
+ agent = SlpMultiAgent()
325
  except Exception as e:
326
  print(f"Error instantiating agent: {e}")
327
  return f"Error initializing agent: {e}", None
 
355
  answers_payload = []
356
  print(f"Running agent on {len(questions_data)} questions...")
357
 
358
+ # Process questions one at a time to avoid rate limits
359
+ semaphore = asyncio.Semaphore(1) # Process 1 question at a time
360
 
361
  async def process_question(item):
362
  task_id = item.get("task_id")
 
366
  return None
367
 
368
  async with semaphore:
369
+ max_retries = 3
370
+ for attempt in range(max_retries):
371
+ try:
372
+ print(f"Processing task {task_id}, attempt {attempt+1}/{max_retries}")
373
+ submitted_answer = await agent(question_text)
374
+ return {"task_id": task_id, "submitted_answer": submitted_answer,
375
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
376
+ except Exception as e:
377
+ print(f"Error running agent on task {task_id}, attempt {attempt+1}: {e}")
378
+ if "rate limit" in str(e).lower() and attempt < max_retries - 1:
379
+ # Exponential backoff with jitter
380
+ wait_time = (2 ** attempt) * 5 + random.uniform(0, 3)
381
+ print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
382
+ await asyncio.sleep(wait_time)
383
+ elif attempt < max_retries - 1:
384
+ await asyncio.sleep(5) # Reduced wait time
385
+ else:
386
+ # All retries failed, return default answer
387
+ default_answer = "This is a default answer."
388
+ return {"task_id": task_id, "submitted_answer": default_answer,
389
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
390
 
391
  # Create tasks for all questions
392
  tasks = [process_question(item) for item in questions_data]
 
458
 
459
  # --- Build Gradio Interface using Blocks ---
460
  with gr.Blocks() as demo:
461
+ gr.Markdown("# Enhanced Agent with Google Search & Gemini")
462
  gr.Markdown(
463
  """
464
+ **Features:**
465
+ - **Google Search Integration**: Primary tool for factual information
466
+ - **Gemini 2.0 Flash**: Advanced AI model for reasoning
467
+ - **Multi-Agent Architecture**: Research and Solver agents with search capabilities
468
+
469
  **Instructions:**
470
+ 1. Set up your environment variables: GOOGLE_API_KEY, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_CX
471
+ 2. Log in to your Hugging Face account using the button below
472
+ 3. Click 'Run Evaluation & Submit All Answers' to start the enhanced agent
473
  ---
474
+ **Note:** The agent will prioritize Google Search for factual questions, providing more accurate and current information.
 
 
475
  """
476
  )
477
 
 
522
 
523
  print("-"*(60 + len(" App Starting ")) + "\n")
524
 
525
+ print("Launching Gradio Interface for Enhanced Agent with Google Search & Gemini...")
526
  demo.launch(debug=True, share=False)