Final_Assignment_Template

Sleeping

App Files Files Community

Kackle commited on Jun 28, 2025

Commit

83861dc

verified ·

1 Parent(s): 245f5ad

logical change

Browse files

Files changed (1) hide show

app.py +209 -176

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import aiohttp
 import time
 import random
 import json
 from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
@@ -21,182 +22,225 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
-# --- Custom Tools ---
-class KnowledgeBaseTool(Tool):
-    name = "knowledge_base"
-    description = "Access structured knowledge for common topics"
-    inputs = {"topic": {"type": "string", "description": "The topic to look up"}}
-    output_type = "string"
     def __init__(self):
-        super().__init__()
-        self.is_initialized = True
-        # Common knowledge base
-        self.knowledge = {
-            "olympics": "Olympic Games data: Countries, athletes, years, sports",
-            "countries": "Country codes: ISO, IOC, FIFA codes and country information",
-            "sports": "Sports history, rules, famous athletes and events",
-            "science": "Scientific facts, formulas, discoveries, and researchers",
-            "history": "Historical events, dates, people, and places",
-            "geography": "Countries, capitals, populations, and geographical features"
-        }
-    def forward(self, topic: str) -> str:
-        topic_lower = topic.lower()
-        for key, info in self.knowledge.items():
-            if key in topic_lower:
-                return f"Knowledge base: {info}. Use this context to answer questions about {topic}."
-        return f"No specific knowledge base entry for '{topic}'. Use general reasoning."
-class WikipediaSearchTool(Tool):
-    name = "wikipedia_search"
-    description = "Search Wikipedia for information"
-    inputs = {"query": {"type": "string", "description": "The search query for Wikipedia"}}
-    output_type = "string"
     def __init__(self):
-        super().__init__()
-        self.is_initialized = True
-    def forward(self, query: str) -> str:
-        """Search Wikipedia with simple fallback."""
-        try:
-            import requests
-            wiki_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
-            response = requests.get(wiki_url, timeout=2)
-            if response.status_code == 200:
-                data = response.json()
-                if 'extract' in data and data['extract']:
-                    return f"Wikipedia: {data['extract'][:500]}"  # Limit length
-        except Exception as e:
-            print(f"Wikipedia search failed: {e}")
-        return f"Wikipedia search unavailable for '{query}'. Use your knowledge to answer."
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class SlpMultiAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-    async def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        # Truncate question to avoid exceeding model context length
-        MAX_QUESTION_LENGTH = 1000
-        short_question = question  # [:MAX_QUESTION_LENGTH]
-        # Use cheaper, faster model
-        model = OpenAIServerModel(
-            model_id="gpt-3.5-turbo",
-            temperature=0.0,  # Deterministic for consistency
-            max_tokens=400    # Reduced tokens for cost efficiency
         )
-        # Create only essential agents with reduced complexity
-        research_agent = CodeAgent(
-            tools=[KnowledgeBaseTool()],  # Remove Wikipedia to avoid timeouts
-            model=model,
-            additional_authorized_imports=["re", "datetime"],
-            max_steps=2,  # Reduced steps for cost
-            name="ResearchAgent",
-            verbosity_level=0,
-            description="Quick factual research and knowledge lookup."
-        )
-        solver_agent = CodeAgent(
-            tools=[],
-            model=model,
-            additional_authorized_imports=["math", "re", "collections", "itertools"],
-            max_steps=2,  # Reduced steps
-            name="SolverAgent",
-            verbosity_level=0,
-            description="Problem solving, calculations, and logical reasoning."
-        )
-        manager_agent = CodeAgent(
-            model=OpenAIServerModel(
-                model_id="gpt-3.5-turbo",
-                temperature=0.0,
-                max_tokens=500
-            ),
-            tools=[KnowledgeBaseTool()],  # Remove Wikipedia to avoid timeouts
-            managed_agents=[research_agent, solver_agent],  # Only 2 agents
-            name="ManagerAgent",
-            description="Efficient manager for quick problem solving.",
-            additional_authorized_imports=["re", "math"],
-            planning_interval=1,  # Faster planning
-            verbosity_level=0,  # Reduce verbosity
-            max_steps=3,  # Further reduced steps to avoid timeouts
-            final_answer_checks=[check_reasoning]
-        )
-        # Create a task for the agent run with retry mechanism for rate limits
-        max_retries = 3
-        result = None
-        for attempt in range(max_retries):
-            try:
-                loop = asyncio.get_event_loop()
-                result = await loop.run_in_executor(
-                    None,
-                    lambda: manager_agent.run(f"""
-                    Question: {short_question}
-                    You have knowledge_base() tool and two agents:
-                    - ResearchAgent: For factual questions
-                    - SolverAgent: For calculations and logic
-                    IMPORTANT: Always end with exactly this format:
-                    <code>
-                    final_answer("your direct answer")
-                    </code>
-                    Be concise and direct.
-                    """)
-                )
-                break  # Success, exit retry loop
-            except Exception as e:
-                print(f"Attempt {attempt+1}/{max_retries} failed: {e}")
-                if "rate limit" in str(e).lower() and attempt < max_retries - 1:
-                    # Add jitter to avoid synchronized retries
-                    wait_time = (attempt + 1) * 10 + random.uniform(0, 5)
-                    print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
-                    await asyncio.sleep(wait_time)
-                elif attempt < max_retries - 1:
-                    await asyncio.sleep(5)  # Wait before general retry
-                else:
-                    print(f"All attempts failed. Returning default answer.")
-                    return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."
-        # If we couldn't get a result after all retries
-        if result is None:
-            return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."
-        # Extract clean answer from result
-        if result and isinstance(result, str):
-            # Look for final_answer pattern
-            import re
-            final_answer_match = re.search(r'final_answer\(["\']([^"\']*)["\'\)]', result)  # Fixed regex
-            if final_answer_match:
-                clean_answer = final_answer_match.group(1)
-                return clean_answer
-            # If no final_answer found, try to extract the last meaningful line
-            lines = result.strip().split('\n')
-            for line in reversed(lines):
-                line = line.strip()
-                if line and not line.startswith('#') and not line.startswith('###') and len(line) < 200:
-                    return line
-        # Return the result from the agent
-        return result if result else "Unable to determine answer."
 def check_reasoning(final_answer, agent_memory):
-    # Skip expensive validation to save costs
     return True
@@ -261,8 +305,8 @@ async def run_and_submit_all(profile):
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    # Process questions one at a time to avoid rate limits
-    semaphore = asyncio.Semaphore(1)  # Process 1 question at a time
     async def process_question(item):
         task_id = item.get("task_id")
@@ -272,27 +316,16 @@ async def run_and_submit_all(profile):
             return None
         async with semaphore:
-            max_retries = 3
-            for attempt in range(max_retries):
-                try:
-                    print(f"Processing task {task_id}, attempt {attempt+1}/{max_retries}")
-                    submitted_answer = await agent(question_text)
-                    return {"task_id": task_id, "submitted_answer": submitted_answer,
-                            "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
-                except Exception as e:
-                    print(f"Error running agent on task {task_id}, attempt {attempt+1}: {e}")
-                    if "rate limit" in str(e).lower() and attempt < max_retries - 1:
-                        # Exponential backoff with jitter
-                        wait_time = (2 ** attempt) * 5 + random.uniform(0, 3)
-                        print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
-                        await asyncio.sleep(wait_time)
-                    elif attempt < max_retries - 1:
-                        await asyncio.sleep(5)  # Reduced wait time
-                    else:
-                        # All retries failed, return default answer
-                        default_answer = "This is a default answer."
-                        return {"task_id": task_id, "submitted_answer": default_answer,
-                                "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
     # Create tasks for all questions
     tasks = [process_question(item) for item in questions_data]

 import time
 import random
 import json
+import re
 from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
 OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
+# --- Custom Tools for Better Reasoning ---
+class TrickQuestionDetector(Tool):
+    """Detects and handles trick questions"""
     def __init__(self):
+        super().__init__(
+            name="trick_detector",
+            description="Analyze if a question is a trick question and provide guidance",
+            fn=self.detect_trick
+        )
+    def detect_trick(self, question: str) -> str:
+        """Detect common trick question patterns"""
+        q_lower = question.lower()
+        # Reverse text tricks
+        if question != question and any(c.isalpha() for c in question):
+            reversed_q = question[::-1]
+            if reversed_q.count(' ') > 0:
+                return f"TRICK DETECTED: This appears to be reversed text. Decoded: '{reversed_q}'"
+        # Word puzzles
+        if 'rewsna' in question or 'tfel' in question:
+            return "TRICK DETECTED: Contains reversed words. Try reading backwards."
+        # Contradictory statements
+        contradiction_words = ['impossible', 'never', 'always', 'none', 'all']
+        if sum(word in q_lower for word in contradiction_words) >= 2:
+            return "TRICK DETECTED: Contains contradictory terms. Look for logical impossibilities."
+        # Mathematical tricks
+        if any(phrase in q_lower for phrase in ['how many', 'total', 'sum']) and 'zero' in q_lower:
+            return "TRICK DETECTED: Mathematical trick involving zero or impossible calculations."
+        return "No obvious trick detected. Proceed with normal analysis."
+class StepByStepReasoner(Tool):
+    """Breaks down complex questions into steps"""
     def __init__(self):
+        super().__init__(
+            name="step_reasoner",
+            description="Break down complex questions into logical steps",
+            fn=self.reason_steps
+        )
+    def reason_steps(self, question: str) -> str:
+        """Break question into reasoning steps"""
+        steps = []
+        q_lower = question.lower()
+        # Identify question components
+        if any(word in q_lower for word in ['who', 'what', 'when', 'where', 'why', 'how']):
+            steps.append("1. Identify the specific information being requested")
+        if any(word in q_lower for word in ['between', 'from', 'to', 'during']):
+            steps.append("2. Note the time period or range specified")
+        if any(word in q_lower for word in ['calculate', 'count', 'how many', 'total']):
+            steps.append("3. Determine what needs to be calculated or counted")
+        if any(word in q_lower for word in ['wikipedia', 'article', 'featured']):
+            steps.append("4. Consider Wikipedia-specific processes and history")
+        if any(word in q_lower for word in ['only', 'single', 'one', 'unique']):
+            steps.append("5. Focus on finding the single/unique answer requested")
+        steps.append("6. Verify the answer makes logical sense")
+        return "REASONING STEPS:\n" + "\n".join(steps)
+class FactChecker(Tool):
+    """Validates factual claims and provides confidence levels"""
     def __init__(self):
+        super().__init__(
+            name="fact_checker",
+            description="Check factual accuracy and provide confidence assessment",
+            fn=self.check_facts
+        )
+    def check_facts(self, claim: str) -> str:
+        """Assess factual accuracy of a claim"""
+        confidence_indicators = {
+            'high': ['wikipedia', 'well-known', 'documented', 'official', 'verified'],
+            'medium': ['likely', 'probably', 'appears', 'seems', 'reported'],
+            'low': ['unclear', 'uncertain', 'possibly', 'might', 'could be']
+        }
+        claim_lower = claim.lower()
+        # Check for confidence indicators
+        high_conf = sum(1 for word in confidence_indicators['high'] if word in claim_lower)
+        medium_conf = sum(1 for word in confidence_indicators['medium'] if word in claim_lower)
+        low_conf = sum(1 for word in confidence_indicators['low'] if word in claim_lower)
+        if high_conf > medium_conf and high_conf > low_conf:
+            return f"CONFIDENCE: HIGH - Claim appears to be well-documented: '{claim}'"
+        elif low_conf > high_conf:
+            return f"CONFIDENCE: LOW - Claim contains uncertainty markers: '{claim}'"
+        else:
+            return f"CONFIDENCE: MEDIUM - Standard factual claim: '{claim}'"
+class AnswerValidator(Tool):
+    """Validates if an answer makes sense for the question"""
+    def __init__(self):
+        super().__init__(
+            name="answer_validator",
+            description="Validate if an answer is reasonable for the given question",
+            fn=self.validate_answer
         )
+    def validate_answer(self, question: str, answer: str) -> str:
+        """Check if answer is reasonable for the question"""
+        q_lower = question.lower()
+        a_lower = answer.lower()
+        # Check for question-answer type matching
+        if 'who' in q_lower and not any(indicator in a_lower for indicator in ['person', 'user', 'editor', 'author', 'name']):
+            return "WARNING: 'Who' question but answer doesn't seem to identify a person"
+        if 'when' in q_lower and not any(indicator in a_lower for indicator in ['year', 'date', 'time', '20', '19']):
+            return "WARNING: 'When' question but answer doesn't contain time information"
+        if 'how many' in q_lower and not any(char.isdigit() for char in answer):
+            return "WARNING: 'How many' question but answer contains no numbers"
+        if len(answer.strip()) < 3:
+            return "WARNING: Answer seems too short"
+        if len(answer.strip()) > 200:
+            return "WARNING: Answer seems too long - may need to be more concise"
+        return "VALIDATION: Answer format appears appropriate for question type"
+# --- Enhanced Agent with Tools ---
+class SlpMultiAgent:
+    def __init__(self):
+        print("Enhanced Agent initialized with reasoning tools.")
+        self.trick_detector = TrickQuestionDetector()
+        self.step_reasoner = StepByStepReasoner()
+        self.fact_checker = FactChecker()
+        self.answer_validator = AnswerValidator()
+    async def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Step 1: Check for tricks
+        trick_analysis = self.trick_detector.detect_trick(question)
+        print(f"Trick analysis: {trick_analysis}")
+        # Step 2: Break down reasoning steps
+        reasoning_steps = self.step_reasoner.reason_steps(question)
+        print(f"Reasoning steps: {reasoning_steps}")
+        # Step 3: Enhanced model call with tool insights
+        model = OpenAIServerModel(
+            model_id="gpt-4o-mini",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        try:
+            enhanced_prompt = f"""You are an expert problem solver. Analyze this question carefully:
+QUESTION: {question}
+TRICK ANALYSIS: {trick_analysis}
+{reasoning_steps}
+Instructions:
+1. If a trick was detected, handle it appropriately
+2. Follow the reasoning steps systematically
+3. Think through each step carefully
+4. Provide a clear, direct answer
+5. If unsure, state your uncertainty clearly
+Be precise and thorough in your analysis."""
+            messages = [
+                {
+                    "role": "system",
+                    "content": "You are an expert at solving complex and trick questions. Always think step by step and be very careful about the exact wording of questions."
+                },
+                {
+                    "role": "user",
+                    "content": enhanced_prompt
+                }
+            ]
+            result = model(messages)
+            if result:
+                # Step 4: Validate the answer
+                validation = self.answer_validator.validate_answer(question, result)
+                print(f"Answer validation: {validation}")
+                # Clean up the result
+                lines = result.strip().split('\n')
+                for line in reversed(lines):
+                    line = line.strip()
+                    if line and len(line) > 5 and not line.startswith(('Step', 'Analysis', 'TRICK', 'REASONING')):
+                        # Remove common prefixes
+                        line = re.sub(r'^(Answer:|Final answer:|The answer is:?)\s*', '', line, flags=re.IGNORECASE)
+                        if line:
+                            return line
+                return result
+            else:
+                return "I don't have enough information to answer this question accurately."
+        except Exception as e:
+            print(f"Model call failed: {e}")
+            return "I apologize, but I'm currently experiencing technical difficulties."
 def check_reasoning(final_answer, agent_memory):
     return True
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    # Process questions with controlled concurrency
+    semaphore = asyncio.Semaphore(2)  # Process 2 questions at a time
     async def process_question(item):
         task_id = item.get("task_id")
             return None
         async with semaphore:
+            try:
+                print(f"Processing task {task_id}")
+                submitted_answer = await agent(question_text)
+                return {"task_id": task_id, "submitted_answer": submitted_answer,
+                        "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
+            except Exception as e:
+                print(f"Error running agent on task {task_id}: {e}")
+                default_answer = "I don't have enough information to answer this question accurately."
+                return {"task_id": task_id, "submitted_answer": default_answer,
+                        "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
     # Create tasks for all questions
     tasks = [process_question(item) for item in questions_data]