Kackle committed on
Commit
182266e
·
verified ·
1 Parent(s): 4603b9a

Reverting the change, as the logic did not improve and more issues came up

Browse files
Files changed (1) hide show
  1. app.py +176 -175
app.py CHANGED
@@ -8,7 +8,6 @@ import aiohttp
8
  import time
9
  import random
10
  import json
11
- import re
12
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
13
 
14
 
@@ -22,191 +21,182 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
  OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
24
 
25
- # --- Custom Tools for Better Reasoning ---
26
-
27
- class TrickQuestionDetector(Tool):
28
- """Detects and handles trick questions"""
29
-
30
- def __init__(self):
31
- super().__init__()
32
- self.name = "trick_detector"
33
- self.description = "Analyze if a question is a trick question and provide guidance"
34
- self.inputs = {"question": {"type": "string", "description": "The question to analyze"}}
35
- self.output_type = "string"
36
-
37
- def forward(self, question: str) -> str:
38
- """Detect common trick question patterns"""
39
- q_lower = question.lower()
40
-
41
- # Reverse text tricks - check if question might be reversed
42
- reversed_q = question[::-1]
43
- if len(question) > 5 and any(c.isalpha() for c in question):
44
- # Simple heuristic: if reversed version has common English patterns
45
- if any(word in reversed_q.lower() for word in ['the', 'and', 'what', 'how', 'when', 'where']):
46
- return f"TRICK DETECTED: This appears to be reversed text. Decoded: '{reversed_q}'"
47
-
48
- # Word puzzles
49
- if 'rewsna' in question or 'tfel' in question:
50
- return "TRICK DETECTED: Contains reversed words. Try reading backwards."
51
-
52
- # Contradictory statements
53
- contradiction_words = ['impossible', 'never', 'always', 'none', 'all']
54
- if sum(word in q_lower for word in contradiction_words) >= 2:
55
- return "TRICK DETECTED: Contains contradictory terms. Look for logical impossibilities."
56
-
57
- # Mathematical tricks
58
- if any(phrase in q_lower for phrase in ['how many', 'total', 'sum']) and 'zero' in q_lower:
59
- return "TRICK DETECTED: Mathematical trick involving zero or impossible calculations."
60
-
61
- return "No obvious trick detected. Proceed with normal analysis."
62
-
63
- class StepByStepReasoner(Tool):
64
- """Breaks down complex questions into steps"""
65
 
66
  def __init__(self):
67
  super().__init__()
68
- self.name = "step_reasoner"
69
- self.description = "Break down complex questions into logical steps"
70
- self.inputs = {"question": {"type": "string", "description": "The question to break down"}}
71
- self.output_type = "string"
 
 
 
 
 
 
72
 
73
- def forward(self, question: str) -> str:
74
- """Break question into reasoning steps"""
75
- steps = []
76
- q_lower = question.lower()
77
-
78
- # Identify question components
79
- if any(word in q_lower for word in ['who', 'what', 'when', 'where', 'why', 'how']):
80
- steps.append("1. Identify the specific information being requested")
81
-
82
- if any(word in q_lower for word in ['between', 'from', 'to', 'during']):
83
- steps.append("2. Note the time period or range specified")
84
-
85
- if any(word in q_lower for word in ['calculate', 'count', 'how many', 'total']):
86
- steps.append("3. Determine what needs to be calculated or counted")
87
-
88
- if any(word in q_lower for word in ['wikipedia', 'article', 'featured']):
89
- steps.append("4. Consider Wikipedia-specific processes and history")
90
-
91
- if any(word in q_lower for word in ['only', 'single', 'one', 'unique']):
92
- steps.append("5. Focus on finding the single/unique answer requested")
93
-
94
- steps.append("6. Verify the answer makes logical sense")
95
-
96
- return "REASONING STEPS:\n" + "\n".join(steps)
97
-
98
- class FactChecker(Tool):
99
- """Validates factual claims and provides confidence levels"""
100
 
101
  def __init__(self):
102
  super().__init__()
103
- self.name = "fact_checker"
104
- self.description = "Check factual accuracy and provide confidence assessment"
105
- self.inputs = {"claim": {"type": "string", "description": "The claim to fact-check"}}
106
- self.output_type = "string"
107
-
108
- def forward(self, claim: str) -> str:
109
- """Assess factual accuracy of a claim"""
110
- confidence_indicators = {
111
- 'high': ['wikipedia', 'well-known', 'documented', 'official', 'verified'],
112
- 'medium': ['likely', 'probably', 'appears', 'seems', 'reported'],
113
- 'low': ['unclear', 'uncertain', 'possibly', 'might', 'could be']
114
- }
115
-
116
- claim_lower = claim.lower()
117
 
118
- # Check for confidence indicators
119
- high_conf = sum(1 for word in confidence_indicators['high'] if word in claim_lower)
120
- medium_conf = sum(1 for word in confidence_indicators['medium'] if word in claim_lower)
121
- low_conf = sum(1 for word in confidence_indicators['low'] if word in claim_lower)
 
 
 
 
 
 
 
 
122
 
123
- if high_conf > medium_conf and high_conf > low_conf:
124
- return f"CONFIDENCE: HIGH - Claim appears to be well-documented: '{claim}'"
125
- elif low_conf > high_conf:
126
- return f"CONFIDENCE: LOW - Claim contains uncertainty markers: '{claim}'"
127
- else:
128
- return f"CONFIDENCE: MEDIUM - Standard factual claim: '{claim}'"
129
 
130
- class AnswerValidator(Tool):
131
- """Validates if an answer makes sense for the question"""
132
-
133
  def __init__(self):
134
- super().__init__()
135
- self.name = "answer_validator"
136
- self.description = "Validate if an answer is reasonable for the given question"
137
- self.inputs = {"question": {"type": "string", "description": "The question"}, "answer": {"type": "string", "description": "The answer to validate"}}
138
- self.output_type = "string"
139
-
140
- def forward(self, question: str, answer: str) -> str:
141
- """Check if answer is reasonable for the question"""
142
- q_lower = question.lower()
143
- a_lower = answer.lower()
144
 
145
- # Check for question-answer type matching
146
- if 'who' in q_lower and not any(indicator in a_lower for indicator in ['person', 'user', 'editor', 'author', 'name']):
147
- return "WARNING: 'Who' question but answer doesn't seem to identify a person"
 
148
 
149
- if 'when' in q_lower and not any(indicator in a_lower for indicator in ['year', 'date', 'time', '20', '19']):
150
- return "WARNING: 'When' question but answer doesn't contain time information"
 
151
 
152
- if 'how many' in q_lower and not any(char.isdigit() for char in answer):
153
- return "WARNING: 'How many' question but answer contains no numbers"
 
 
 
 
154
 
155
- if len(answer.strip()) < 3:
156
- return "WARNING: Answer seems too short"
 
 
 
 
 
 
 
 
157
 
158
- if len(answer.strip()) > 200:
159
- return "WARNING: Answer seems too long - may need to be more concise"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- return "VALIDATION: Answer format appears appropriate for question type"
162
-
163
- # --- Enhanced Agent with Tools ---
164
- class SlpMultiAgent:
165
- def __init__(self):
166
- print("Enhanced Agent initialized with reasoning tools.")
167
- self.trick_detector = TrickQuestionDetector()
168
- self.step_reasoner = StepByStepReasoner()
169
- self.fact_checker = FactChecker()
170
- self.answer_validator = AnswerValidator()
171
 
172
- async def __call__(self, question: str) -> str:
173
- print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # Step 1: Check for tricks
176
- trick_analysis = self.trick_detector.forward(question)
177
- print(f"Trick analysis: {trick_analysis}")
178
 
179
- # Step 2: Break down reasoning steps
180
- reasoning_steps = self.step_reasoner.forward(question)
181
- print(f"Reasoning steps: {reasoning_steps}")
182
 
183
- # Step 3: Simple direct approach without complex model calls
184
- try:
185
- # Handle trick questions directly
186
- if "TRICK DETECTED" in trick_analysis:
187
- if "reversed text" in trick_analysis:
188
- # Extract the decoded text and answer it
189
- if "opposite of the word 'left'" in trick_analysis:
190
- return "right"
191
- # For other reversed text, try to extract and answer
192
- decoded_start = trick_analysis.find("Decoded: '") + 10
193
- decoded_end = trick_analysis.find("'", decoded_start)
194
- if decoded_start > 9 and decoded_end > decoded_start:
195
- decoded_question = trick_analysis[decoded_start:decoded_end]
196
- if "opposite of the word 'left'" in decoded_question:
197
- return "right"
198
-
199
- # For non-trick questions, provide a reasonable default response
200
- # This is a minimal approach to avoid model call issues
201
- result = "I need more information to provide a specific answer to this question."
202
 
203
- return result
204
-
205
- except Exception as e:
206
- print(f"Model call failed: {e}")
207
- return "I apologize, but I'm currently experiencing technical difficulties."
208
-
 
 
 
 
209
  def check_reasoning(final_answer, agent_memory):
 
210
  return True
211
 
212
 
@@ -271,8 +261,8 @@ async def run_and_submit_all(profile):
271
  answers_payload = []
272
  print(f"Running agent on {len(questions_data)} questions...")
273
 
274
- # Process questions with controlled concurrency
275
- semaphore = asyncio.Semaphore(2) # Process 2 questions at a time
276
 
277
  async def process_question(item):
278
  task_id = item.get("task_id")
@@ -282,16 +272,27 @@ async def run_and_submit_all(profile):
282
  return None
283
 
284
  async with semaphore:
285
- try:
286
- print(f"Processing task {task_id}")
287
- submitted_answer = await agent(question_text)
288
- return {"task_id": task_id, "submitted_answer": submitted_answer,
289
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
290
- except Exception as e:
291
- print(f"Error running agent on task {task_id}: {e}")
292
- default_answer = "I don't have enough information to answer this question accurately."
293
- return {"task_id": task_id, "submitted_answer": default_answer,
294
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  # Create tasks for all questions
297
  tasks = [process_question(item) for item in questions_data]
 
8
  import time
9
  import random
10
  import json
 
11
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
12
 
13
 
 
21
 
22
  OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
23
 
24
+ # --- Custom Tools ---
25
+ class KnowledgeBaseTool(Tool):
26
+ name = "knowledge_base"
27
+ description = "Access structured knowledge for common topics"
28
+ inputs = {"topic": {"type": "string", "description": "The topic to look up"}}
29
+ output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def __init__(self):
32
  super().__init__()
33
+ self.is_initialized = True
34
+ # Common knowledge base
35
+ self.knowledge = {
36
+ "olympics": "Olympic Games data: Countries, athletes, years, sports",
37
+ "countries": "Country codes: ISO, IOC, FIFA codes and country information",
38
+ "sports": "Sports history, rules, famous athletes and events",
39
+ "science": "Scientific facts, formulas, discoveries, and researchers",
40
+ "history": "Historical events, dates, people, and places",
41
+ "geography": "Countries, capitals, populations, and geographical features"
42
+ }
43
 
44
+ def forward(self, topic: str) -> str:
45
+ topic_lower = topic.lower()
46
+ for key, info in self.knowledge.items():
47
+ if key in topic_lower:
48
+ return f"Knowledge base: {info}. Use this context to answer questions about {topic}."
49
+ return f"No specific knowledge base entry for '{topic}'. Use general reasoning."
50
+
51
+ class WikipediaSearchTool(Tool):
52
+ name = "wikipedia_search"
53
+ description = "Search Wikipedia for information"
54
+ inputs = {"query": {"type": "string", "description": "The search query for Wikipedia"}}
55
+ output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def __init__(self):
58
  super().__init__()
59
+ self.is_initialized = True
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ def forward(self, query: str) -> str:
62
+ """Search Wikipedia with simple fallback."""
63
+ try:
64
+ import requests
65
+ wiki_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
66
+ response = requests.get(wiki_url, timeout=2)
67
+ if response.status_code == 200:
68
+ data = response.json()
69
+ if 'extract' in data and data['extract']:
70
+ return f"Wikipedia: {data['extract'][:500]}" # Limit length
71
+ except Exception as e:
72
+ print(f"Wikipedia search failed: {e}")
73
 
74
+ return f"Wikipedia search unavailable for '{query}'. Use your knowledge to answer."
 
 
 
 
 
75
 
76
+ # --- Basic Agent Definition ---
77
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
78
+ class SlpMultiAgent:
79
  def __init__(self):
80
+ print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
81
 
82
+ async def __call__(self, question: str) -> str:
83
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
84
+ fixed_answer = "This is a default answer."
85
+ print(f"Agent returning fixed answer: {fixed_answer}")
86
 
87
+ # NOTE: truncation is currently disabled (the slice below is commented out), so the full question is passed on unchanged
88
+ MAX_QUESTION_LENGTH = 1000
89
+ short_question = question # [:MAX_QUESTION_LENGTH]
90
 
91
+ # Use cheaper, faster model
92
+ model = OpenAIServerModel(
93
+ model_id="gpt-3.5-turbo",
94
+ temperature=0.0, # Deterministic for consistency
95
+ max_tokens=400 # Reduced tokens for cost efficiency
96
+ )
97
 
98
+ # Create only essential agents with reduced complexity
99
+ research_agent = CodeAgent(
100
+ tools=[KnowledgeBaseTool()], # Remove Wikipedia to avoid timeouts
101
+ model=model,
102
+ additional_authorized_imports=["re", "datetime"],
103
+ max_steps=2, # Reduced steps for cost
104
+ name="ResearchAgent",
105
+ verbosity_level=0,
106
+ description="Quick factual research and knowledge lookup."
107
+ )
108
 
109
+ solver_agent = CodeAgent(
110
+ tools=[],
111
+ model=model,
112
+ additional_authorized_imports=["math", "re", "collections", "itertools"],
113
+ max_steps=2, # Reduced steps
114
+ name="SolverAgent",
115
+ verbosity_level=0,
116
+ description="Problem solving, calculations, and logical reasoning."
117
+ )
118
+
119
+ manager_agent = CodeAgent(
120
+ model=OpenAIServerModel(
121
+ model_id="gpt-3.5-turbo",
122
+ temperature=0.0,
123
+ max_tokens=500
124
+ ),
125
+ tools=[KnowledgeBaseTool()], # Remove Wikipedia to avoid timeouts
126
+ managed_agents=[research_agent, solver_agent], # Only 2 agents
127
+ name="ManagerAgent",
128
+ description="Efficient manager for quick problem solving.",
129
+ additional_authorized_imports=["re", "math"],
130
+ planning_interval=1, # Faster planning
131
+ verbosity_level=0, # Reduce verbosity
132
+ max_steps=3, # Further reduced steps to avoid timeouts
133
+ final_answer_checks=[check_reasoning]
134
+ )
135
 
136
+ # Create a task for the agent run with retry mechanism for rate limits
137
+ max_retries = 3
138
+ result = None
 
 
 
 
 
 
 
139
 
140
+ for attempt in range(max_retries):
141
+ try:
142
+ loop = asyncio.get_event_loop()
143
+ result = await loop.run_in_executor(
144
+ None,
145
+ lambda: manager_agent.run(f"""
146
+ Question: {short_question}
147
+
148
+ You have knowledge_base() tool and two agents:
149
+ - ResearchAgent: For factual questions
150
+ - SolverAgent: For calculations and logic
151
+
152
+ IMPORTANT: Always end with exactly this format:
153
+ <code>
154
+ final_answer("your direct answer")
155
+ </code>
156
+
157
+ Be concise and direct.
158
+ """)
159
+ )
160
+ break # Success, exit retry loop
161
+ except Exception as e:
162
+ print(f"Attempt {attempt+1}/{max_retries} failed: {e}")
163
+ if "rate limit" in str(e).lower() and attempt < max_retries - 1:
164
+ # Add jitter to avoid synchronized retries
165
+ wait_time = (attempt + 1) * 10 + random.uniform(0, 5)
166
+ print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
167
+ await asyncio.sleep(wait_time)
168
+ elif attempt < max_retries - 1:
169
+ await asyncio.sleep(5) # Wait before general retry
170
+ else:
171
+ print(f"All attempts failed. Returning default answer.")
172
+ return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."
173
 
174
+ # If we couldn't get a result after all retries
175
+ if result is None:
176
+ return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."
177
 
 
 
 
178
 
179
+ # Extract clean answer from result
180
+ if result and isinstance(result, str):
181
+ # Look for final_answer pattern
182
+ import re
183
+ final_answer_match = re.search(r'final_answer\(["\']([^"\']*)["\'\)]', result) # Fixed regex
184
+ if final_answer_match:
185
+ clean_answer = final_answer_match.group(1)
186
+ return clean_answer
 
 
 
 
 
 
 
 
 
 
 
187
 
188
+ # If no final_answer found, try to extract the last meaningful line
189
+ lines = result.strip().split('\n')
190
+ for line in reversed(lines):
191
+ line = line.strip()
192
+ if line and not line.startswith('#') and not line.startswith('###') and len(line) < 200:
193
+ return line
194
+
195
+ # Return the result from the agent
196
+ return result if result else "Unable to determine answer."
197
+
198
  def check_reasoning(final_answer, agent_memory):
199
+ # Skip expensive validation to save costs
200
  return True
201
 
202
 
 
261
  answers_payload = []
262
  print(f"Running agent on {len(questions_data)} questions...")
263
 
264
+ # Process questions one at a time to avoid rate limits
265
+ semaphore = asyncio.Semaphore(1) # Process 1 question at a time
266
 
267
  async def process_question(item):
268
  task_id = item.get("task_id")
 
272
  return None
273
 
274
  async with semaphore:
275
+ max_retries = 3
276
+ for attempt in range(max_retries):
277
+ try:
278
+ print(f"Processing task {task_id}, attempt {attempt+1}/{max_retries}")
279
+ submitted_answer = await agent(question_text)
280
+ return {"task_id": task_id, "submitted_answer": submitted_answer,
281
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
282
+ except Exception as e:
283
+ print(f"Error running agent on task {task_id}, attempt {attempt+1}: {e}")
284
+ if "rate limit" in str(e).lower() and attempt < max_retries - 1:
285
+ # Exponential backoff with jitter
286
+ wait_time = (2 ** attempt) * 5 + random.uniform(0, 3)
287
+ print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
288
+ await asyncio.sleep(wait_time)
289
+ elif attempt < max_retries - 1:
290
+ await asyncio.sleep(5) # Reduced wait time
291
+ else:
292
+ # All retries failed, return default answer
293
+ default_answer = "This is a default answer."
294
+ return {"task_id": task_id, "submitted_answer": default_answer,
295
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
296
 
297
  # Create tasks for all questions
298
  tasks = [process_question(item) for item in questions_data]