Kackle committed on
Commit
7837d72
·
verified ·
1 Parent(s): 1d0f146

trying a different approach with original idea

Browse files
Files changed (1) hide show
  1. app.py +277 -210
app.py CHANGED
@@ -8,9 +8,9 @@ import aiohttp
8
  import time
9
  import random
10
  import json
11
- import re
12
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
13
- from gemini_agent import GeminiAgent # Assuming you have a GeminiAgent class defined in gemini_agent.py
14
 
15
  from dotenv import load_dotenv
16
 
@@ -21,223 +21,276 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
 
23
  OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
24
-
25
- # --- Custom Tools for Better Reasoning ---
26
-
27
- class TrickQuestionDetector(Tool):
28
- """Detects and handles trick questions"""
 
 
 
 
 
 
 
 
 
29
 
30
  def __init__(self):
31
  super().__init__()
32
- self.name = "trick_detector"
33
- self.description = "Analyze if a question is a trick question and provide guidance"
34
- self.inputs = {"question": {"type": "string", "description": "The question to analyze"}}
35
-
36
- def detect_trick(self, question: str) -> str:
37
- """Detect common trick question patterns"""
38
- q_lower = question.lower()
39
-
40
- # Reverse text tricks - check if question might be reversed
41
- reversed_q = question[::-1]
42
- if len(question) > 5 and any(c.isalpha() for c in question):
43
- # Simple heuristic: if reversed version has common English patterns
44
- if any(word in reversed_q.lower() for word in ['the', 'and', 'what', 'how', 'when', 'where']):
45
- return f"TRICK DETECTED: This appears to be reversed text. Decoded: '{reversed_q}'"
46
 
47
- # Word puzzles
48
- if 'rewsna' in question or 'tfel' in question:
49
- return "TRICK DETECTED: Contains reversed words. Try reading backwards."
50
-
51
- # Contradictory statements
52
- contradiction_words = ['impossible', 'never', 'always', 'none', 'all']
53
- if sum(word in q_lower for word in contradiction_words) >= 2:
54
- return "TRICK DETECTED: Contains contradictory terms. Look for logical impossibilities."
55
-
56
- # Mathematical tricks
57
- if any(phrase in q_lower for phrase in ['how many', 'total', 'sum']) and 'zero' in q_lower:
58
- return "TRICK DETECTED: Mathematical trick involving zero or impossible calculations."
59
-
60
- return "No obvious trick detected. Proceed with normal analysis."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- class StepByStepReasoner(Tool):
63
- """Breaks down complex questions into steps"""
 
 
 
64
 
65
  def __init__(self):
66
  super().__init__()
67
- self.name = "step_reasoner"
68
- self.description = "Break down complex questions into logical steps"
69
- self.inputs = {"question": {"type": "string", "description": "The question to break down"}}
 
 
 
 
 
 
 
70
 
71
- def reason_steps(self, question: str) -> str:
72
- """Break question into reasoning steps"""
73
- steps = []
74
- q_lower = question.lower()
75
-
76
- # Identify question components
77
- if any(word in q_lower for word in ['who', 'what', 'when', 'where', 'why', 'how']):
78
- steps.append("1. Identify the specific information being requested")
79
-
80
- if any(word in q_lower for word in ['between', 'from', 'to', 'during']):
81
- steps.append("2. Note the time period or range specified")
82
-
83
- if any(word in q_lower for word in ['calculate', 'count', 'how many', 'total']):
84
- steps.append("3. Determine what needs to be calculated or counted")
85
-
86
- if any(word in q_lower for word in ['wikipedia', 'article', 'featured']):
87
- steps.append("4. Consider Wikipedia-specific processes and history")
88
-
89
- if any(word in q_lower for word in ['only', 'single', 'one', 'unique']):
90
- steps.append("5. Focus on finding the single/unique answer requested")
91
-
92
- steps.append("6. Verify the answer makes logical sense")
93
-
94
- return "REASONING STEPS:\n" + "\n".join(steps)
95
-
96
- class FactChecker(Tool):
97
- """Validates factual claims and provides confidence levels"""
98
 
99
  def __init__(self):
100
  super().__init__()
101
- self.name = "fact_checker"
102
- self.description = "Check factual accuracy and provide confidence assessment"
103
- self.inputs = {"claim": {"type": "string", "description": "The claim to fact-check"}}
104
-
105
- def check_facts(self, claim: str) -> str:
106
- """Assess factual accuracy of a claim"""
107
- confidence_indicators = {
108
- 'high': ['wikipedia', 'well-known', 'documented', 'official', 'verified'],
109
- 'medium': ['likely', 'probably', 'appears', 'seems', 'reported'],
110
- 'low': ['unclear', 'uncertain', 'possibly', 'might', 'could be']
111
- }
112
 
113
- claim_lower = claim.lower()
114
-
115
- # Check for confidence indicators
116
- high_conf = sum(1 for word in confidence_indicators['high'] if word in claim_lower)
117
- medium_conf = sum(1 for word in confidence_indicators['medium'] if word in claim_lower)
118
- low_conf = sum(1 for word in confidence_indicators['low'] if word in claim_lower)
 
 
 
 
 
 
119
 
120
- if high_conf > medium_conf and high_conf > low_conf:
121
- return f"CONFIDENCE: HIGH - Claim appears to be well-documented: '{claim}'"
122
- elif low_conf > high_conf:
123
- return f"CONFIDENCE: LOW - Claim contains uncertainty markers: '{claim}'"
 
 
 
 
 
 
124
  else:
125
- return f"CONFIDENCE: MEDIUM - Standard factual claim: '{claim}'"
126
-
127
- class AnswerValidator(Tool):
128
- """Validates if an answer makes sense for the question"""
129
 
130
- def __init__(self):
131
- super().__init__()
132
- self.name = "answer_validator"
133
- self.description = "Validate if an answer is reasonable for the given question"
134
- self.inputs = {"question": {"type": "string", "description": "The question"}, "answer": {"type": "string", "description": "The answer to validate"}}
135
-
136
- def validate_answer(self, question: str, answer: str) -> str:
137
- """Check if answer is reasonable for the question"""
138
- q_lower = question.lower()
139
- a_lower = answer.lower()
140
-
141
- # Check for question-answer type matching
142
- if 'who' in q_lower and not any(indicator in a_lower for indicator in ['person', 'user', 'editor', 'author', 'name']):
143
- return "WARNING: 'Who' question but answer doesn't seem to identify a person"
144
-
145
- if 'when' in q_lower and not any(indicator in a_lower for indicator in ['year', 'date', 'time', '20', '19']):
146
- return "WARNING: 'When' question but answer doesn't contain time information"
147
-
148
- if 'how many' in q_lower and not any(char.isdigit() for char in answer):
149
- return "WARNING: 'How many' question but answer contains no numbers"
150
-
151
- if len(answer.strip()) < 3:
152
- return "WARNING: Answer seems too short"
153
-
154
- if len(answer.strip()) > 200:
155
- return "WARNING: Answer seems too long - may need to be more concise"
156
-
157
- return "VALIDATION: Answer format appears appropriate for question type"
158
 
159
- # --- Enhanced Agent with Tools ---
 
160
  class SlpMultiAgent:
161
  def __init__(self):
162
- print("Enhanced Agent initialized with reasoning tools.")
163
- self.trick_detector = TrickQuestionDetector()
164
- self.step_reasoner = StepByStepReasoner()
165
- self.fact_checker = FactChecker()
166
- self.answer_validator = AnswerValidator()
167
 
168
  async def __call__(self, question: str) -> str:
169
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
170
 
171
- # Step 1: Check for tricks
172
- trick_analysis = self.trick_detector.detect_trick(question)
173
- print(f"Trick analysis: {trick_analysis}")
174
 
175
- # Step 2: Break down reasoning steps
176
- reasoning_steps = self.step_reasoner.reason_steps(question)
177
- print(f"Reasoning steps: {reasoning_steps}")
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Step 3: Enhanced model call with tool insights
180
- model = OpenAIServerModel(
181
- model_id="gpt-4o-mini",
182
- temperature=0.1,
183
- max_tokens=1000
 
 
 
 
184
  )
185
 
186
- try:
187
- enhanced_prompt = f"""You are an expert problem solver. Analyze this question carefully:
188
-
189
- QUESTION: {question}
190
-
191
- TRICK ANALYSIS: {trick_analysis}
192
-
193
- {reasoning_steps}
194
-
195
- Instructions:
196
- 1. If a trick was detected, handle it appropriately
197
- 2. Follow the reasoning steps systematically
198
- 3. Think through each step carefully
199
- 4. Provide a clear, direct answer
200
- 5. If unsure, state your uncertainty clearly
201
-
202
- Be precise and thorough in your analysis."""
203
 
204
- messages = [
205
- {
206
- "role": "system",
207
- "content": "You are an expert at solving complex and trick questions. Always think step by step and be very careful about the exact wording of questions."
208
- },
209
- {
210
- "role": "user",
211
- "content": enhanced_prompt
212
- }
213
- ]
214
-
215
- result = model(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- if result:
218
- # Step 4: Validate the answer
219
- validation = self.answer_validator.validate_answer(question, result)
220
- print(f"Answer validation: {validation}")
221
-
222
- # Clean up the result
223
- lines = result.strip().split('\n')
224
- for line in reversed(lines):
225
- line = line.strip()
226
- if line and len(line) > 5 and not line.startswith(('Step', 'Analysis', 'TRICK', 'REASONING')):
227
- # Remove common prefixes
228
- line = re.sub(r'^(Answer:|Final answer:|The answer is:?)\s*', '', line, flags=re.IGNORECASE)
229
- if line:
230
- return line
231
-
232
- return result
233
- else:
234
- return "I don't have enough information to answer this question accurately."
235
-
236
- except Exception as e:
237
- print(f"Model call failed: {e}")
238
- return "I apologize, but I'm currently experiencing technical difficulties."
239
-
240
  def check_reasoning(final_answer, agent_memory):
 
241
  return True
242
 
243
 
@@ -268,7 +321,7 @@ async def run_and_submit_all(profile):
268
 
269
  # 1. Instantiate Agent ( modify this part to create your agent)
270
  try:
271
- agent = GeminiAgent()
272
  except Exception as e:
273
  print(f"Error instantiating agent: {e}")
274
  return f"Error initializing agent: {e}", None
@@ -302,8 +355,8 @@ async def run_and_submit_all(profile):
302
  answers_payload = []
303
  print(f"Running agent on {len(questions_data)} questions...")
304
 
305
- # Process questions with controlled concurrency
306
- semaphore = asyncio.Semaphore(2) # Process 2 questions at a time
307
 
308
  async def process_question(item):
309
  task_id = item.get("task_id")
@@ -313,16 +366,27 @@ async def run_and_submit_all(profile):
313
  return None
314
 
315
  async with semaphore:
316
- try:
317
- print(f"Processing task {task_id}")
318
- submitted_answer = await agent(question_text)
319
- return {"task_id": task_id, "submitted_answer": submitted_answer,
320
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
321
- except Exception as e:
322
- print(f"Error running agent on task {task_id}: {e}")
323
- default_answer = "I don't have enough information to answer this question accurately."
324
- return {"task_id": task_id, "submitted_answer": default_answer,
325
- "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
 
 
 
 
 
 
 
 
 
 
 
326
 
327
  # Create tasks for all questions
328
  tasks = [process_question(item) for item in questions_data]
@@ -394,17 +458,20 @@ async def run_and_submit_all(profile):
394
 
395
  # --- Build Gradio Interface using Blocks ---
396
  with gr.Blocks() as demo:
397
- gr.Markdown("# Basic Agent Evaluation Runner")
398
  gr.Markdown(
399
  """
 
 
 
 
 
400
  **Instructions:**
401
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
402
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
403
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
404
  ---
405
- **Disclaimers:**
406
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
407
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
408
  """
409
  )
410
 
@@ -455,5 +522,5 @@ if __name__ == "__main__":
455
 
456
  print("-"*(60 + len(" App Starting ")) + "\n")
457
 
458
- print("Launching Gradio Interface for Basic Agent Evaluation...")
459
  demo.launch(debug=True, share=False)
 
8
  import time
9
  import random
10
  import json
11
+ import google.generativeai as genai
12
  from smolagents import FinalAnswerTool, Tool, tool, OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, VisitWebpageTool
13
+
14
 
15
  from dotenv import load_dotenv
16
 
 
21
 
22
 
23
# API credentials are read from the environment (.env is loaded above via load_dotenv).
OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
GOOGLE_SEARCH_API_KEY = os.getenv('GOOGLE_SEARCH_API_KEY')  # Custom Search JSON API key
GOOGLE_SEARCH_CX = os.getenv('GOOGLE_SEARCH_CX')  # Custom Search engine ID (cx)

# Configure Gemini only when a key is present; GeminiModel below handles the
# missing-key case by falling back to OpenAI.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
31
+
32
+ # --- Custom Tools ---
33
class GoogleSearchTool(Tool):
    """Search tool backed by the Google Custom Search JSON API.

    Returns a formatted, length-limited string of results, or a descriptive
    message when the API is unconfigured, the request fails, or no results
    are found — it never raises to the caller.
    """

    name = "google_search"
    description = "Search Google for current information and facts"
    inputs = {"query": {"type": "string", "description": "The search query for Google"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True
        # Credentials come from the module-level environment constants; they
        # may be None, in which case forward() degrades gracefully.
        self.google_search_api_key = GOOGLE_SEARCH_API_KEY
        self.google_search_cx = GOOGLE_SEARCH_CX

    def forward(self, query: str) -> str:
        """Perform a Google search using the Custom Search API."""
        if not self.google_search_api_key or not self.google_search_cx:
            return f"Google Search API not configured. Query was: {query}"

        try:
            # BUG FIX: `requests` was used here without ever being imported
            # (it is not a module-level import — see WikipediaSearchTool,
            # which also imports it locally), causing a NameError at runtime.
            import requests

            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.google_search_api_key,
                'cx': self.google_search_cx,
                'q': query,
                'num': 5
            }

            response = requests.get(url, params=params, timeout=10)
            if response.status_code != 200:
                return f"Google Search failed with status {response.status_code}"

            results = response.json()
            if 'items' not in results:
                return f"No search results found for: {query}"

            # Format search results as "title: snippet" bullet points.
            formatted_results = f"Google search results for '{query}':\n\n"
            for item in results['items']:
                title = item.get('title', 'No title')
                snippet = item.get('snippet', 'No description')
                formatted_results += f"• {title}: {snippet}\n"

            return formatted_results[:1000]  # Limit length

        except Exception as e:
            # Best-effort tool: report the failure as text instead of raising.
            return f"Google Search error for '{query}': {str(e)}"
78
 
79
class KnowledgeBaseTool(Tool):
    """Tiny in-memory lookup of canned context strings for common topics."""

    name = "knowledge_base"
    description = "Access structured knowledge for common topics"
    inputs = {"topic": {"type": "string", "description": "The topic to look up"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True
        # Static topic -> summary mapping, matched by substring in forward().
        self.knowledge = {
            "olympics": "Olympic Games data: Countries, athletes, years, sports",
            "countries": "Country codes: ISO, IOC, FIFA codes and country information",
            "sports": "Sports history, rules, famous athletes and events",
            "science": "Scientific facts, formulas, discoveries, and researchers",
            "history": "Historical events, dates, people, and places",
            "geography": "Countries, capitals, populations, and geographical features",
        }

    def forward(self, topic: str) -> str:
        """Return canned context for the first knowledge key contained in *topic*."""
        needle = topic.lower()
        # First key (in insertion order) whose name appears in the query wins.
        hit = next(
            (info for key, info in self.knowledge.items() if key in needle),
            None,
        )
        if hit is not None:
            return f"Knowledge base: {hit}. Use this context to answer questions about {topic}."
        return f"No specific knowledge base entry for '{topic}'. Use general reasoning."
104
+
105
class WikipediaSearchTool(Tool):
    """Look up a topic summary via Wikipedia's REST page-summary endpoint."""

    name = "wikipedia_search"
    description = "Search Wikipedia for information"
    inputs = {"query": {"type": "string", "description": "The search query for Wikipedia"}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        self.is_initialized = True

    def forward(self, query: str) -> str:
        """Return a truncated Wikipedia extract, or a fallback message on any failure."""
        try:
            import requests  # local import: requests is not a module-level dependency

            endpoint = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
            resp = requests.get(endpoint, timeout=2)
            if resp.status_code == 200:
                payload = resp.json()
                extract = payload.get('extract')
                if extract:
                    return f"Wikipedia: {extract[:500]}"  # Limit length
        except Exception as e:
            # Network/parse failures are non-fatal; log and fall through.
            print(f"Wikipedia search failed: {e}")

        return f"Wikipedia search unavailable for '{query}'. Use your knowledge to answer."
129
+
130
+ # --- Gemini Model Wrapper ---
131
class GeminiModel:
    """Thin wrapper around google.generativeai with a stored generation config."""

    def __init__(self, model_name="gemini-2.0-flash", temperature=0.0, max_tokens=500):
        self.model_name = model_name
        self.temperature = temperature
        self.max_tokens = max_tokens
        # Only build the underlying client when an API key was configured;
        # otherwise generate_content() reports unavailability.
        if GOOGLE_API_KEY:
            self.model = genai.GenerativeModel(model_name)
        else:
            self.model = None
            print("Warning: Google API key not found, falling back to OpenAI")

    def generate_content(self, prompt):
        """Generate text for *prompt*; returns an error string instead of raising."""
        if not self.model:
            return "Gemini model not available"
        try:
            config = genai.types.GenerationConfig(
                max_output_tokens=self.max_tokens,
                temperature=self.temperature,
            )
            reply = self.model.generate_content(prompt, generation_config=config)
            return reply.text
        except Exception as e:
            print(f"Gemini API error: {e}")
            return f"Error generating response: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ # --- Basic Agent Definition ---
160
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
161
class SlpMultiAgent:
    """Multi-agent question answerer: a manager CodeAgent coordinating a
    research agent and a solver agent, all with Google Search access.

    Called as ``await agent(question)``; always returns a string (an answer
    or an apology message), never raises to the caller.
    """

    def __init__(self):
        print("BasicAgent initialized with Gemini and Google Search.")

    async def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # BUG FIX: removed dead `fixed_answer` assignment and its misleading
        # "Agent returning fixed answer" log line — that value was never returned.

        # Truncation is deliberately disabled; kept for easy re-enabling.
        MAX_QUESTION_LENGTH = 1000
        short_question = question  # [:MAX_QUESTION_LENGTH]

        # Use Gemini as primary model, fallback to OpenAI.
        # NOTE(review): when GeminiModel is selected, every CodeAgent below
        # still substitutes an OpenAIServerModel (GeminiModel does not
        # implement smolagents' model interface) — confirm this is intended.
        if GOOGLE_API_KEY:
            model = GeminiModel(
                model_name="gemini-2.0-flash",
                temperature=0.0,
                max_tokens=400
            )
            print("Using Gemini model")
        else:
            model = OpenAIServerModel(
                model_id="gpt-3.5-turbo",
                temperature=0.0,
                max_tokens=400
            )
            print("Using OpenAI model (Gemini not available)")

        # Create only essential agents with Google Search as first option.
        research_agent = CodeAgent(
            tools=[GoogleSearchTool(), KnowledgeBaseTool()],  # Google Search first
            model=model if not isinstance(model, GeminiModel) else OpenAIServerModel(model_id="gpt-3.5-turbo", temperature=0.0, max_tokens=400),
            additional_authorized_imports=["re", "datetime"],
            max_steps=2,
            name="ResearchAgent",
            verbosity_level=0,
            description="Research agent with Google Search and knowledge lookup."
        )

        solver_agent = CodeAgent(
            tools=[GoogleSearchTool()],  # Add Google Search to solver too
            model=model if not isinstance(model, GeminiModel) else OpenAIServerModel(model_id="gpt-3.5-turbo", temperature=0.0, max_tokens=400),
            additional_authorized_imports=["math", "re", "collections", "itertools"],
            max_steps=2,
            name="SolverAgent",
            verbosity_level=0,
            description="Problem solving with Google Search capability."
        )

        manager_agent = CodeAgent(
            model=OpenAIServerModel(
                model_id="gpt-3.5-turbo",
                temperature=0.0,
                max_tokens=500
            ),
            tools=[GoogleSearchTool(), KnowledgeBaseTool()],  # Google Search first
            managed_agents=[research_agent, solver_agent],
            name="ManagerAgent",
            description="Manager with Google Search and agent coordination.",
            additional_authorized_imports=["re", "math"],
            planning_interval=1,
            verbosity_level=0,
            max_steps=3,
            final_answer_checks=[check_reasoning]
        )

        # Run the manager in an executor (it is synchronous) with a retry
        # mechanism for rate limits.
        max_retries = 3
        result = None

        for attempt in range(max_retries):
            try:
                loop = asyncio.get_event_loop()
                result = await loop.run_in_executor(
                    None,
                    lambda: manager_agent.run(f"""
Question: {short_question}

You have google_search() as your PRIMARY tool, plus knowledge_base() and two agents:
- ResearchAgent: For factual questions (has Google Search)
- SolverAgent: For calculations and logic (has Google Search)

ALWAYS try google_search() FIRST for factual questions before using other tools.

IMPORTANT: Always end with exactly this format:
<code>
final_answer("your direct answer")
</code>

Be concise and direct.
""")
                )
                break  # Success, exit retry loop
            except Exception as e:
                print(f"Attempt {attempt+1}/{max_retries} failed: {e}")
                if "rate limit" in str(e).lower() and attempt < max_retries - 1:
                    # Add jitter to avoid synchronized retries
                    wait_time = (attempt + 1) * 10 + random.uniform(0, 5)
                    print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
                    await asyncio.sleep(wait_time)
                elif attempt < max_retries - 1:
                    await asyncio.sleep(5)  # Wait before general retry
                else:
                    print(f"All attempts failed. Returning default answer.")
                    return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."

        # If we couldn't get a result after all retries
        if result is None:
            return "I apologize, but I'm currently experiencing technical difficulties. Please try again later."

        # Extract a clean answer from a string result.
        if result and isinstance(result, str):
            import re
            # BUG FIX: the old pattern's closing class ["\'\)] allowed a bare
            # ')' to terminate a quoted answer; require the closing quote.
            final_answer_match = re.search(r'final_answer\(["\']([^"\']*)["\']', result)
            if final_answer_match:
                return final_answer_match.group(1)

            # Otherwise take the last short, non-heading line.
            # (The redundant startswith('###') test was subsumed by '#'.)
            for line in reversed(result.strip().split('\n')):
                line = line.strip()
                if line and not line.startswith('#') and len(line) < 200:
                    return line

        # Return the result from the agent
        return result if result else "Unable to determine answer."
291
+
 
 
 
 
 
 
 
 
 
 
 
 
 
292
def check_reasoning(final_answer, agent_memory):
    """Final-answer hook for the manager CodeAgent; accepts every answer.

    Expensive validation is deliberately skipped to save API costs, so this
    unconditionally signals success regardless of *final_answer* or
    *agent_memory*.
    """
    return True
295
 
296
 
 
321
 
322
  # 1. Instantiate Agent ( modify this part to create your agent)
323
  try:
324
+ agent = SlpMultiAgent()
325
  except Exception as e:
326
  print(f"Error instantiating agent: {e}")
327
  return f"Error initializing agent: {e}", None
 
355
  answers_payload = []
356
  print(f"Running agent on {len(questions_data)} questions...")
357
 
358
+ # Process questions one at a time to avoid rate limits
359
+ semaphore = asyncio.Semaphore(1) # Process 1 question at a time
360
 
361
  async def process_question(item):
362
  task_id = item.get("task_id")
 
366
  return None
367
 
368
  async with semaphore:
369
+ max_retries = 3
370
+ for attempt in range(max_retries):
371
+ try:
372
+ print(f"Processing task {task_id}, attempt {attempt+1}/{max_retries}")
373
+ submitted_answer = await agent(question_text)
374
+ return {"task_id": task_id, "submitted_answer": submitted_answer,
375
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}}
376
+ except Exception as e:
377
+ print(f"Error running agent on task {task_id}, attempt {attempt+1}: {e}")
378
+ if "rate limit" in str(e).lower() and attempt < max_retries - 1:
379
+ # Exponential backoff with jitter
380
+ wait_time = (2 ** attempt) * 5 + random.uniform(0, 3)
381
+ print(f"Rate limit hit. Waiting {wait_time:.2f} seconds before retry...")
382
+ await asyncio.sleep(wait_time)
383
+ elif attempt < max_retries - 1:
384
+ await asyncio.sleep(5) # Reduced wait time
385
+ else:
386
+ # All retries failed, return default answer
387
+ default_answer = "This is a default answer."
388
+ return {"task_id": task_id, "submitted_answer": default_answer,
389
+ "log": {"Task ID": task_id, "Question": question_text, "Submitted Answer": default_answer}}
390
 
391
  # Create tasks for all questions
392
  tasks = [process_question(item) for item in questions_data]
 
458
 
459
  # --- Build Gradio Interface using Blocks ---
460
  with gr.Blocks() as demo:
461
+ gr.Markdown("# Enhanced Agent with Google Search & Gemini")
462
  gr.Markdown(
463
  """
464
+ **Features:**
465
+ - **Google Search Integration**: Primary tool for factual information
466
+ - **Gemini 2.0 Flash**: Advanced AI model for reasoning
467
+ - **Multi-Agent Architecture**: Research and Solver agents with search capabilities
468
+
469
  **Instructions:**
470
+ 1. Set up your environment variables: GOOGLE_API_KEY, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_CX
471
+ 2. Log in to your Hugging Face account using the button below
472
+ 3. Click 'Run Evaluation & Submit All Answers' to start the enhanced agent
473
  ---
474
+ **Note:** The agent will prioritize Google Search for factual questions, providing more accurate and current information.
 
 
475
  """
476
  )
477
 
 
522
 
523
  print("-"*(60 + len(" App Starting ")) + "\n")
524
 
525
+ print("Launching Gradio Interface for Enhanced Agent with Google Search & Gemini...")
526
  demo.launch(debug=True, share=False)