Kackle committed on
Commit
f152db2
·
verified ·
1 Parent(s): 3ffe515

google search with disco added

Browse files
Files changed (1) hide show
  1. gemini_agent.py +208 -13
gemini_agent.py CHANGED
@@ -139,24 +139,35 @@ class GeminiAgent:
139
  """Handle questions about actors, TV shows, and movies with enhanced search"""
140
  print(f"Processing actor/show question: {question[:50]}...")
141
 
142
- # Always try both Wikipedia and DuckDuckGo for these questions
 
143
  wiki_context = ""
144
  ddg_context = ""
145
 
 
 
 
 
 
 
146
  try:
147
  wiki_context = self.wiki_tool.run(question)
148
  print("Wikipedia search completed")
149
  except Exception as e:
150
  print(f"Wikipedia tool failed: {e}")
151
 
152
- try:
153
- ddg_context = self.ddg_tool.run(question)
154
- print("DuckDuckGo search completed")
155
- except Exception as e:
156
- print(f"DuckDuckGo tool failed: {e}")
 
 
157
 
158
  # Combine contexts if available
159
  combined_context = ""
 
 
160
  if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
161
  combined_context += f"Wikipedia context: {wiki_context}\n\n"
162
  if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
@@ -199,6 +210,174 @@ If the answer is a person's name, provide ONLY their first name as requested."""
199
 
200
  return answer
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  async def _handle_video_question(self, question: str) -> str:
203
  """Handle questions that require video analysis"""
204
  # Extract YouTube URL
@@ -322,7 +501,8 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
322
  "from wikipedia" in q or
323
  "search the web" in q or
324
  "duckduckgo" in q or
325
- "web search" in q
 
326
  )
327
 
328
  def is_factual_question(question):
@@ -333,16 +513,25 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
333
  "what role", "what character", "what part",
334
  "which actor", "which actress",
335
  "in the movie", "in the show", "in the series", "in the film",
336
- "version of"
 
337
  ]
338
  return any(pattern in q for pattern in factual_patterns)
339
 
340
  wiki_context = ""
 
341
  ddg_context = ""
342
 
343
- # Use retrieval for explicit web/Wikipedia questions OR factual questions about actors/shows
344
  if is_explicit_retrieval_question(question) or is_factual_question(question):
345
- # For factual questions, always try both Wikipedia and DuckDuckGo
 
 
 
 
 
 
 
346
  if is_factual_question(question) or "wikipedia" in question.lower():
347
  try:
348
  wiki_context = self.wiki_tool.run(question)
@@ -350,7 +539,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
350
  except Exception as e:
351
  print(f"Wikipedia tool failed: {e}")
352
 
353
- if is_factual_question(question) or "duckduckgo" in question.lower() or "web search" in question.lower():
 
 
354
  try:
355
  ddg_context = self.ddg_tool.run(question)
356
  print(f"DuckDuckGo search completed for: {question[:50]}...")
@@ -397,9 +588,11 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
397
  def is_good_context(context):
398
  return context and not any(x in context.lower() for x in ["not found", "no results", "does not contain information"])
399
 
400
- # For factual questions about actors/shows, try to use both Wikipedia and web search results
401
  if is_factual_question(question):
402
  combined_context = ""
 
 
403
  if wiki_context and is_good_context(wiki_context):
404
  combined_context += f"Wikipedia context: {wiki_context}\n\n"
405
  if ddg_context and is_good_context(ddg_context):
@@ -409,7 +602,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
409
  prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
410
  else:
411
  # For non-factual questions, use the first good context available
412
- if wiki_context and is_good_context(wiki_context):
 
 
413
  prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
414
  elif ddg_context and is_good_context(ddg_context):
415
  prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"
 
139
  """Handle questions about actors, TV shows, and movies with enhanced search"""
140
  print(f"Processing actor/show question: {question[:50]}...")
141
 
142
+ # Try Google Search first, then Wikipedia and DuckDuckGo
143
+ google_context = ""
144
  wiki_context = ""
145
  ddg_context = ""
146
 
147
+ try:
148
+ google_context = await self._google_search(question, num_results=7)
149
+ print("Google search completed")
150
+ except Exception as e:
151
+ print(f"Google search failed: {e}")
152
+
153
  try:
154
  wiki_context = self.wiki_tool.run(question)
155
  print("Wikipedia search completed")
156
  except Exception as e:
157
  print(f"Wikipedia tool failed: {e}")
158
 
159
+ # Only use DuckDuckGo if Google search failed
160
+ if not google_context:
161
+ try:
162
+ ddg_context = self.ddg_tool.run(question)
163
+ print("DuckDuckGo search completed")
164
+ except Exception as e:
165
+ print(f"DuckDuckGo tool failed: {e}")
166
 
167
  # Combine contexts if available
168
  combined_context = ""
169
+ if google_context and not any(x in google_context.lower() for x in ["not found", "no results", "does not contain"]):
170
+ combined_context += f"Google search context: {google_context}\n\n"
171
  if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
172
  combined_context += f"Wikipedia context: {wiki_context}\n\n"
173
  if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
 
210
 
211
  return answer
212
 
213
+ async def _handle_discography_question(self, question: str) -> str:
214
+ """Handle questions about music discography with enhanced search capabilities"""
215
+ print(f"Processing discography question: {question[:50]}...")
216
+
217
+ # Extract key information from the question
218
+ artist_name = ""
219
+ start_year = None
220
+ end_year = None
221
+ album_type = "studio albums" # Default to studio albums
222
+
223
+ # Try to extract artist name
224
+ artist_patterns = [
225
+ r'by\s+([\w\s]+)\s+between', # "by Mercedes Sosa between"
226
+ r'([\w\s]+)\s+albums', # "Mercedes Sosa albums"
227
+ r'([\w\s]+)\s+discography', # "Mercedes Sosa discography"
228
+ r'([\w\s]+)\s+between\s+\d{4}' # "Mercedes Sosa between 2000"
229
+ ]
230
+
231
+ for pattern in artist_patterns:
232
+ match = re.search(pattern, question, re.IGNORECASE)
233
+ if match:
234
+ artist_name = match.group(1).strip()
235
+ break
236
+
237
+ # Extract date range
238
+ date_patterns = [
239
+ r'between\s+(\d{4})\s+and\s+(\d{4})', # "between 2000 and 2009"
240
+ r'from\s+(\d{4})\s+to\s+(\d{4})', # "from 2000 to 2009"
241
+ r'(\d{4})\s*[-–]\s*(\d{4})', # "2000-2009"
242
+ r'(\d{4})\s+to\s+(\d{4})' # "2000 to 2009"
243
+ ]
244
+
245
+ for pattern in date_patterns:
246
+ match = re.search(pattern, question, re.IGNORECASE)
247
+ if match:
248
+ start_year = int(match.group(1))
249
+ end_year = int(match.group(2))
250
+ break
251
+
252
+ # Check for included year
253
+ if not end_year:
254
+ included_match = re.search(r'(\d{4})\s*\(included\)', question, re.IGNORECASE)
255
+ if included_match:
256
+ end_year = int(included_match.group(1))
257
+
258
+ # Determine album type
259
+ if 'studio album' in question.lower():
260
+ album_type = "studio albums"
261
+ elif 'live album' in question.lower():
262
+ album_type = "live albums"
263
+ elif 'compilation' in question.lower():
264
+ album_type = "compilation albums"
265
+
266
+ # Construct specialized search queries
267
+ search_queries = []
268
+ if artist_name:
269
+ # Create multiple search queries for better coverage
270
+ if start_year and end_year:
271
+ search_queries.append(f"{artist_name} {album_type} between {start_year} and {end_year} wikipedia")
272
+ search_queries.append(f"{artist_name} discography {start_year}-{end_year} wikipedia")
273
+ search_queries.append(f"{artist_name} complete list of {album_type} {start_year}-{end_year}")
274
+ else:
275
+ search_queries.append(f"{artist_name} complete discography wikipedia")
276
+ search_queries.append(f"{artist_name} {album_type} list wikipedia")
277
+ else:
278
+ # If we couldn't extract artist name, use the original question
279
+ search_queries.append(question + " wikipedia")
280
+
281
+ # Gather context from multiple sources
282
+ wiki_context = ""
283
+ google_context = ""
284
+ ddg_context = ""
285
+
286
+ # Try Google Search first with multiple queries for better coverage
287
+ for i, query in enumerate(search_queries[:2]): # Use first two queries for Google
288
+ try:
289
+ result = await self._google_search(query, num_results=7)
290
+ if result and not google_context:
291
+ google_context = result
292
+ print(f"Google search completed for query {i+1}")
293
+ except Exception as e:
294
+ print(f"Google search failed for query {i+1}: {e}")
295
+
296
+ # Try Wikipedia
297
+ try:
298
+ # Use the first query for Wikipedia
299
+ wiki_context = self.wiki_tool.run(search_queries[0])
300
+ print("Wikipedia search completed")
301
+ except Exception as e:
302
+ print(f"Wikipedia tool failed: {e}")
303
+
304
+ # Fall back to DuckDuckGo if needed
305
+ if not google_context:
306
+ try:
307
+ # Use a different query for DuckDuckGo
308
+ query_idx = min(2, len(search_queries)-1)
309
+ ddg_context = self.ddg_tool.run(search_queries[query_idx])
310
+ print("DuckDuckGo search completed")
311
+ except Exception as e:
312
+ print(f"DuckDuckGo tool failed: {e}")
313
+
314
+ # Combine contexts if available
315
+ combined_context = ""
316
+ if google_context and not any(x in google_context.lower() for x in ["not found", "no results", "does not contain"]):
317
+ combined_context += f"Google search context: {google_context}\n\n"
318
+ if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
319
+ combined_context += f"Wikipedia context: {wiki_context}\n\n"
320
+ if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
321
+ combined_context += f"Web search context: {ddg_context}\n\n"
322
+
323
+ # Create a specialized prompt for discography questions
324
+ prompt = f"""Based on the following context, answer this question about music discography:
325
+
326
+ {combined_context}
327
+
328
+ Question: {question}
329
+
330
+ """
331
+
332
+ # Add specific instructions for counting albums in a date range
333
+ if "how many" in question.lower() and "album" in question.lower() and start_year and end_year:
334
+ prompt += f"""Count ONLY the {album_type} released between {start_year} and {end_year}, inclusive of both years.
335
+
336
+ Provide ONLY the numeric count as your answer, with no additional text.
337
+
338
+ Make sure to count each album only once, and only count {album_type} unless specifically asked for other types.
339
+
340
+ If you find a list of albums with years, list them here with their release years before giving the final count:
341
+ [Album name] (year)
342
+ [Album name] (year)
343
+ ...
344
+ Final count: [number]"""
345
+ else:
346
+ prompt += "Provide ONLY the specific information requested. No explanations or additional context."
347
+
348
+ await self._rate_limit()
349
+ response = self.model.generate_content(
350
+ prompt,
351
+ generation_config=genai.types.GenerationConfig(
352
+ max_output_tokens=500, # Increased to allow for album listing
353
+ temperature=0.0
354
+ )
355
+ )
356
+ answer = response.text.strip()
357
+
358
+ # Extract just the count if that's what was requested
359
+ if "how many" in question.lower():
360
+ # Look for "Final count: X" pattern first
361
+ final_count_match = re.search(r'Final count:\s*(\d+)', answer)
362
+ if final_count_match:
363
+ return final_count_match.group(1)
364
+
365
+ # Otherwise try to extract any number
366
+ number_match = re.search(r'\b(\d+)\b', answer)
367
+ if number_match:
368
+ return number_match.group(1)
369
+
370
+ # Clean up the answer to extract just the information
371
+ # Remove common prefixes
372
+ prefixes = ['The answer is', 'Based on', 'According to', 'There were']
373
+ for prefix in prefixes:
374
+ if answer.lower().startswith(prefix.lower()):
375
+ answer = answer[len(prefix):].strip()
376
+ if answer.startswith(','):
377
+ answer = answer[1:].strip()
378
+
379
+ return answer
380
+
381
  async def _handle_video_question(self, question: str) -> str:
382
  """Handle questions that require video analysis"""
383
  # Extract YouTube URL
 
501
  "from wikipedia" in q or
502
  "search the web" in q or
503
  "duckduckgo" in q or
504
+ "web search" in q or
505
+ "google" in q
506
  )
507
 
508
  def is_factual_question(question):
 
513
  "what role", "what character", "what part",
514
  "which actor", "which actress",
515
  "in the movie", "in the show", "in the series", "in the film",
516
+ "version of", "how many", "when did", "where was",
517
+ "published", "released", "recorded", "between", "from", "to"
518
  ]
519
  return any(pattern in q for pattern in factual_patterns)
520
 
521
  wiki_context = ""
522
+ google_context = ""
523
  ddg_context = ""
524
 
525
+ # Use retrieval for explicit web/Wikipedia questions OR factual questions
526
  if is_explicit_retrieval_question(question) or is_factual_question(question):
527
+ # Try Google Search first for all factual questions
528
+ try:
529
+ google_context = await self._google_search(question, num_results=7)
530
+ print(f"Google search completed for: {question[:50]}...")
531
+ except Exception as e:
532
+ print(f"Google search failed: {e}")
533
+
534
+ # For factual questions, also try Wikipedia
535
  if is_factual_question(question) or "wikipedia" in question.lower():
536
  try:
537
  wiki_context = self.wiki_tool.run(question)
 
539
  except Exception as e:
540
  print(f"Wikipedia tool failed: {e}")
541
 
542
+ # Use DuckDuckGo as a fallback or additional source
543
+ if (not google_context or is_factual_question(question)) and \
544
+ ("duckduckgo" in question.lower() or "web search" in question.lower()):
545
  try:
546
  ddg_context = self.ddg_tool.run(question)
547
  print(f"DuckDuckGo search completed for: {question[:50]}...")
 
588
  def is_good_context(context):
589
  return context and not any(x in context.lower() for x in ["not found", "no results", "does not contain information"])
590
 
591
+ # For factual questions, try to use all available search results
592
  if is_factual_question(question):
593
  combined_context = ""
594
+ if google_context and is_good_context(google_context):
595
+ combined_context += f"Google search context: {google_context}\n\n"
596
  if wiki_context and is_good_context(wiki_context):
597
  combined_context += f"Wikipedia context: {wiki_context}\n\n"
598
  if ddg_context and is_good_context(ddg_context):
 
602
  prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
603
  else:
604
  # For non-factual questions, use the first good context available
605
+ if google_context and is_good_context(google_context):
606
+ prompt = f"Use the following search context to answer the question:\n{google_context}\n\n{prompt}"
607
+ elif wiki_context and is_good_context(wiki_context):
608
  prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
609
  elif ddg_context and is_good_context(ddg_context):
610
  prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"