Spaces:
Sleeping
Sleeping
google search with disco added
Browse files- gemini_agent.py +208 -13
gemini_agent.py
CHANGED
|
@@ -139,24 +139,35 @@ class GeminiAgent:
|
|
| 139 |
"""Handle questions about actors, TV shows, and movies with enhanced search"""
|
| 140 |
print(f"Processing actor/show question: {question[:50]}...")
|
| 141 |
|
| 142 |
-
#
|
|
|
|
| 143 |
wiki_context = ""
|
| 144 |
ddg_context = ""
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
try:
|
| 147 |
wiki_context = self.wiki_tool.run(question)
|
| 148 |
print("Wikipedia search completed")
|
| 149 |
except Exception as e:
|
| 150 |
print(f"Wikipedia tool failed: {e}")
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
| 157 |
|
| 158 |
# Combine contexts if available
|
| 159 |
combined_context = ""
|
|
|
|
|
|
|
| 160 |
if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
|
| 161 |
combined_context += f"Wikipedia context: {wiki_context}\n\n"
|
| 162 |
if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
|
|
@@ -199,6 +210,174 @@ If the answer is a person's name, provide ONLY their first name as requested."""
|
|
| 199 |
|
| 200 |
return answer
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
async def _handle_video_question(self, question: str) -> str:
|
| 203 |
"""Handle questions that require video analysis"""
|
| 204 |
# Extract YouTube URL
|
|
@@ -322,7 +501,8 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
|
|
| 322 |
"from wikipedia" in q or
|
| 323 |
"search the web" in q or
|
| 324 |
"duckduckgo" in q or
|
| 325 |
-
"web search" in q
|
|
|
|
| 326 |
)
|
| 327 |
|
| 328 |
def is_factual_question(question):
|
|
@@ -333,16 +513,25 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
|
|
| 333 |
"what role", "what character", "what part",
|
| 334 |
"which actor", "which actress",
|
| 335 |
"in the movie", "in the show", "in the series", "in the film",
|
| 336 |
-
"version of"
|
|
|
|
| 337 |
]
|
| 338 |
return any(pattern in q for pattern in factual_patterns)
|
| 339 |
|
| 340 |
wiki_context = ""
|
|
|
|
| 341 |
ddg_context = ""
|
| 342 |
|
| 343 |
-
# Use retrieval for explicit web/Wikipedia questions OR factual questions
|
| 344 |
if is_explicit_retrieval_question(question) or is_factual_question(question):
|
| 345 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
if is_factual_question(question) or "wikipedia" in question.lower():
|
| 347 |
try:
|
| 348 |
wiki_context = self.wiki_tool.run(question)
|
|
@@ -350,7 +539,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
|
|
| 350 |
except Exception as e:
|
| 351 |
print(f"Wikipedia tool failed: {e}")
|
| 352 |
|
| 353 |
-
|
|
|
|
|
|
|
| 354 |
try:
|
| 355 |
ddg_context = self.ddg_tool.run(question)
|
| 356 |
print(f"DuckDuckGo search completed for: {question[:50]}...")
|
|
@@ -397,9 +588,11 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
|
|
| 397 |
def is_good_context(context):
|
| 398 |
return context and not any(x in context.lower() for x in ["not found", "no results", "does not contain information"])
|
| 399 |
|
| 400 |
-
# For factual questions
|
| 401 |
if is_factual_question(question):
|
| 402 |
combined_context = ""
|
|
|
|
|
|
|
| 403 |
if wiki_context and is_good_context(wiki_context):
|
| 404 |
combined_context += f"Wikipedia context: {wiki_context}\n\n"
|
| 405 |
if ddg_context and is_good_context(ddg_context):
|
|
@@ -409,7 +602,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
|
|
| 409 |
prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
|
| 410 |
else:
|
| 411 |
# For non-factual questions, use the first good context available
|
| 412 |
-
if
|
|
|
|
|
|
|
| 413 |
prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
|
| 414 |
elif ddg_context and is_good_context(ddg_context):
|
| 415 |
prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"
|
|
|
|
| 139 |
"""Handle questions about actors, TV shows, and movies with enhanced search"""
|
| 140 |
print(f"Processing actor/show question: {question[:50]}...")
|
| 141 |
|
| 142 |
+
# Try Google Search first, then Wikipedia and DuckDuckGo
|
| 143 |
+
google_context = ""
|
| 144 |
wiki_context = ""
|
| 145 |
ddg_context = ""
|
| 146 |
|
| 147 |
+
try:
|
| 148 |
+
google_context = await self._google_search(question, num_results=7)
|
| 149 |
+
print("Google search completed")
|
| 150 |
+
except Exception as e:
|
| 151 |
+
print(f"Google search failed: {e}")
|
| 152 |
+
|
| 153 |
try:
|
| 154 |
wiki_context = self.wiki_tool.run(question)
|
| 155 |
print("Wikipedia search completed")
|
| 156 |
except Exception as e:
|
| 157 |
print(f"Wikipedia tool failed: {e}")
|
| 158 |
|
| 159 |
+
# Only use DuckDuckGo if Google search failed
|
| 160 |
+
if not google_context:
|
| 161 |
+
try:
|
| 162 |
+
ddg_context = self.ddg_tool.run(question)
|
| 163 |
+
print("DuckDuckGo search completed")
|
| 164 |
+
except Exception as e:
|
| 165 |
+
print(f"DuckDuckGo tool failed: {e}")
|
| 166 |
|
| 167 |
# Combine contexts if available
|
| 168 |
combined_context = ""
|
| 169 |
+
if google_context and not any(x in google_context.lower() for x in ["not found", "no results", "does not contain"]):
|
| 170 |
+
combined_context += f"Google search context: {google_context}\n\n"
|
| 171 |
if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
|
| 172 |
combined_context += f"Wikipedia context: {wiki_context}\n\n"
|
| 173 |
if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
|
|
|
|
| 210 |
|
| 211 |
return answer
|
| 212 |
|
| 213 |
+
async def _handle_discography_question(self, question: str) -> str:
    """Answer a music-discography question from search context plus the model.

    The question is parsed for an artist name, a year range and an album
    type. Those drive targeted Google / Wikipedia / DuckDuckGo queries, and
    whatever usable context comes back is placed in a prompt for
    ``self.model``.

    Args:
        question: The natural-language question to answer.

    Returns:
        For "how many" questions, the count as a string of digits whenever a
        number can be extracted from the model output; otherwise the model's
        answer with common lead-in phrases stripped.
    """
    print(f"Processing discography question: {question[:50]}...")
    question_lower = question.lower()

    # ---- Extract key information from the question ----
    artist_name = ""
    artist_patterns = [
        r'by\s+([\w\s]+)\s+between',     # "by Mercedes Sosa between"
        r'([\w\s]+)\s+albums',           # "Mercedes Sosa albums"
        r'([\w\s]+)\s+discography',      # "Mercedes Sosa discography"
        r'([\w\s]+)\s+between\s+\d{4}',  # "Mercedes Sosa between 2000"
    ]
    for pattern in artist_patterns:
        match = re.search(pattern, question, re.IGNORECASE)
        if match:
            artist_name = match.group(1).strip()
            break

    start_year = None
    end_year = None
    date_patterns = [
        r'between\s+(\d{4})\s+and\s+(\d{4})',  # "between 2000 and 2009"
        r'from\s+(\d{4})\s+to\s+(\d{4})',      # "from 2000 to 2009"
        r'(\d{4})\s*[-–]\s*(\d{4})',           # "2000-2009"
        r'(\d{4})\s+to\s+(\d{4})',             # "2000 to 2009"
    ]
    for pattern in date_patterns:
        match = re.search(pattern, question, re.IGNORECASE)
        if match:
            start_year = int(match.group(1))
            end_year = int(match.group(2))
            break

    # A lone "2009 (included)" still gives us an upper bound.
    if end_year is None:
        included_match = re.search(r'(\d{4})\s*\(included\)', question, re.IGNORECASE)
        if included_match:
            end_year = int(included_match.group(1))

    has_range = start_year is not None and end_year is not None
    if has_range and start_year > end_year:
        # Tolerate a reversed range such as "2009-2000".
        start_year, end_year = end_year, start_year

    # Studio albums are the default; an explicit mention of "studio album"
    # also wins over "live"/"compilation" when several appear.
    album_type = "studio albums"
    if 'studio album' not in question_lower:
        if 'live album' in question_lower:
            album_type = "live albums"
        elif 'compilation' in question_lower:
            album_type = "compilation albums"

    # ---- Construct specialized search queries ----
    search_queries = []
    if artist_name:
        if has_range:
            search_queries.append(f"{artist_name} {album_type} between {start_year} and {end_year} wikipedia")
            search_queries.append(f"{artist_name} discography {start_year}-{end_year} wikipedia")
            search_queries.append(f"{artist_name} complete list of {album_type} {start_year}-{end_year}")
        else:
            search_queries.append(f"{artist_name} complete discography wikipedia")
            search_queries.append(f"{artist_name} {album_type} list wikipedia")
    else:
        # Could not extract an artist name: fall back to the raw question.
        search_queries.append(question + " wikipedia")

    # ---- Gather context from multiple sources ----
    wiki_context = ""
    google_context = ""
    ddg_context = ""

    # Google first, trying at most two queries. Stop at the first non-empty
    # result: only one result is ever kept, so a further request would just
    # spend quota and time.
    for i, query in enumerate(search_queries[:2]):
        try:
            result = await self._google_search(query, num_results=7)
        except Exception as e:
            print(f"Google search failed for query {i+1}: {e}")
            continue
        if result:
            google_context = result
            print(f"Google search completed for query {i+1}")
            break

    try:
        wiki_context = self.wiki_tool.run(search_queries[0])
        print("Wikipedia search completed")
    except Exception as e:
        print(f"Wikipedia tool failed: {e}")

    # DuckDuckGo is only a fallback for a failed/empty Google search.
    if not google_context:
        try:
            # Prefer a query Google/Wikipedia did not already use.
            query_idx = min(2, len(search_queries) - 1)
            ddg_context = self.ddg_tool.run(search_queries[query_idx])
            print("DuckDuckGo search completed")
        except Exception as e:
            print(f"DuckDuckGo tool failed: {e}")

    # ---- Combine contexts if available ----
    failure_markers = ("not found", "no results", "does not contain")

    def is_usable(context):
        """Return True for non-empty context that does not report a miss."""
        if not context:
            return False
        lowered = context.lower()
        return not any(marker in lowered for marker in failure_markers)

    labelled_contexts = (
        ("Google search context", google_context),
        ("Wikipedia context", wiki_context),
        ("Web search context", ddg_context),
    )
    combined_context = "".join(
        f"{label}: {text}\n\n" for label, text in labelled_contexts if is_usable(text)
    )

    # ---- Build the prompt ----
    prompt = (
        "Based on the following context, answer this question about music discography:\n"
        "\n"
        f"{combined_context}\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    if "how many" in question_lower and "album" in question_lower and has_range:
        prompt += (
            f"Count ONLY the {album_type} released between {start_year} and {end_year}, inclusive of both years.\n"
            "\n"
            "Provide ONLY the numeric count as your answer, with no additional text.\n"
            "\n"
            f"Make sure to count each album only once, and only count {album_type} unless specifically asked for other types.\n"
            "\n"
            "If you find a list of albums with years, list them here with their release years before giving the final count:\n"
            "[Album name] (year)\n"
            "[Album name] (year)\n"
            "...\n"
            "Final count: [number]"
        )
    else:
        prompt += "Provide ONLY the specific information requested. No explanations or additional context."

    await self._rate_limit()
    response = self.model.generate_content(
        prompt,
        generation_config=genai.types.GenerationConfig(
            max_output_tokens=500,  # Leaves room for the album listing
            temperature=0.0
        )
    )
    answer = response.text.strip()

    # ---- Extract just the count if that's what was requested ----
    if "how many" in question_lower:
        # The explicit marker requested in the prompt is the most reliable.
        final_count_match = re.search(r'final count:\s*(\d+)', answer, re.IGNORECASE)
        if final_count_match:
            return final_count_match.group(1)

        # Without the marker, do not mistake a release year for the count:
        # drop "(2005)"-style years from the album listing and ignore
        # numbers that merely echo the year range asked about.
        without_listed_years = re.sub(r'\(\s*\d{4}\s*\)', ' ', answer)
        echoed_years = {str(year) for year in (start_year, end_year) if year is not None}
        candidates = [
            number
            for number in re.findall(r'\b\d+\b', without_listed_years)
            if number not in echoed_years
        ]
        if candidates:
            return candidates[0]

        # Last resort: any number at all in the answer.
        number_match = re.search(r'\b(\d+)\b', answer)
        if number_match:
            return number_match.group(1)

    # ---- Strip common lead-in phrases from free-text answers ----
    prefixes = ['The answer is', 'Based on', 'According to', 'There were']
    for prefix in prefixes:
        if answer.lower().startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()
            if answer.startswith(','):
                answer = answer[1:].strip()

    return answer
|
| 380 |
+
|
| 381 |
async def _handle_video_question(self, question: str) -> str:
|
| 382 |
"""Handle questions that require video analysis"""
|
| 383 |
# Extract YouTube URL
|
|
|
|
| 501 |
"from wikipedia" in q or
|
| 502 |
"search the web" in q or
|
| 503 |
"duckduckgo" in q or
|
| 504 |
+
"web search" in q or
|
| 505 |
+
"google" in q
|
| 506 |
)
|
| 507 |
|
| 508 |
def is_factual_question(question):
|
|
|
|
| 513 |
"what role", "what character", "what part",
|
| 514 |
"which actor", "which actress",
|
| 515 |
"in the movie", "in the show", "in the series", "in the film",
|
| 516 |
+
"version of", "how many", "when did", "where was",
|
| 517 |
+
"published", "released", "recorded", "between", "from", "to"
|
| 518 |
]
|
| 519 |
return any(pattern in q for pattern in factual_patterns)
|
| 520 |
|
| 521 |
wiki_context = ""
|
| 522 |
+
google_context = ""
|
| 523 |
ddg_context = ""
|
| 524 |
|
| 525 |
+
# Use retrieval for explicit web/Wikipedia questions OR factual questions
|
| 526 |
if is_explicit_retrieval_question(question) or is_factual_question(question):
|
| 527 |
+
# Try Google Search first for all factual questions
|
| 528 |
+
try:
|
| 529 |
+
google_context = await self._google_search(question, num_results=7)
|
| 530 |
+
print(f"Google search completed for: {question[:50]}...")
|
| 531 |
+
except Exception as e:
|
| 532 |
+
print(f"Google search failed: {e}")
|
| 533 |
+
|
| 534 |
+
# For factual questions, also try Wikipedia
|
| 535 |
if is_factual_question(question) or "wikipedia" in question.lower():
|
| 536 |
try:
|
| 537 |
wiki_context = self.wiki_tool.run(question)
|
|
|
|
| 539 |
except Exception as e:
|
| 540 |
print(f"Wikipedia tool failed: {e}")
|
| 541 |
|
| 542 |
+
# Use DuckDuckGo as a fallback or additional source
|
| 543 |
+
if (not google_context or is_factual_question(question)) and \
|
| 544 |
+
("duckduckgo" in question.lower() or "web search" in question.lower()):
|
| 545 |
try:
|
| 546 |
ddg_context = self.ddg_tool.run(question)
|
| 547 |
print(f"DuckDuckGo search completed for: {question[:50]}...")
|
|
|
|
| 588 |
def is_good_context(context):
|
| 589 |
return context and not any(x in context.lower() for x in ["not found", "no results", "does not contain information"])
|
| 590 |
|
| 591 |
+
# For factual questions, try to use all available search results
|
| 592 |
if is_factual_question(question):
|
| 593 |
combined_context = ""
|
| 594 |
+
if google_context and is_good_context(google_context):
|
| 595 |
+
combined_context += f"Google search context: {google_context}\n\n"
|
| 596 |
if wiki_context and is_good_context(wiki_context):
|
| 597 |
combined_context += f"Wikipedia context: {wiki_context}\n\n"
|
| 598 |
if ddg_context and is_good_context(ddg_context):
|
|
|
|
| 602 |
prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
|
| 603 |
else:
|
| 604 |
# For non-factual questions, use the first good context available
|
| 605 |
+
if google_context and is_good_context(google_context):
|
| 606 |
+
prompt = f"Use the following search context to answer the question:\n{google_context}\n\n{prompt}"
|
| 607 |
+
elif wiki_context and is_good_context(wiki_context):
|
| 608 |
prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
|
| 609 |
elif ddg_context and is_good_context(ddg_context):
|
| 610 |
prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"
|