Final_Assignment_Template

Sleeping

App Files Files Community

Kackle committed on Jun 30, 2025

Commit

f152db2

verified ·

1 Parent(s): 3ffe515

google search with disco added

Browse files

Files changed (1) hide show

gemini_agent.py +208 -13

gemini_agent.py CHANGED Viewed

@@ -139,24 +139,35 @@ class GeminiAgent:
         """Handle questions about actors, TV shows, and movies with enhanced search"""
         print(f"Processing actor/show question: {question[:50]}...")
-        # Always try both Wikipedia and DuckDuckGo for these questions
         wiki_context = ""
         ddg_context = ""
         try:
             wiki_context = self.wiki_tool.run(question)
             print("Wikipedia search completed")
         except Exception as e:
             print(f"Wikipedia tool failed: {e}")
-        try:
-            ddg_context = self.ddg_tool.run(question)
-            print("DuckDuckGo search completed")
-        except Exception as e:
-            print(f"DuckDuckGo tool failed: {e}")
         # Combine contexts if available
         combined_context = ""
         if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
             combined_context += f"Wikipedia context: {wiki_context}\n\n"
         if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
@@ -199,6 +210,174 @@ If the answer is a person's name, provide ONLY their first name as requested."""
         return answer
     async def _handle_video_question(self, question: str) -> str:
         """Handle questions that require video analysis"""
         # Extract YouTube URL
@@ -322,7 +501,8 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
                 "from wikipedia" in q or
                 "search the web" in q or
                 "duckduckgo" in q or
-                "web search" in q
             )
         def is_factual_question(question):
@@ -333,16 +513,25 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
                 "what role", "what character", "what part",
                 "which actor", "which actress",
                 "in the movie", "in the show", "in the series", "in the film",
-                "version of"
             ]
             return any(pattern in q for pattern in factual_patterns)
         wiki_context = ""
         ddg_context = ""
-        # Use retrieval for explicit web/Wikipedia questions OR factual questions about actors/shows
         if is_explicit_retrieval_question(question) or is_factual_question(question):
-            # For factual questions, always try both Wikipedia and DuckDuckGo
             if is_factual_question(question) or "wikipedia" in question.lower():
                 try:
                     wiki_context = self.wiki_tool.run(question)
@@ -350,7 +539,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
                 except Exception as e:
                     print(f"Wikipedia tool failed: {e}")
-            if is_factual_question(question) or "duckduckgo" in question.lower() or "web search" in question.lower():
                 try:
                     ddg_context = self.ddg_tool.run(question)
                     print(f"DuckDuckGo search completed for: {question[:50]}...")
@@ -397,9 +588,11 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
         def is_good_context(context):
             """Report whether a retrieved context looks usable.

             A falsy context (empty string, None) is returned unchanged; otherwise
             the result is True unless the lower-cased text contains one of the
             known "nothing useful" marker phrases.
             """
             if not context:
                 return context
             lowered = context.lower()
             failure_markers = ("not found", "no results", "does not contain information")
             return not any(marker in lowered for marker in failure_markers)
-        # For factual questions about actors/shows, try to use both Wikipedia and web search results
         if is_factual_question(question):
             combined_context = ""
             if wiki_context and is_good_context(wiki_context):
                 combined_context += f"Wikipedia context: {wiki_context}\n\n"
             if ddg_context and is_good_context(ddg_context):
@@ -409,7 +602,9 @@ Provide only the direct answer. If it's a quote, give just the quoted text. If i
                 prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
         else:
             # For non-factual questions, use the first good context available
-            if wiki_context and is_good_context(wiki_context):
                 prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
             elif ddg_context and is_good_context(ddg_context):
                 prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"

         """Handle questions about actors, TV shows, and movies with enhanced search"""
         print(f"Processing actor/show question: {question[:50]}...")
+        # Try Google Search first, then Wikipedia and DuckDuckGo
+        google_context = ""
         wiki_context = ""
         ddg_context = ""
+        try:
+            google_context = await self._google_search(question, num_results=7)
+            print("Google search completed")
+        except Exception as e:
+            print(f"Google search failed: {e}")
         try:
             wiki_context = self.wiki_tool.run(question)
             print("Wikipedia search completed")
         except Exception as e:
             print(f"Wikipedia tool failed: {e}")
+        # Only use DuckDuckGo if Google search failed
+        if not google_context:
+            try:
+                ddg_context = self.ddg_tool.run(question)
+                print("DuckDuckGo search completed")
+            except Exception as e:
+                print(f"DuckDuckGo tool failed: {e}")
         # Combine contexts if available
         combined_context = ""
+        if google_context and not any(x in google_context.lower() for x in ["not found", "no results", "does not contain"]):
+            combined_context += f"Google search context: {google_context}\n\n"
         if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
             combined_context += f"Wikipedia context: {wiki_context}\n\n"
         if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
         return answer
+    async def _handle_discography_question(self, question: str) -> str:
+        """Answer a music-discography question using search context and the model.
+
+        Steps performed, in order:
+          1. Parse the question with regexes for an artist name, a start/end
+             year range and an album type (defaults to "studio albums").
+          2. Build one to three search queries from the parsed pieces, or fall
+             back to the raw question plus " wikipedia" if no artist was found.
+          3. Collect context: self._google_search on the first two queries
+             (the first non-empty result is kept), self.wiki_tool.run on the
+             first query, and self.ddg_tool.run only when Google produced
+             nothing. Each search failure is caught and printed.
+          4. Build a prompt (with counting instructions when the question asks
+             "how many" albums in a parsed year range), await
+             self._rate_limit(), and call self.model.generate_content with
+             max_output_tokens=500 and temperature=0.0.
+          5. Post-process: for "how many" questions return the digits after
+             "Final count:" or, failing that, the first integer in the answer;
+             otherwise strip a few known leading phrases.
+
+        Args:
+            question: The natural-language question text.
+
+        Returns:
+            The extracted count as a digit string, or the cleaned model answer.
+
+        Note:
+            Errors raised by generate_content or by reading response.text are
+            not caught in this method. Relies on `re` and `genai` being
+            available at module level -- not visible in this hunk, confirm.
+        """
+        print(f"Processing discography question: {question[:50]}...")
+        # Parsed fields; each keeps its initial value when the matching regex below finds nothing
+        artist_name = ""
+        start_year = None
+        end_year = None
+        album_type = "studio albums"  # Default to studio albums
+        # Artist name: patterns are tried in order and the first one that matches wins
+        artist_patterns = [
+            r'by\s+([\w\s]+)\s+between',  # "by Mercedes Sosa between"
+            r'([\w\s]+)\s+albums',        # "Mercedes Sosa albums"
+            r'([\w\s]+)\s+discography',   # "Mercedes Sosa discography"
+            r'([\w\s]+)\s+between\s+\d{4}' # "Mercedes Sosa between 2000"
+        ]
+        for pattern in artist_patterns:
+            match = re.search(pattern, question, re.IGNORECASE)
+            if match:
+                artist_name = match.group(1).strip()
+                break
+        # Year range: first matching pattern supplies both start_year and end_year
+        date_patterns = [
+            r'between\s+(\d{4})\s+and\s+(\d{4})',  # "between 2000 and 2009"
+            r'from\s+(\d{4})\s+to\s+(\d{4})',      # "from 2000 to 2009"
+            r'(\d{4})\s*[-–]\s*(\d{4})',        # "2000-2009"
+            r'(\d{4})\s+to\s+(\d{4})'             # "2000 to 2009"
+        ]
+        for pattern in date_patterns:
+            match = re.search(pattern, question, re.IGNORECASE)
+            if match:
+                start_year = int(match.group(1))
+                end_year = int(match.group(2))
+                break
+        # "YYYY (included)" fallback. NOTE(review): start_year is still None on this path, so the range checks below stay false and this end_year appears unused
+        if not end_year:
+            included_match = re.search(r'(\d{4})\s*\(included\)', question, re.IGNORECASE)
+            if included_match:
+                end_year = int(included_match.group(1))
+        # Album type from keywords in the question; without a keyword the "studio albums" default stands
+        if 'studio album' in question.lower():
+            album_type = "studio albums"
+        elif 'live album' in question.lower():
+            album_type = "live albums"
+        elif 'compilation' in question.lower():
+            album_type = "compilation albums"
+        # Build the search queries; list order matters because later code picks queries by index
+        search_queries = []
+        if artist_name:
+            # Three range-specific queries when both years were parsed, otherwise two general ones
+            if start_year and end_year:
+                search_queries.append(f"{artist_name} {album_type} between {start_year} and {end_year} wikipedia")
+                search_queries.append(f"{artist_name} discography {start_year}-{end_year} wikipedia")
+                search_queries.append(f"{artist_name} complete list of {album_type} {start_year}-{end_year}")
+            else:
+                search_queries.append(f"{artist_name} complete discography wikipedia")
+                search_queries.append(f"{artist_name} {album_type} list wikipedia")
+        else:
+            # No artist extracted: single query made of the raw question plus " wikipedia"
+            search_queries.append(question + " wikipedia")
+        # Context strings per source; each stays "" if that source fails or is skipped
+        wiki_context = ""
+        google_context = ""
+        ddg_context = ""
+        # Google first. NOTE(review): the loop has no break, so the second query is still sent after the first succeeds and its result is discarded
+        for i, query in enumerate(search_queries[:2]):  # At most the first two queries go to Google
+            try:
+                result = await self._google_search(query, num_results=7)
+                if result and not google_context:
+                    google_context = result
+                    print(f"Google search completed for query {i+1}")
+            except Exception as e:
+                print(f"Google search failed for query {i+1}: {e}")
+        # Wikipedia is always attempted, whatever Google returned
+        try:
+            # search_queries always has at least one entry, so index 0 is safe
+            wiki_context = self.wiki_tool.run(search_queries[0])
+            print("Wikipedia search completed")
+        except Exception as e:
+            print(f"Wikipedia tool failed: {e}")
+        # DuckDuckGo is only a fallback for an empty Google context
+        if not google_context:
+            try:
+                # Third query when it exists, otherwise the last available one
+                query_idx = min(2, len(search_queries)-1)
+                ddg_context = self.ddg_tool.run(search_queries[query_idx])
+                print("DuckDuckGo search completed")
+            except Exception as e:
+                print(f"DuckDuckGo tool failed: {e}")
+        # Merge sources, dropping any whose text contains a "nothing found"-style marker phrase
+        combined_context = ""
+        if google_context and not any(x in google_context.lower() for x in ["not found", "no results", "does not contain"]):
+            combined_context += f"Google search context: {google_context}\n\n"
+        if wiki_context and not any(x in wiki_context.lower() for x in ["not found", "no results", "does not contain"]):
+            combined_context += f"Wikipedia context: {wiki_context}\n\n"
+        if ddg_context and not any(x in ddg_context.lower() for x in ["not found", "no results", "does not contain"]):
+            combined_context += f"Web search context: {ddg_context}\n\n"
+        # Base prompt: merged context (possibly empty) followed by the original question
+        prompt = f"""Based on the following context, answer this question about music discography:
+{combined_context}
+Question: {question}
+"""
+        # Counting instructions apply only to "how many ... album" questions with a fully parsed year range
+        if "how many" in question.lower() and "album" in question.lower() and start_year and end_year:
+            prompt += f"""Count ONLY the {album_type} released between {start_year} and {end_year}, inclusive of both years.
+Provide ONLY the numeric count as your answer, with no additional text.
+Make sure to count each album only once, and only count {album_type} unless specifically asked for other types.
+If you find a list of albums with years, list them here with their release years before giving the final count:
+[Album name] (year)
+[Album name] (year)
+...
+Final count: [number]"""
+        else:
+            prompt += "Provide ONLY the specific information requested. No explanations or additional context."
+        await self._rate_limit()
+        response = self.model.generate_content(  # NOTE(review): called without await inside a coroutine -- confirm this is intended
+            prompt,
+            generation_config=genai.types.GenerationConfig(
+                max_output_tokens=500,  # Room for the album listing requested in the prompt
+                temperature=0.0
+            )
+        )
+        answer = response.text.strip()
+        # Any "how many" question gets numeric extraction, even when no year range was parsed
+        if "how many" in question.lower():
+            # Preferred: the "Final count: X" line requested in the prompt (match is case-sensitive)
+            final_count_match = re.search(r'Final count:\s*(\d+)', answer)
+            if final_count_match:
+                return final_count_match.group(1)
+            # Fallback: first integer in the answer. NOTE(review): may be a release year if the model lists albums without a final count
+            number_match = re.search(r'\b(\d+)\b', answer)
+            if number_match:
+                return number_match.group(1)
+        # Reached for non-count questions, or when no number could be extracted
+        # Strip known lead-in phrases; the loop keeps going after a hit, so several can be removed in sequence
+        prefixes = ['The answer is', 'Based on', 'According to', 'There were']
+        for prefix in prefixes:
+            if answer.lower().startswith(prefix.lower()):
+                answer = answer[len(prefix):].strip()
+                if answer.startswith(','):
+                    answer = answer[1:].strip()
+        return answer
     async def _handle_video_question(self, question: str) -> str:
         """Handle questions that require video analysis"""
         # Extract YouTube URL
                 "from wikipedia" in q or
                 "search the web" in q or
                 "duckduckgo" in q or
+                "web search" in q or
+                "google" in q
             )
         def is_factual_question(question):
                 "what role", "what character", "what part",
                 "which actor", "which actress",
                 "in the movie", "in the show", "in the series", "in the film",
+                "version of", "how many", "when did", "where was",
+                "published", "released", "recorded", "between", "from", "to"
             ]
             return any(pattern in q for pattern in factual_patterns)
         wiki_context = ""
+        google_context = ""
         ddg_context = ""
+        # Use retrieval for explicit web/Wikipedia questions OR factual questions
         if is_explicit_retrieval_question(question) or is_factual_question(question):
+            # Try Google Search first for all factual questions
+            try:
+                google_context = await self._google_search(question, num_results=7)
+                print(f"Google search completed for: {question[:50]}...")
+            except Exception as e:
+                print(f"Google search failed: {e}")
+            # For factual questions, also try Wikipedia
             if is_factual_question(question) or "wikipedia" in question.lower():
                 try:
                     wiki_context = self.wiki_tool.run(question)
                 except Exception as e:
                     print(f"Wikipedia tool failed: {e}")
+            # Use DuckDuckGo as a fallback or additional source
+            if (not google_context or is_factual_question(question)) and \
+               ("duckduckgo" in question.lower() or "web search" in question.lower()):
                 try:
                     ddg_context = self.ddg_tool.run(question)
                     print(f"DuckDuckGo search completed for: {question[:50]}...")
         def is_good_context(context):
             """Report whether a retrieved context looks usable.

             A falsy context (empty string, None) is returned unchanged; otherwise
             the result is True unless the lower-cased text contains one of the
             known "nothing useful" marker phrases.
             """
             if not context:
                 return context
             lowered = context.lower()
             failure_markers = ("not found", "no results", "does not contain information")
             return not any(marker in lowered for marker in failure_markers)
+        # For factual questions, try to use all available search results
         if is_factual_question(question):
             combined_context = ""
+            if google_context and is_good_context(google_context):
+                combined_context += f"Google search context: {google_context}\n\n"
             if wiki_context and is_good_context(wiki_context):
                 combined_context += f"Wikipedia context: {wiki_context}\n\n"
             if ddg_context and is_good_context(ddg_context):
                 prompt = f"Use the following context to answer the question accurately. Focus on finding the exact name or information requested:\n{combined_context}\n{prompt}"
         else:
             # For non-factual questions, use the first good context available
+            if google_context and is_good_context(google_context):
+                prompt = f"Use the following search context to answer the question:\n{google_context}\n\n{prompt}"
+            elif wiki_context and is_good_context(wiki_context):
                 prompt = f"Use the following Wikipedia context to answer the question:\n{wiki_context}\n\n{prompt}"
             elif ddg_context and is_good_context(ddg_context):
                 prompt = f"Use the following web search context to answer the question:\n{ddg_context}\n\n{prompt}"