derkaal
/

FinalSubmission

Model card · Files and versions

xet

Community

derkaal committed on May 13, 2025

Commit

216d3ae

verified ·

1 Parent(s): 3bfcda8

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

gaiaX/question_handlers.py +38 -228
requirements.txt +1 -11

gaiaX/question_handlers.py CHANGED Viewed

@@ -27,48 +27,29 @@ def detect_question_type(question_text: str) -> str:
     # Convert to lowercase for case-insensitive matching
     text = question_text.lower()
-    # Check for media content questions (videos, YouTube, audio, etc.)
-    if any(keyword in text for keyword in ["video", "youtube", "watch", "channel", "podcast",
-                                           "stream", "streaming", "media", "transcript",
-                                           "audio", "recording", "listen", "sound", "speech",
-                                           "voice", "mp3", "wav", "spoken", "transcribe"]):
-        return "media_content"
-    # Check for current events or real-time information questions
-    if any(keyword in text for keyword in ["current", "recent", "latest", "news", "today",
-                                           "this year", "this month", "this week", "update"]):
-        return "current_events"
     # Check for mathematical questions
-    if any(keyword in text for keyword in ["calculate", "compute", "equation", "formula", "derivative",
-                                           "integral", "probability", "statistics", "math"]):
         return "mathematical"
     # Check for technical implementation questions
-    if any(keyword in text for keyword in ["implement", "code", "algorithm", "function", "class",
-                                           "method", "programming", "pseudocode", "complexity"]):
         return "technical"
     # Check for context-based questions
-    if any(keyword in text for keyword in ["context", "file", "document", "text", "analyze",
-                                           "based on", "according to", "refer to"]):
         return "context_based"
-    # Check for categorization questions
-    if any(keyword in text for keyword in ["categorize", "classify", "sort", "group", "list of",
-                                          "which are", "identify the", "separate", "distinguish between",
-                                          "fruits", "vegetables", "animals", "plants", "types of",
-                                          "categories of", "examples of", "create a list", "make a list"]):
-        return "categorization"
     # Check for ethical/societal questions
-    if any(keyword in text for keyword in ["ethics", "ethical", "society", "impact", "bias",
-                                           "fairness", "responsible", "governance"]):
         return "ethical"
     # Check for factual knowledge questions
-    if any(keyword in text for keyword in ["define", "explain", "describe", "what is", "who is",
-                                           "when was", "history", "concept"]):
         return "factual"
     # Default to general if no specific type is detected
@@ -93,14 +74,36 @@ def handle_factual_question(agent: Any, question: dict, context: str = None) ->
     enhanced_question = question.copy()
     question_text = question.get("question", "")
-    enhanced_text = f"""
-    [FACTUAL KNOWLEDGE QUESTION]
-    {question_text}
-    Please provide a precise, accurate answer based on established facts and knowledge.
-    Include relevant examples and cite important research or developments when applicable.
-    """
     enhanced_question["question"] = enhanced_text
@@ -271,193 +274,6 @@ def handle_general_question(agent: Any, question: dict, context: str = None) ->
     return get_agent_response(agent, enhanced_question)
-def handle_current_events_question(agent: Any, question: dict, context: str = None) -> str:
-    """
-    Handle questions about current events or real-time information.
-    Args:
-        agent: Initialized LangChain agent
-        question: Dictionary containing question data
-        context: Optional context text
-    Returns:
-        Agent's response as a string
-    """
-    logger.info("Handling current events question")
-    # Enhance the question with specific instructions for current events questions
-    enhanced_question = question.copy()
-    question_text = question.get("question", "")
-    enhanced_text = f"""
-    [CURRENT EVENTS QUESTION]
-    {question_text}
-    Please provide an up-to-date answer by:
-    - Using search tools to find the most recent information
-    - Citing sources and their publication dates
-    - Synthesizing information from multiple sources when appropriate
-    - Clearly distinguishing between facts and opinions
-    - Indicating any uncertainties or conflicting information
-    Make sure to use search tools to verify the most current information before answering.
-    """
-    if context:
-        enhanced_text += f"\n\nContext:\n{context}"
-    enhanced_question["question"] = enhanced_text
-    # Get response from the agent
-    return get_agent_response(agent, enhanced_question)
-def handle_media_content_question(agent: Any, question: dict, context: str = None) -> str:
-    """
-    Handle questions about media content (videos, podcasts, audio files, etc.).
-    Args:
-        agent: Initialized LangChain agent
-        question: Dictionary containing question data
-        context: Optional context text
-    Returns:
-        Agent's response as a string
-    """
-    logger.info("Handling media content question")
-    # Detect if this is an audio-specific question
-    question_text = question.get("question", "")
-    is_audio_question = any(keyword in question_text.lower() for keyword in
-                           ["audio", "sound", "listen", "recording", "speech", "voice",
-                            "podcast", "mp3", "wav", "spoken", "transcribe", "recipe audio"])
-    # Check if context contains audio file detection message
-    has_audio_file = False
-    audio_file_path = None
-    if context and "Audio file detected" in context:
-        has_audio_file = True
-        # Try to extract the file path
-        import re
-        path_match = re.search(r"path: (.*?)($|\n)", context)
-        if path_match:
-            audio_file_path = path_match.group(1).strip()
-    # Enhance the question with specific instructions for media content questions
-    enhanced_question = question.copy()
-    if is_audio_question or has_audio_file:
-        # Audio-specific instructions
-        enhanced_text = f"""
-        [AUDIO CONTENT QUESTION]
-        {question_text}
-        Please provide a comprehensive answer by:
-        - Using audio transcription tools if an audio file is provided
-        - For recipe audio, extracting ingredients and steps using specialized tools
-        - Analyzing the transcribed content in relation to the question
-        - Formatting the response according to any specific request in the question
-        - Providing clear, structured information extracted from the audio
-        """
-        if audio_file_path:
-            enhanced_text += f"\nAn audio file has been detected. Use the transcribe_audio tool with the path: {audio_file_path}\n"
-            # Check if it's a recipe question
-            if "recipe" in question_text.lower() or "ingredient" in question_text.lower():
-                enhanced_text += f"\nThis appears to be a recipe-related question. After transcription, use the extract_ingredients_from_audio tool with the path: {audio_file_path}\n"
-    else:
-        # Video/general media instructions
-        enhanced_text = f"""
-        [MEDIA CONTENT QUESTION]
-        {question_text}
-        Please provide a comprehensive answer by:
-        - Using YouTube search tools to find relevant videos if needed
-        - Retrieving and analyzing video transcripts when appropriate
-        - Summarizing key points from the media content
-        - Connecting the media content to the specific question being asked
-        - Citing the source, creator, and publication date of the media
-        - Formatting the response according to any specific request in the question
-        Make sure to use YouTube tools to search for and analyze relevant videos before answering.
-        """
-    if context:
-        enhanced_text += f"\n\nContext:\n{context}"
-    enhanced_question["question"] = enhanced_text
-    # Get response from the agent
-    return get_agent_response(agent, enhanced_question)
-def handle_categorization_question(agent: Any, question: dict, context: str = None) -> str:
-    """
-    Handle categorization questions (e.g., classifying items into groups).
-    Args:
-        agent: Initialized LangChain agent
-        question: Dictionary containing question data
-        context: Optional context text
-    Returns:
-        Agent's response as a string
-    """
-    logger.info("Handling categorization question")
-    # Enhance the question with specific instructions for categorization questions
-    enhanced_question = question.copy()
-    question_text = question.get("question", "")
-    enhanced_text = f"""
-    [CATEGORIZATION QUESTION]
-    {question_text}
-    Please provide a careful and accurate categorization by:
-    - Paying close attention to the specific classification system requested (botanical, culinary, etc.)
-    - For botanical categorization:
-      * Fruits develop from the flower of a plant and contain seeds
-      * Vegetables come from other parts of the plant (leaves, stems, roots, bulbs)
-      * Some botanical fruits are culinarily considered vegetables (tomatoes, bell peppers, cucumbers, etc.)
-      * The following items are botanically fruits (develop from flowers and contain seeds):
-        - Green beans (legume fruits)
-        - Bell peppers (berry fruits)
-        - Zucchini (pepo fruits)
-        - Corn kernels (grain fruits/caryopsis)
-        - Whole allspice (berry fruits)
-        - Tomatoes (berry fruits)
-        - Eggplants (berry fruits)
-        - Cucumbers (pepo fruits)
-        - Pumpkins (pepo fruits)
-        - Avocados (berry fruits)
-        - Olives (drupe fruits)
-    - For culinary categorization:
-      * Sweet or tart items served as dessert or snacks are typically considered fruits
-      * Items used in savory dishes are typically considered vegetables
-      * Many culinary vegetables are botanically fruits (tomatoes, eggplants, bell peppers, etc.)
-    - When in doubt about classification systems, default to the most common usage unless specified otherwise
-    - Herbs like basil, cilantro, and parsley are considered vegetables in culinary contexts
-    - Sweet potatoes are root vegetables (true botanical vegetables)
-    - Broccoli, celery, and lettuce are true botanical vegetables (not fruits)
-    Ensure your categorization is complete and accurate according to the specified criteria.
-    """
-    if context:
-        enhanced_text += f"\n\nContext:\n{context}"
-    enhanced_question["question"] = enhanced_text
-    # Get response from the agent
-    return get_agent_response(agent, enhanced_question)
 def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_URL) -> dict:
     """
     Process a single question using the appropriate handler.
@@ -502,12 +318,6 @@ def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_UR
             answer = handle_mathematical_question(agent, question, context)
         elif question_type == "context_based":
             answer = handle_context_based_question(agent, question, context)
-        elif question_type == "current_events":
-            answer = handle_current_events_question(agent, question, context)
-        elif question_type == "media_content":
-            answer = handle_media_content_question(agent, question, context)
-        elif question_type == "categorization":
-            answer = handle_categorization_question(agent, question, context)
         else:
             answer = handle_general_question(agent, question, context)

     # Convert to lowercase for case-insensitive matching
     text = question_text.lower()
     # Check for mathematical questions
+    if any(keyword in text for keyword in ["calculate", "compute", "equation", "formula", "derivative",
+                                          "integral", "probability", "statistics", "math"]):
         return "mathematical"
     # Check for technical implementation questions
+    if any(keyword in text for keyword in ["implement", "code", "algorithm", "function", "class",
+                                          "method", "programming", "pseudocode", "complexity"]):
         return "technical"
     # Check for context-based questions
+    if any(keyword in text for keyword in ["context", "file", "document", "text", "analyze",
+                                          "based on", "according to", "refer to"]):
         return "context_based"
     # Check for ethical/societal questions
+    if any(keyword in text for keyword in ["ethics", "ethical", "society", "impact", "bias",
+                                          "fairness", "responsible", "governance"]):
         return "ethical"
     # Check for factual knowledge questions
+    if any(keyword in text for keyword in ["define", "explain", "describe", "what is", "who is",
+                                          "when was", "history", "concept"]):
         return "factual"
     # Default to general if no specific type is detected
     enhanced_question = question.copy()
     question_text = question.get("question", "")
+    # Check if this is a counting question
+    is_counting_question = any(keyword in question_text.lower() for keyword in
+                              ["how many", "count", "number of", "total number", "quantity"])
+    if is_counting_question:
+        enhanced_text = f"""
+        [FACTUAL COUNTING QUESTION]
+        {question_text}
+        This is a counting question. Please:
+        1. Be precise and verify information from multiple sources when possible
+        2. Carefully distinguish between different categories (e.g., studio albums vs. live albums vs. compilations)
+        3. Pay careful attention to date ranges and ensure items fall within the specified period
+        4. Count only the items that exactly match all criteria in the question
+        5. When using Wikipedia as a source, make sure to check the entire article for complete information
+        6. For discographies, verify the type of each album before counting it
+        7. List all items you're counting to ensure accuracy
+        8. Double-check your count before providing the final answer
+        """
+    else:
+        enhanced_text = f"""
+        [FACTUAL KNOWLEDGE QUESTION]
+        {question_text}
+        Please provide a precise, accurate answer based on established facts and knowledge.
+        Include relevant examples and cite important research or developments when applicable.
+        """
     enhanced_question["question"] = enhanced_text
     return get_agent_response(agent, enhanced_question)
 def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_URL) -> dict:
     """
     Process a single question using the appropriate handler.
             answer = handle_mathematical_question(agent, question, context)
         elif question_type == "context_based":
             answer = handle_context_based_question(agent, question, context)
         else:
             answer = handle_general_question(agent, question, context)

requirements.txt CHANGED Viewed

@@ -15,14 +15,4 @@ pandas>=2.0.0
 # Utility dependencies
 tqdm>=4.66.1
 pydantic>=2.4.0
-tenacity>=8.2.3
-# Audio processing dependencies
-pydub>=0.25.1
-SpeechRecognition>=3.10.0
-# External information sources dependencies
-google-api-python-client>=2.100.0  # For YouTube API
-youtube-transcript-api>=0.6.1      # For YouTube transcripts
-google-search-results>=2.4.2       # For SerpAPI
-tavily-python>=0.2.6               # For Tavily search

 # Utility dependencies
 tqdm>=4.66.1
 pydantic>=2.4.0
+tenacity>=8.2.3