# Source: Hugging Face Spaces - codeby-hp/medical-rag-chatbot (Space status: Sleeping)
# File size: 7,197 Bytes
# 15a08d2 (presumably the commit hash of this file revision)

"""

Utility functions for query classification and response generation

"""
import re
from typing import Tuple


class QueryClassifier:
    """Rule-based router deciding whether a query needs document retrieval.

    ``needs_retrieval`` applies a fixed sequence of heuristics (medical
    keywords, short greeting/acknowledgment patterns, leading question words,
    query length). ``get_simple_response`` supplies a canned reply for queries
    that were routed away from retrieval.
    """

    # Simple greetings/acknowledgments (no retrieval needed).
    # Order matters: get_simple_response pairs each pattern with the reply at
    # the same index in _SIMPLE_RESPONSES.
    SIMPLE_PATTERNS = [
        r"\b(hi|hello|hey|greetings|good morning|good evening|good afternoon)\b",
        r"\b(thank you|thanks|thx|appreciate it)\b",
        r"\b(bye|goodbye|see you|take care)\b",
        r"\b(ok|okay|got it|understood|alright|sure)\b",
        r"\b(yes|yeah|yep|no|nope)\b",
    ]

    # Medical keywords (definitely needs retrieval).
    # Matched as plain substrings of the lower-cased query, so plurals and
    # compound terms ("symptoms", "chemotherapy") match too -- as do unrelated
    # words that merely contain a keyword (e.g. "latest" contains "test").
    MEDICAL_KEYWORDS = [
        "symptom",
        "treatment",
        "disease",
        "diagnosis",
        "medicine",
        "medication",
        "cure",
        "pain",
        "fever",
        "infection",
        "doctor",
        "hospital",
        "prescription",
        "side effect",
        "dosage",
        "therapy",
        "vaccine",
        "surgery",
        "condition",
        "blood",
        "pressure",
        "diabetes",
        "cancer",
        "heart",
        "lung",
        "kidney",
        "test",
        "scan",
        "mri",
        "x-ray",
        "injury",
        "allergy",
        "chronic",
        "acute",
        "disorder",
        "illness",
        "sick",
        "health",
    ]

    # Words that mark a query of three or more words as a question when one of
    # them appears among its first three tokens.
    QUESTION_WORDS = frozenset(
        {
            "what",
            "how",
            "why",
            "when",
            "where",
            "which",
            "who",
            "can",
            "should",
            "is",
            "are",
            "does",
            "do",
            "could",
            "would",
            "will",
        }
    )

    # Characters stripped from both ends of a token before it is compared with
    # QUESTION_WORDS, so that "why?" or "how," still count as question words.
    _TOKEN_EDGE_PUNCTUATION = ".,!?;:\"'()"

    # Canned replies, index-aligned with SIMPLE_PATTERNS.
    _SIMPLE_RESPONSES = (
        # Greetings
        "Hello! I'm your medical assistant. I can help answer questions about "
        "symptoms, treatments, medications, and general health information. "
        "How can I assist you today?",
        # Thanks
        "You're very welcome! If you have any other health-related questions, "
        "feel free to ask. I'm here to help!",
        # Goodbye
        "Goodbye! Take care of your health. Feel free to return anytime you "
        "have questions. Stay well!",
        # Acknowledgments
        "Is there anything else you'd like to know about your health or medical concerns?",
        # Yes/No
        "Could you please provide more details about your question? "
        "I'm here to help with any health-related information you need.",
    )

    # Reply used when none of SIMPLE_PATTERNS matches.
    _DEFAULT_RESPONSE = (
        "I'm here to help with medical and health-related questions. "
        "Could you please elaborate on what you'd like to know?"
    )

    @classmethod
    def needs_retrieval(cls, query: str) -> Tuple[bool, str]:
        """Determine whether a query needs document retrieval.

        Rules are evaluated in order and the first match wins:

        1. Any medical keyword occurs in the query -> retrieval.
        2. At most three words and a greeting/acknowledgment pattern -> none.
        3. At least three words and a question word among the first three
           tokens (edge punctuation ignored) -> retrieval.
        4. Exactly one word -> none.
        5. Four or more words -> retrieval.
        6. Anything else -> none.

        Args:
            query: User's input message. ``None`` or an empty string is
                treated as an empty query.

        Returns:
            Tuple[bool, str]: ``(needs_retrieval, reason)`` where ``reason``
            is one of ``"medical_keyword_detected"``, ``"simple_greeting"``,
            ``"question_detected"``, ``"single_word"``,
            ``"substantial_query"`` or ``"default_no_retrieval"``.
        """
        query_lower = (query or "").lower().strip()
        words = query_lower.split()
        word_count = len(words)

        # Rule 1: medical keywords take priority over everything else, so a
        # short message such as "no fever today" is not mistaken for a bare
        # yes/no acknowledgment and answered with a canned reply.
        if any(keyword in query_lower for keyword in cls.MEDICAL_KEYWORDS):
            return True, "medical_keyword_detected"

        # Rule 2: very short queries matching a simple pattern (no retrieval).
        if word_count <= 3 and any(
            re.search(pattern, query_lower) for pattern in cls.SIMPLE_PATTERNS
        ):
            return False, "simple_greeting"

        # Rule 3: question word near the start of a longer query.
        if word_count >= 3:
            leading_tokens = {
                word.strip(cls._TOKEN_EDGE_PUNCTUATION) for word in words[:3]
            }
            if not cls.QUESTION_WORDS.isdisjoint(leading_tokens):
                return True, "question_detected"

        # Rule 4: single-word queries are context-dependent; skip retrieval.
        if word_count == 1:
            return False, "single_word"

        # Rule 5: if uncertain and the query is substantial, use retrieval.
        if word_count >= 4:
            return True, "substantial_query"

        return False, "default_no_retrieval"

    @classmethod
    def get_simple_response(cls, query: str) -> str:
        """Generate the canned response for a non-retrieval query.

        The patterns in ``SIMPLE_PATTERNS`` are tried in order (greetings,
        thanks, goodbye, acknowledgments, yes/no); the reply paired with the
        first matching pattern is returned.

        Args:
            query: User's input message. ``None`` or an empty string is
                treated as an empty query.

        Returns:
            str: The reply for the first matching pattern, or a generic
            prompt to elaborate when no pattern matches.
        """
        query_lower = (query or "").lower().strip()

        for pattern, response in zip(cls.SIMPLE_PATTERNS, cls._SIMPLE_RESPONSES):
            if re.search(pattern, query_lower):
                return response

        return cls._DEFAULT_RESPONSE


class StreamingHandler:
    """Async generators that emit chat replies as ``data: <json>`` frames."""

    @staticmethod
    def _frame(payload: dict) -> str:
        """Serialize *payload* to JSON and wrap it as one ``data:`` frame.

        Args:
            payload: JSON-serializable dict to send.

        Returns:
            str: ``"data: "`` + the JSON text + a blank-line terminator.
        """
        import json

        body = json.dumps(payload)
        return f"data: {body}\n\n"

    @staticmethod
    async def stream_rag_response(rag_chain, input_data: dict):
        """Stream answer tokens produced by a RAG chain.

        Chunks from ``rag_chain.astream(input_data)`` that contain an
        ``"answer"`` key are forwarded one frame each; other chunks are
        skipped. A final frame carries ``done: True`` plus the concatenated
        answer. If anything raises while streaming, a single error frame with
        ``done: True`` is emitted instead of the completion frame.

        Args:
            rag_chain: The retrieval chain to stream from; must expose an
                async-iterable ``astream(input_data)``.
            input_data: Dict with 'input' and 'chat_history' keys.

        Yields:
            str: JSON formatted chunks with token data.
        """
        frame = StreamingHandler._frame

        try:
            collected = ""
            async for piece in rag_chain.astream(input_data):
                # Only chunks carrying answer text are relayed to the client.
                if "answer" not in piece:
                    continue
                fragment = piece["answer"]
                collected += fragment
                yield frame({"token": fragment, "done": False})

            # Completion signal with the whole answer attached.
            yield frame({"token": "", "done": True, "full_answer": collected})

        except Exception as exc:
            failure = f"Streaming error: {str(exc)}"
            yield frame({"error": failure, "done": True})

    @staticmethod
    async def stream_simple_response(response: str):
        """Stream a ready-made (non-retrieval) reply one character at a time.

        Each character is sent as its own frame, followed by a 0.01 s pause;
        a final frame carries ``done: True`` and the full text.

        Args:
            response: The complete response text.

        Yields:
            str: JSON formatted chunks with token data.
        """
        import asyncio

        frame = StreamingHandler._frame

        for symbol in response:
            yield frame({"token": symbol, "done": False})
            # Short pause between characters for a smooth typing effect.
            await asyncio.sleep(0.01)

        # Completion signal.
        yield frame({"token": "", "done": True, "full_answer": response})