Spaces:

nada013
/

chat-gpu

Paused

App Files Files Community

Nada committed on Jul 11, 2025

Commit

5eea25b

1 Parent(s): 910249b

yy

Browse files

Files changed (1) hide show

chatbot.py +167 -58

chatbot.py CHANGED Viewed

@@ -19,15 +19,14 @@ from peft import PeftModel, PeftConfig
 from sentence_transformers import SentenceTransformer
 # LangChain imports
-# Core LangChain components for building conversational AI
-from langchain.llms import HuggingFacePipeline  # Wrapper for HuggingFace models
-from langchain.chains import LLMChain  # Chain for LLM interactions
-from langchain.memory import ConversationBufferMemory  # Memory for conversation history
-from langchain.prompts import PromptTemplate  # Template for structured prompts
-from langchain.embeddings import HuggingFaceEmbeddings  # Text embeddings for similarity search
-from langchain.text_splitter import RecursiveCharacterTextSplitter  # Document chunking
-from langchain.document_loaders import TextLoader  # Load text documents
-from langchain.vectorstores import FAISS  # Vector database for similarity search
 # Import FlowManager
 from conversation_flow import FlowManager
@@ -214,7 +213,11 @@ class MentalHealthChatbot:
         peft_model_path: str = "nada013/mental-health-chatbot",
         therapy_guidelines_path: str = None,
         use_4bit: bool = True,
-        device: str = None
     ):
         # Set device (cuda if available, otherwise cpu)
         if device is None:
@@ -234,6 +237,13 @@ class MentalHealthChatbot:
         logger.info(f"Using device: {self.device}")
         # Initialize models
         self.peft_model_path = peft_model_path
@@ -264,24 +274,12 @@ class MentalHealthChatbot:
         self.flow_manager = FlowManager(self.llm)
         # Setup conversation memory with LangChain
-        # ConversationBufferMemory stores the conversation history in a buffer
-        # This allows the chatbot to maintain context across multiple interactions
-        # - return_messages=True: Returns messages as a list of message objects
-        # - input_key="input": Specifies which key to use for the input in the memory
         self.memory = ConversationBufferMemory(
             return_messages=True,
             input_key="input"
         )
         # Create conversation prompt template
-        # PromptTemplate defines the structure for generating responses
-        # It includes placeholders for dynamic content that gets filled during generation
-        # Input variables:
-        # - history: Previous conversation context from memory
-        # - input: Current user message
-        # - past_context: Relevant past conversations from vector search
-        # - emotion_context: Detected emotions and their context
-        # - guidelines: Relevant therapeutic guidelines from vector search
         self.prompt_template = PromptTemplate(
             input_variables=["history", "input", "past_context", "emotion_context", "guidelines"],
             template="""You are a supportive and empathetic mental health conversational AI. Your role is to provide therapeutic support while maintaining professional boundaries.
@@ -323,7 +321,6 @@ Response:"""
         )
         # Setup vector database for retrieving relevant past conversations
         if therapy_guidelines_path and os.path.exists(therapy_guidelines_path):
             self.setup_vector_db(therapy_guidelines_path)
         else:
@@ -502,6 +499,109 @@ Response:"""
             logger.error(f"Error detecting emotions: {e}")
             return {"neutral": 1.0}
     def retrieve_relevant_context(self, query: str, k: int = 3) -> str:
         # Retrieve relevant past conversations using vector similarity
         if not hasattr(self, 'vector_db'):
@@ -568,31 +668,25 @@ Response:"""
             guidelines=guidelines
         )
-        # Clean up the response to only include the actual message
-        response = response.split("Response:")[-1].strip()
-        response = response.split("---")[0].strip()
-        response = response.split("Note:")[0].strip()
-        # Remove any casual greetings like "Hey" or "Hi"
-        response = re.sub(r'^(Hey|Hi|Hello|Hi there|Hey there),\s*', '', response)
         # Ensure the response is unique and not repeating previous messages
         if len(conversation_history) > 0:
             last_responses = [msg["text"] for msg in conversation_history[-4:] if msg["role"] == "assistant"]
             if response in last_responses:
                 # Generate a new response with a different angle
-                response = self.conversation.predict(
                     input=f"{prompt} (Please provide a different perspective)",
                     past_context=past_context,
                     emotion_context=emotion_context,
                     guidelines=guidelines
                 )
-                response = response.split("Response:")[-1].strip()
-                response = re.sub(r'^(Hey|Hi|Hello|Hi there|Hey there),\s*', '', response)
-        return response.strip()
     def generate_session_summary(
         self,
@@ -838,6 +932,9 @@ Would you like to connect with a professional now, or would you prefer to keep t
             return crisis_response
         # Detect emotions
         emotions = self.detect_emotion(message)
         conversation.emotion_history.append(emotions)
@@ -854,34 +951,46 @@ Would you like to connect with a professional now, or would you prefer to keep t
                 "role": msg.role
             })
-        # Generate response
-        response_text = self.generate_response(message, emotions, conversation_history)
-        # Generate a follow-up question if the response is too short
-        if len(response_text.split()) < 20 and not response_text.endswith('?'):
-            follow_up_prompt = f"""
 Recent conversation:
 {chr(10).join([f"{msg['role']}: {msg['text']}" for msg in conversation_history[-3:]])}
 Now, write a single empathetic and open-ended question to encourage the user to share more.
 Respond with just the question, no explanation.
 """
-            follow_up = self.llm.invoke(follow_up_prompt).strip()
-            # Clean and extract only the actual question (first sentence ending with '?')
-            matches = re.findall(r'([^\n.?!]*\?)', follow_up)
-            if matches:
-                question = matches[0].strip()
-            else:
-                question = follow_up.strip().split('\n')[0]
-            # If the main response is very short, return just the question
-            if len(response_text.split()) < 5:
-                response_text = question
-            else:
-                response_text = f"{response_text}\n\n{question}"
-        # Final post-processing: remove any LLM commentary that may have leaked in
-        response_text = response_text.strip()
-        response_text = re.sub(r"(Your response|This response).*", "", response_text, flags=re.IGNORECASE).strip()
         #  assistant response -> conversation history
         assistant_message = Message(

 from sentence_transformers import SentenceTransformer
 # LangChain imports
+from langchain.llms import HuggingFacePipeline
+from langchain.chains import LLMChain
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import PromptTemplate
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import TextLoader
+from langchain.vectorstores import FAISS
 # Import FlowManager
 from conversation_flow import FlowManager
         peft_model_path: str = "nada013/mental-health-chatbot",
         therapy_guidelines_path: str = None,
         use_4bit: bool = True,
+        device: str = None,
+        max_response_length: int = 500,  # Maximum characters in response
+        max_response_words: int = 100,   # Maximum words in response
+        min_response_words: int = 10,    # Minimum words in response
+        max_consecutive_responses: int = 3  # Max consecutive responses without user input
     ):
         # Set device (cuda if available, otherwise cpu)
         if device is None:
         logger.info(f"Using device: {self.device}")
+        # Set response limits
+        self.max_response_length = max_response_length
+        self.max_response_words = max_response_words
+        self.min_response_words = min_response_words
+        self.max_consecutive_responses = max_consecutive_responses
+        self.consecutive_response_count = 0  # Track consecutive responses
         # Initialize models
         self.peft_model_path = peft_model_path
         self.flow_manager = FlowManager(self.llm)
         # Setup conversation memory with LangChain
         self.memory = ConversationBufferMemory(
             return_messages=True,
             input_key="input"
         )
         # Create conversation prompt template
         self.prompt_template = PromptTemplate(
             input_variables=["history", "input", "past_context", "emotion_context", "guidelines"],
             template="""You are a supportive and empathetic mental health conversational AI. Your role is to provide therapeutic support while maintaining professional boundaries.
         )
         # Setup vector database for retrieving relevant past conversations
         if therapy_guidelines_path and os.path.exists(therapy_guidelines_path):
             self.setup_vector_db(therapy_guidelines_path)
         else:
             logger.error(f"Error detecting emotions: {e}")
             return {"neutral": 1.0}
+    def _validate_and_limit_response(self, response: str, user_message: str) -> str:
+        """
+        Clean a raw model response and enforce the configured length limits.
+
+        Steps, in order: keep only the text after the last "Response:" marker,
+        strip leaked commentary and a leading casual greeting, append a
+        follow-up question when the text has fewer than
+        ``self.min_response_words`` words, truncate to
+        ``self.max_response_words`` and then to ``self.max_response_length``,
+        and swap in a canned question when ``self._is_repetitive`` flags the
+        result.
+
+        Args:
+            response: Raw text produced by the model.
+            user_message: The message being answered; only used for the
+                repetition check.
+
+        Returns:
+            The cleaned response, ending in '.', '!' or '?', or a canned
+            follow-up question when nothing usable remains.
+        """
+        fallback = "I understand. Could you tell me more about that?"
+        if not response or not response.strip():
+            return fallback
+        # The answer is whatever follows the last "Response:" marker. Deleting
+        # from the marker to the end of the line (as a regex alternative would)
+        # wipes out an answer of the form "Response: <text>".
+        response = response.rsplit("Response:", 1)[-1].strip()
+        # Remove any LLM commentary or instructions
+        response = re.sub(r"(Your response|This response|Note:).*", "", response, flags=re.IGNORECASE).strip()
+        response = re.sub(r"---.*", "", response).strip()
+        # Remove casual greetings
+        response = re.sub(r'^(Hey|Hi|Hello|Hi there|Hey there),\s*', '', response)
+        # Cleaning can consume the whole text; use the fallback in that case too
+        if not response:
+            return fallback
+        # Check if response is too short
+        word_count = len(response.split())
+        if word_count < self.min_response_words:
+            logger.info(f"Response too short ({word_count} words), adding follow-up question")
+            if not response.endswith('?'):
+                response += " Could you tell me more about that?"
+        # Count words and characters (after the optional follow-up was added)
+        words = response.split()
+        word_count = len(words)
+        char_count = len(response)
+        # Check if response is too long
+        if char_count > self.max_response_length or word_count > self.max_response_words:
+            logger.info(f"Response too long ({char_count} chars, {word_count} words), truncating")
+            if word_count > self.max_response_words:
+                # Truncate to max words
+                response = ' '.join(words[:self.max_response_words])
+                # Try to end at a sentence
+                end_point = max(response.rfind('.'), response.rfind('?'), response.rfind('!'))
+                if end_point > len(response) * 0.7:  # Sentence end within the last 30% of the text
+                    response = response[:end_point + 1]
+                else:
+                    # Add ellipsis if we can't end naturally
+                    response = response.rstrip() + "..."
+            # Plain `if`, not `elif`: a response cut to the word limit can still
+            # exceed the character limit when its words are long.
+            if len(response) > self.max_response_length:
+                clipped = response[:self.max_response_length]
+                # Try to end at a word boundary
+                last_space = clipped.rfind(' ')
+                if last_space > len(clipped) * 0.8:  # Word boundary within the last 20% of the text
+                    response = clipped[:last_space].rstrip()
+                else:
+                    # Reserve room for the ellipsis so it does not push the
+                    # text back over the character limit
+                    response = clipped[:max(self.max_response_length - 3, 0)].rstrip() + "..."
+        # Check for repetitive content
+        if self._is_repetitive(response, user_message):
+            logger.info("Response detected as repetitive, generating alternative")
+            return "I hear what you're saying. Could you help me understand this better?"
+        # Ensure response ends properly
+        if not response.endswith(('.', '!', '?')):
+            response = response.rstrip() + '.'
+        return response.strip()
+    def _is_repetitive(self, response: str, user_message: str) -> bool:
+        """
+        Report whether a response echoes the user or leans on stock phrases.
+
+        The comparison is case-insensitive and works on whitespace-separated
+        words. A response is flagged when either:
+          * the user message has more than three distinct words and over 60%
+            of them also occur in the response, or
+          * more than two of the stock empathy phrases occur in the response.
+
+        Args:
+            response: Candidate response text.
+            user_message: The user message the response answers.
+
+        Returns:
+            True when the response is considered repetitive, else False.
+        """
+        reply = response.lower()
+        user_tokens = set(user_message.lower().split())
+        # Very short user messages are exempt from the overlap test
+        if len(user_tokens) > 3:
+            echoed = user_tokens & set(reply.split())
+            if len(echoed) / len(user_tokens) > 0.6:
+                return True
+        stock_phrases = (
+            "i understand", "i hear you", "that sounds", "i can see",
+            "thank you for sharing", "i appreciate", "that must be",
+        )
+        hits = [phrase for phrase in stock_phrases if phrase in reply]
+        return len(hits) > 2
     def retrieve_relevant_context(self, query: str, k: int = 3) -> str:
         # Retrieve relevant past conversations using vector similarity
         if not hasattr(self, 'vector_db'):
             guidelines=guidelines
         )
+        # Validate and limit the response
+        response = self._validate_and_limit_response(response, prompt)
         # Ensure the response is unique and not repeating previous messages
         if len(conversation_history) > 0:
             last_responses = [msg["text"] for msg in conversation_history[-4:] if msg["role"] == "assistant"]
             if response in last_responses:
+                logger.info("Response detected as duplicate, generating alternative")
                 # Generate a new response with a different angle
+                alternative_response = self.conversation.predict(
                     input=f"{prompt} (Please provide a different perspective)",
                     past_context=past_context,
                     emotion_context=emotion_context,
                     guidelines=guidelines
                 )
+                alternative_response = self._validate_and_limit_response(alternative_response, prompt)
+                response = alternative_response
+        return response
     def generate_session_summary(
         self,
             return crisis_response
+        # Reset consecutive response counter when user sends a message
+        self.consecutive_response_count = 0
         # Detect emotions
         emotions = self.detect_emotion(message)
         conversation.emotion_history.append(emotions)
                 "role": msg.role
             })
+        # Check rate limiting for consecutive responses
+        if self.consecutive_response_count >= self.max_consecutive_responses:
+            logger.warning(f"Rate limit reached for user {user_id}, sending brief response")
+            response_text = "I'm here to listen. Take your time to share what's on your mind."
+            self.consecutive_response_count = 0  # Reset counter
+        else:
+            # Generate response
+            response_text = self.generate_response(message, emotions, conversation_history)
+            # Increment consecutive response counter
+            self.consecutive_response_count += 1
+            # Generate a follow-up question if the response is too short and we haven't hit limits
+            if (len(response_text.split()) < self.min_response_words and
+                not response_text.endswith('?') and
+                self.consecutive_response_count < self.max_consecutive_responses):
+                follow_up_prompt = f"""
 Recent conversation:
 {chr(10).join([f"{msg['role']}: {msg['text']}" for msg in conversation_history[-3:]])}
 Now, write a single empathetic and open-ended question to encourage the user to share more.
 Respond with just the question, no explanation.
 """
+                follow_up = self.llm.invoke(follow_up_prompt).strip()
+                # Clean and extract only the actual question (first sentence ending with '?')
+                matches = re.findall(r'([^\n.?!]*\?)', follow_up)
+                if matches:
+                    question = matches[0].strip()
+                else:
+                    question = follow_up.strip().split('\n')[0]
+                # Validate the follow-up question
+                question = self._validate_and_limit_response(question, message)
+                # If the main response is very short, return just the question
+                if len(response_text.split()) < 5:
+                    response_text = question
+                else:
+                    response_text = f"{response_text}\n\n{question}"
         # Add the assistant response to the conversation history
         assistant_message = Message(