Spaces:

Phoenix21
/

DailyWellnessMVPchatbot

Sleeping

App Files Files Community

Phoenix21 committed on Dec 20, 2024

Commit

f5296e8

verified ·

1 Parent(s): 5659335

overly restricted error

Browse files

Files changed (1) hide show

app.py +11 -66

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 import logging
 import re
-from langchain_community.vectorstores import Chroma  # Updated import
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_groq import ChatGroq
@@ -61,67 +61,22 @@ def load_documents(file_paths):
             logger.error(f"Error processing file {file_path}: {e}")
     return docs
-def ensure_complete_sentences(text):
-    sentences = re.findall(r'[^.!?]*[.!?]', text)
-    if sentences:
-        return ' '.join(s.strip() for s in sentences)
-    return text
-# --- Added: Handling "Not Feasible" Keywords and Gibberish Inputs ---
 def is_valid_input(text):
-    """
-    Validate the user's input question.
-    Returns a tuple (is_valid, message).
-    """
     if not text or text.strip() == "":
         return False, "Input cannot be empty. Please provide a meaningful question."
-    if not re.search('[A-Za-z]', text):
-        return False, "Input must contain alphabetic characters."
     if len(text.strip()) < 5:
         return False, "Input is too short. Please provide a more detailed question."
-    # Define not feasible keywords
-    not_feasible_keywords = [
-        "illegal", "harmful", "dangerous", "unethical", "inappropriate",
-        "forbidden", "restricted", "banned", "prohibited", "secret"
-    ]
-    # Check for not feasible keywords (case-insensitive)
-    pattern = re.compile(r'\b(' + '|'.join(not_feasible_keywords) + r')\b', re.IGNORECASE)
-    if pattern.search(text):
-        return False, "Your question contains restricted or inappropriate content. Please modify your query."
-    # --- Added: Gibberish Detection ---
-    # Simple heuristic: Check the ratio of alphabetic characters to total characters
-    total_chars = len(text)
-    alpha_chars = len(re.findall(r'[A-Za-z]', text))
-    ratio = alpha_chars / total_chars if total_chars > 0 else 0
-    if ratio < 0.6:
-        return False, "Your input appears to be gibberish or nonsensical. Please enter a clear and meaningful question."
-    # Additionally, check for a minimum number of recognizable words
     words = re.findall(r'\b\w+\b', text)
-    recognized_words = [word for word in words if word.lower() in recognized_words_set]
-    if len(recognized_words) < max(3, len(words) * 0.4):
-        return False, "Your input contains too many unrecognizable words. Please enter a clear and meaningful question."
-    return True, "Valid input."
-# Predefined set of common English words for basic gibberish detection
-# In a production environment, consider using a more comprehensive dictionary or language model
-recognized_words_set = set([
-    'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'I',
-    'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at',
-    'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her',
-    'she', 'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there',
-    'their', 'what', 'so', 'up', 'out', 'if', 'about', 'who', 'get',
-    'which', 'go', 'me'
-    # Add more words as needed
-])
 def initialize_llm(model, temperature, max_tokens):
     prompt_allocation = int(max_tokens * 0.2)
@@ -152,20 +107,13 @@ def create_rag_pipeline(file_paths, model, temperature, max_tokens):
         embedding=embedding_model,
         persist_directory="/tmp/chroma_db"
     )
-    # vectorstore.persist()  # Deprecated in Chroma 0.4.x
     retriever = vectorstore.as_retriever()
-    # --- Improved Prompt Template ---
     custom_prompt_template = PromptTemplate(
         input_variables=["context", "question"],
         template="""
 You are an AI assistant specialized in daily wellness. Provide a concise, thorough, and stand-alone answer to the user's question based on the given context. Include relevant examples or schedules where beneficial. **When listing steps or guidelines, format them as a numbered list with appropriate markdown formatting.** The final answer should be coherent, self-contained, and end with a complete sentence.
-If the question contains restricted or inappropriate content, respond with a polite message indicating that you cannot assist with that request.
-If the question appears to be gibberish or nonsensical, respond with a polite message requesting clarification or a more coherent question.
 Context:
 {context}
 Question:
@@ -196,10 +144,7 @@ def answer_question(model, temperature, max_tokens, question):
         return "The system is currently unavailable. Please try again later."
     try:
         answer = rag_chain.run(question)
-        # Remove or modify ensure_complete_sentences if necessary
-        # complete_answer = ensure_complete_sentences(answer)
-        complete_answer = answer
-        return complete_answer
     except Exception as e_inner:
         logger.error(f"Error: {e_inner}")
         return "An error occurred while processing your request."
@@ -215,7 +160,7 @@ interface = gr.Interface(
         gr.Slider(label="Max Tokens", minimum=200, maximum=2048, step=1, value=max_tokens),
         gr.Textbox(label="Question", placeholder="e.g., What is box breathing and how does it help reduce anxiety?")
     ],
-    outputs=gr.Markdown(label="Answer"),  # Updated output component
     title="Daily Wellness AI",
     description="Ask questions about daily wellness and receive a concise, complete answer.",
     examples=[

 import os
 import logging
 import re
+from langchain_community.vectorstores import Chroma
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_groq import ChatGroq
             logger.error(f"Error processing file {file_path}: {e}")
     return docs
+# Simplify input validation
 def is_valid_input(text):
+    """Validate the user's input question."""
     if not text or text.strip() == "":
         return False, "Input cannot be empty. Please provide a meaningful question."
     if len(text.strip()) < 5:
         return False, "Input is too short. Please provide a more detailed question."
+    # Gibberish detection: Ensure text contains valid words
     words = re.findall(r'\b\w+\b', text)
+    if len(words) < 3:  # Require at least three recognizable words
+        return False, "Input appears incomplete. Please provide a more meaningful question."
+    return True, "Valid input."
 def initialize_llm(model, temperature, max_tokens):
     prompt_allocation = int(max_tokens * 0.2)
         embedding=embedding_model,
         persist_directory="/tmp/chroma_db"
     )
     retriever = vectorstore.as_retriever()
     custom_prompt_template = PromptTemplate(
         input_variables=["context", "question"],
         template="""
 You are an AI assistant specialized in daily wellness. Provide a concise, thorough, and stand-alone answer to the user's question based on the given context. Include relevant examples or schedules where beneficial. **When listing steps or guidelines, format them as a numbered list with appropriate markdown formatting.** The final answer should be coherent, self-contained, and end with a complete sentence.
 Context:
 {context}
 Question:
         return "The system is currently unavailable. Please try again later."
     try:
         answer = rag_chain.run(question)
+        return answer.strip()
     except Exception as e_inner:
         logger.error(f"Error: {e_inner}")
         return "An error occurred while processing your request."
         gr.Slider(label="Max Tokens", minimum=200, maximum=2048, step=1, value=max_tokens),
         gr.Textbox(label="Question", placeholder="e.g., What is box breathing and how does it help reduce anxiety?")
     ],
+    outputs=gr.Markdown(label="Answer"),
     title="Daily Wellness AI",
     description="Ask questions about daily wellness and receive a concise, complete answer.",
     examples=[