Update rag_components.py
rag_components.py (+159 -12)
@@ -6,7 +6,11 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import TextLoader
 from langchain_huggingface import HuggingFacePipeline
 from langchain.chains import RetrievalQA
-from …
+from langchain.prompts import PromptTemplate
+from langchain.callbacks.base import BaseCallbackHandler
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextStreamer
+import streamlit as st
+from typing import Any, Dict, List
 
 # Set cache directories for HuggingFace Spaces
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
@@ -18,6 +22,22 @@ os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/sentence_transformers_cache"
 for cache_dir in ["/tmp/huggingface_cache", "/tmp/transformers_cache", "/tmp/hf_hub_cache", "/tmp/sentence_transformers_cache"]:
     os.makedirs(cache_dir, exist_ok=True)
 
+class StreamingCallbackHandler(BaseCallbackHandler):
+    """Callback handler for streaming responses."""
+
+    def __init__(self, placeholder):
+        self.placeholder = placeholder
+        self.text = ""
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Handle a new token from the LLM."""
+        self.text += token
+        self.placeholder.markdown(self.text + "▌")
+
+    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
+        """Handle the end of the LLM response."""
+        self.placeholder.markdown(self.text)
+
 def load_documents(file_path: str):
     """Loads documents from a specified file path."""
     loader = TextLoader(file_path)
@@ -57,8 +77,7 @@ def setup_vector_store(docs, embeddings, persist_directory="./chroma_db"):
     return db.as_retriever()
 
 def create_qa_chain(retriever, model_name="microsoft/DialoGPT-medium"):
-    """Creates …
-    Using a smaller, more reliable model for HuggingFace Spaces."""
+    """Creates an enhanced QA chain with better prompting and streaming capabilities."""
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
@@ -75,32 +94,52 @@ def create_qa_chain(retriever, model_name="microsoft/DialoGPT-medium"):
             cache_dir="/tmp/transformers_cache",
             device_map="auto",
             trust_remote_code=True,
-            torch_dtype="auto"
+            torch_dtype="auto"
         )
 
+        # Create pipeline with better parameters to reduce repetition
         pipe = pipeline(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            max_new_tokens=…
+            max_new_tokens=150,
             temperature=0.7,
             top_p=0.9,
-            …
+            top_k=40,
+            repetition_penalty=1.2,  # Reduce repetition
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            return_full_text=False  # Only return new tokens
         )
 
         llm = HuggingFacePipeline(pipeline=pipe)
 
+        # Enhanced prompt template for better QA responses
+        prompt_template = """You're Juma's Assistant. Use the following context to answer the user's question. If you cannot answer based on the context, say so clearly.
+
+Context: {context}
+
+Question: {question}
+
+Answer: Let me help you with that based on the information provided."""
+
+        prompt = PromptTemplate(
+            template=prompt_template,
+            input_variables=["context", "question"]
+        )
+
         qa_chain = RetrievalQA.from_chain_type(
             llm=llm,
             retriever=retriever,
             chain_type="stuff",
-            return_source_documents=True
+            return_source_documents=True,
+            chain_type_kwargs={"prompt": prompt}
         )
         return qa_chain
 
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
-        # Try with an even smaller model as fallback
         try:
             print("Trying fallback model: distilgpt2")
             return create_qa_chain_fallback(retriever)
@@ -109,7 +148,7 @@ def create_qa_chain(retriever, model_name="microsoft/DialoGPT-medium"):
             raise e2
 
 def create_qa_chain_fallback(retriever):
-    """Fallback QA chain with a very small model."""
+    """Fallback QA chain with a very small model and better parameters."""
    tokenizer = AutoTokenizer.from_pretrained(
         "distilgpt2",
         cache_dir="/tmp/transformers_cache"
@@ -125,17 +164,125 @@ def create_qa_chain_fallback(retriever):
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=…
+        max_new_tokens=100,
         temperature=0.7,
-        …
+        top_p=0.9,
+        top_k=40,
+        repetition_penalty=1.3,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+        return_full_text=False
     )
 
     llm = HuggingFacePipeline(pipeline=pipe)
 
+    # Same enhanced prompt
+    prompt_template = """You're Juma's Assistant. Use the following context to answer the user's question. If you cannot answer based on the context, say so clearly.
+
+Context: {context}
+
+Question: {question}
+
+Answer: Let me help you with that based on the information provided."""
+
+    prompt = PromptTemplate(
+        template=prompt_template,
+        input_variables=["context", "question"]
+    )
+
     qa_chain = RetrievalQA.from_chain_type(
         llm=llm,
         retriever=retriever,
         chain_type="stuff",
-        return_source_documents=True
+        return_source_documents=True,
+        chain_type_kwargs={"prompt": prompt}
     )
     return qa_chain
+
+def create_streaming_response(qa_chain, question: str, placeholder):
+    """Create a streaming response using the QA chain."""
+    try:
+        # Get the response first
+        result = qa_chain.invoke({"query": question})
+
+        # Extract just the answer part
+        answer = result.get("result", "")
+
+        # Clean up the response
+        answer = clean_response(answer)
+
+        # Simulate streaming by displaying character by character
+        import time
+        displayed_text = ""
+
+        for i, char in enumerate(answer):
+            displayed_text += char
+            placeholder.markdown(displayed_text + "▌")
+
+            # Add a small delay for the streaming effect
+            if i % 3 == 0:  # Every 3 characters
+                time.sleep(0.02)  # 20 ms delay
+
+        # Final display without the cursor
+        placeholder.markdown(displayed_text)
+
+        return displayed_text
+
+    except Exception as e:
+        placeholder.error(f"Error generating response: {e}")
+        return "I apologize, but I encountered an error while processing your question."
+
+def clean_response(text: str) -> str:
+    """Clean up the response to remove repetition and improve quality."""
+    if not text:
+        return "I couldn't find relevant information to answer your question."
+
+    # Remove the prompt part if it's included in the response
+    if "Answer: Let me help you with that based on the information provided." in text:
+        text = text.split("Answer: Let me help you with that based on the information provided.", 1)[-1].strip()
+
+    # Remove common prefixes that models add
+    prefixes_to_remove = [
+        "Based on the context provided,",
+        "According to the document,",
+        "The document states that",
+        "From the information given,",
+        "Let me help you with that based on the information provided."
+    ]
+
+    for prefix in prefixes_to_remove:
+        if text.startswith(prefix):
+            text = text[len(prefix):].strip()
+
+    # Split into sentences and remove repetitive ones
+    sentences = text.split('.')
+    cleaned_sentences = []
+
+    for sentence in sentences:
+        sentence = sentence.strip()
+        if sentence and len(sentence) > 10:  # Filter out very short fragments
+            # Check whether this sentence is too similar to recent ones
+            is_repetitive = False
+            for recent in cleaned_sentences[-2:]:
+                if len(set(sentence.split()) & set(recent.split())) > len(sentence.split()) * 0.7:
+                    is_repetitive = True
+                    break
+
+            if not is_repetitive:
+                cleaned_sentences.append(sentence)
+
+    # Join the sentences back together
+    result = '. '.join(cleaned_sentences)
+
+    # Ensure it ends properly
+    if result and not result.endswith('.'):
+        result += '.'
+
+    # Limit length and ensure quality
+    if len(result) > 500:
+        # Cut at a sentence boundary
+        sentences = result[:500].split('.')
+        result = '. '.join(sentences[:-1]) + '.'
+
+    return result if result.strip() else "I couldn't generate a proper response. Please try rephrasing your question."
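Taken together, these components are meant to be driven from a Streamlit app. Below is a minimal wiring sketch, assuming the file is importable as rag_components and that load_documents returns the loaded documents; the input path, splitter settings, and embedding model are illustrative choices, not part of this commit.

# Hypothetical Streamlit wiring for the components in rag_components.py.
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from rag_components import (
    load_documents,
    setup_vector_store,
    create_qa_chain,
    create_streaming_response,
)

# Load and chunk the source document (file name and chunk sizes are assumptions).
docs = load_documents("knowledge_base.txt")
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

# Build the retriever and QA chain exactly as the functions above expect.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
retriever = setup_vector_store(chunks, embeddings)
qa_chain = create_qa_chain(retriever)

question = st.text_input("Ask a question")
if question:
    placeholder = st.empty()  # target for the simulated token-by-token display
    answer = create_streaming_response(qa_chain, question, placeholder)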
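Note that TextStreamer is imported but never used, and the streaming in create_streaming_response is simulated: the answer is generated in full, then replayed character by character. transformers supports genuine token-level streaming; a sketch, assuming output to stdout (a TextStreamer subclass would be needed to feed a Streamlit placeholder or the StreamingCallbackHandler above, which HuggingFacePipeline does not invoke per token on its own):

# Sketch: real token streaming via transformers' TextStreamer.
# distilgpt2 here only mirrors the fallback model used above.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# skip_prompt=True suppresses the echoed input prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)
inputs = tokenizer(
    "Question: What is retrieval-augmented generation?\nAnswer:",
    return_tensors="pt",
)

# Tokens are decoded and printed incrementally as generate() produces them,
# instead of only after generation finishes.
model.generate(**inputs, max_new_tokens=60, streamer=streamer)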
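The repetition filter in clean_response drops a sentence when more than 70% of its words already occur in one of the two previously kept sentences, after stripping the echoed prompt line. A small worked example of that behavior, assuming the function is used exactly as written above:

# Demonstrates clean_response on a typical small-model output: the echoed
# prompt line is stripped and the duplicated sentence is dropped.
from rag_components import clean_response

raw = (
    "Answer: Let me help you with that based on the information provided."
    " Juma is a software engineer based in Nairobi."
    " Juma is a software engineer based in Nairobi."
    " He works on machine learning projects."
)

print(clean_response(raw))
# Expected, per the logic above:
# "Juma is a software engineer based in Nairobi. He works on machine learning projects."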