Spaces:

achraf2203
/

RAG-Chatbot

Sleeping

App Files Community

mohamedachraf committed on Aug 3, 2025

Commit

3e32b0f

1 Parent(s): eda06e0

Add application file

Browse files

Files changed (1) hide show

app.py +74 -17

app.py CHANGED Viewed

@@ -29,23 +29,20 @@ import tempfile
 # Prompt template
-template = """Instruction:
-You are an AI assistant for answering questions about the provided context.
-You are given the following extracted parts of a long document and a question. Provide a detailed answer.
-If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
-=======
-{context}
-=======
 Question: {question}
-Output:\n"""
 # Multi-query generation prompt
-multi_query_template = """You are an AI language model assistant. Your task is to generate 3
-different versions of the given user question to retrieve relevant documents from a vector
-database. By generating multiple perspectives on the user question, your goal is to help
-the user overcome some of the limitations of the distance-based similarity search.
-Provide these alternative questions separated by newlines.
-Original question: {question}"""
 QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
 MULTI_QUERY_PROMPT = PromptTemplate(template=multi_query_template, input_variables=["question"])
@@ -54,6 +51,9 @@ MULTI_QUERY_PROMPT = PromptTemplate(template=multi_query_template, input_variabl
 model_id = "microsoft/phi-2"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id, torch_dtype=torch.float32, trust_remote_code=True
 )
@@ -66,6 +66,38 @@ embeddings = HuggingFaceEmbeddings(
 )
 # Returns a faiss vector store retriever given a txt or pdf file
 def prepare_vector_store_retriever(filename):
     # Load data based on file extension
@@ -208,6 +240,10 @@ def generate(question, answer, text_file, max_new_tokens, use_multi_query, store
             max_new_tokens=max_new_tokens,
             pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
             streamer=streamer,
         )
@@ -245,7 +281,23 @@ def generate(question, answer, text_file, max_new_tokens, use_multi_query, store
         try:
             for token in streamer:
                 response += token
-                yield response.strip()
         except Exception as e:
             yield f"Error during streaming: {str(e)}"
             return
@@ -259,8 +311,13 @@ def generate(question, answer, text_file, max_new_tokens, use_multi_query, store
             return
         # Store Q&A pair if requested and response is valid
-        final_response = response.strip()
-        if store_qa and final_response and "Error" not in final_response and len(final_response) > 0:
             try:
                 store_qa_pair(question, final_response, vectorstore)
             except Exception as e:

 # Prompt template
+template = """Context: {context}
 Question: {question}
+Answer: Based on the provided context, """
 # Multi-query generation prompt
+multi_query_template = """Generate 3 different ways to ask this question:
+Original: {question}
+Alternative 1:
+Alternative 2:
+Alternative 3:"""
 QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
 MULTI_QUERY_PROMPT = PromptTemplate(template=multi_query_template, input_variables=["question"])
 model_id = "microsoft/phi-2"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
 model = AutoModelForCausalLM.from_pretrained(
     model_id, torch_dtype=torch.float32, trust_remote_code=True
 )
 )
def clean_response(text, *, max_repeats=3, min_sentence_chars=5, max_fallback_chars=500):
    """Tidy raw model output into a short run of complete sentences.

    The cleanup runs in three stages:

    1. Collapse every run of whitespace (including newlines) to one space.
    2. Cap runs of the same consecutive word at ``max_repeats`` occurrences,
       which trims the "the the the the ..." loops a language model can emit.
    3. Keep only complete sentences, i.e. text up to a ``.``, ``!`` or ``?``
       that is followed by whitespace or the end of the text. Anything after
       the last terminator is treated as an unfinished sentence and dropped.

    Args:
        text: Raw generated text. ``None`` or an empty string yields ``""``.
        max_repeats: Maximum number of identical consecutive words to keep.
        min_sentence_chars: A sentence is kept only when its text, without
            the trailing punctuation, is longer than this many characters.
        max_fallback_chars: Length cap applied when no sentence qualifies and
            the whitespace-normalised text is returned instead.

    Returns:
        The kept sentences joined by single spaces with their original
        punctuation, or the normalised text truncated to
        ``max_fallback_chars`` when no complete sentence was found.
    """
    # Imported locally so the function does not depend on the module's
    # top-level import block.
    import re
    from itertools import groupby, islice

    if not text:
        return ""

    # groupby yields one group per run of identical adjacent words; taking at
    # most `max_repeats` items from each group caps the repetition.
    deduped_words = []
    for _, run in groupby(text.split()):
        deduped_words.extend(islice(run, max_repeats))
    cleaned_text = ' '.join(deduped_words)

    # A terminator only ends a sentence when whitespace or the end of the
    # text follows it, so "3.14" and "e.g." inside a sentence stay intact.
    # Closing quotes/brackets directly after the terminator belong to it.
    sentence_end = re.compile(r'[.!?]+["\')\]]*(?=\s|$)')

    good_sentences = []
    start = 0
    for match in sentence_end.finditer(cleaned_text):
        sentence = cleaned_text[start:match.end()].strip()
        start = match.end()
        # Measure the sentence without its closing punctuation so that
        # fragments such as "Ok." are still recognised as too short.
        core = sentence.rstrip('.!?"\')]').strip()
        if len(core) > min_sentence_chars:
            good_sentences.append(sentence)
    # Whatever remains after `start` has no terminator: an incomplete tail.

    if good_sentences:
        return ' '.join(good_sentences)
    return cleaned_text[:max_fallback_chars]  # Fallback: truncate to reasonable length
 # Returns a faiss vector store retriever given a txt or pdf file
 def prepare_vector_store_retriever(filename):
     # Load data based on file extension
             max_new_tokens=max_new_tokens,
             pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
             streamer=streamer,
         )
         try:
             for token in streamer:
                 response += token
+                # Clean up the response - stop at natural points
+                cleaned_response = response.strip()
+                # Stop if we hit repetitive patterns
+                words = cleaned_response.split()
+                if len(words) > 10:
+                    # Check for repetitive patterns
+                    last_words = words[-5:]
+                    if len(set(last_words)) <= 2:  # Too much repetition
+                        break
+                # Stop at sentence endings if we have enough content
+                if len(cleaned_response) > 50 and cleaned_response.endswith(('.', '!', '?')):
+                    yield cleaned_response
+                    break
+                yield cleaned_response
         except Exception as e:
             yield f"Error during streaming: {str(e)}"
             return
             return
         # Store Q&A pair if requested and response is valid
+        final_response = clean_response(response.strip())
+        # Yield the final cleaned response
+        if final_response != response.strip():
+            yield final_response
+        if store_qa and final_response and "Error" not in final_response and len(final_response) > 10:
             try:
                 store_qa_pair(question, final_response, vectorstore)
             except Exception as e: