Spaces:

achraf2203
/

RAG-Chatbot

Sleeping

App Files Files Community

mohamedachraf committed on Aug 3, 2025

Commit

8dc5c8f

1 Parent(s): d941be5

Add application file

Browse files

Files changed (2) hide show

app.py +155 -29
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -5,10 +5,11 @@ nltk.download('punkt_tab')
 import gradio as gr
 from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.document_loaders import UnstructuredFileLoader
 from langchain.vectorstores.faiss import FAISS
 from langchain.vectorstores.utils import DistanceStrategy
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain.prompts.prompt import PromptTemplate
@@ -20,6 +21,8 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import TextIteratorStreamer
 from threading import Thread
 # Prompt template
@@ -33,7 +36,16 @@ If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up
 Question: {question}
 Output:\n"""
 QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
 # Load Phi-2 model from hugging face hub
 model_id = "microsoft/phi-2"
@@ -51,10 +63,14 @@ embeddings = HuggingFaceEmbeddings(
 )
-# Returns a faiss vector store retriever given a txt file
 def prepare_vector_store_retriever(filename):
-    # Load data
-    loader = UnstructuredFileLoader(filename)
     raw_documents = loader.load()
     # Split the text
@@ -69,25 +85,104 @@ def prepare_vector_store_retriever(filename):
         documents, embeddings, distance_strategy=DistanceStrategy.DOT_PRODUCT
     )
-    return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2})
-# Retrieveal QA chian
-def get_retrieval_qa_chain(text_file, hf_model):
     retriever = default_retriever
     if text_file != default_text_file:
-        retriever = prepare_vector_store_retriever(text_file)
-    chain = RetrievalQA.from_chain_type(
-        llm=hf_model,
-        retriever=retriever,
-        chain_type_kwargs={"prompt": QA_PROMPT},
-    )
-    return chain
 # Generates response using the question answering chain defined earlier
-def generate(question, answer, text_file, max_new_tokens):
     streamer = TextIteratorStreamer(
         tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
     )
@@ -102,56 +197,87 @@ def generate(question, answer, text_file, max_new_tokens):
     )
     hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
-    qa_chain = get_retrieval_qa_chain(text_file, hf_model)
     query = f"{question}"
     if len(tokenizer.tokenize(query)) >= 512:
         query = "Repeat 'Your question is too long!'"
-    thread = Thread(target=qa_chain.invoke, kwargs={"input": {"query": query}})
     thread.start()
     response = ""
     for token in streamer:
         response += token
         yield response.strip()
-# replaces the retreiver in the question answering chain whenever a new file is uploaded
 def upload_file(file):
-    return file, file
 with gr.Blocks() as demo:
     gr.Markdown(
         """
   # Retrieval Augmented Generation with Phi-2: Question Answering demo
-  ### This demo uses the Phi-2 language model and Retrieval Augmented Generation (RAG). It allows you to upload a txt file and ask the model questions related to the content of that file.
   ### If you don't have one, there is a txt file already loaded, the new Oppenheimer movie's entire wikipedia page. The movie came out very recently in July, 2023, so the Phi-2 model is not aware of it.
   The context size of the Phi-2 model is 2048 tokens, so even this medium size wikipedia page (11.5k tokens) does not fit in the context window.
   Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to the our query and inject it into our prompt.
-  The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one txt file.
   """
     )
     default_text_file = "Oppenheimer-movie-wiki.txt"
-    default_retriever = prepare_vector_store_retriever(default_text_file)
     text_file = gr.State(default_text_file)
     gr.Markdown(
-        "## Upload a txt file or Use the Default 'Oppenheimer-movie-wiki.txt' that has already been loaded"
     )
     file_name = gr.Textbox(
-        label="Loaded text file", value=default_text_file, lines=1, interactive=False
     )
     upload_button = gr.UploadButton(
-        label="Click to upload a text file", file_types=["text"], file_count="single"
     )
     upload_button.upload(upload_file, upload_button, [file_name, text_file])
     gr.Markdown("## Enter your question")
     tokens_slider = gr.Slider(
         8,
@@ -172,7 +298,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             clear = gr.ClearButton([ques, ans])
-    btn.click(fn=generate, inputs=[ques, ans, text_file, tokens_slider], outputs=[ans])
     examples = gr.Examples(
         examples=[
             "Who portrayed J. Robert Oppenheimer in the new Oppenheimer movie?",

 import gradio as gr
 from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import UnstructuredFileLoader, PyPDFLoader
 from langchain.vectorstores.faiss import FAISS
 from langchain.vectorstores.utils import DistanceStrategy
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.schema import Document
 from langchain.chains import RetrievalQA
 from langchain.prompts.prompt import PromptTemplate
 from transformers import TextIteratorStreamer
 from threading import Thread
+import os
+import tempfile
 # Prompt template
 Question: {question}
 Output:\n"""
+# Multi-query generation prompt
+multi_query_template = """You are an AI language model assistant. Your task is to generate 3
+different versions of the given user question to retrieve relevant documents from a vector
+database. By generating multiple perspectives on the user question, your goal is to help
+the user overcome some of the limitations of the distance-based similarity search.
+Provide these alternative questions separated by newlines.
+Original question: {question}"""
 QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+MULTI_QUERY_PROMPT = PromptTemplate(template=multi_query_template, input_variables=["question"])
 # Load Phi-2 model from hugging face hub
 model_id = "microsoft/phi-2"
 )
+# Returns a faiss vector store retriever given a txt or pdf file
 def prepare_vector_store_retriever(filename):
+    # Load data based on file extension
+    if filename.lower().endswith('.pdf'):
+        loader = PyPDFLoader(filename)
+    else:
+        loader = UnstructuredFileLoader(filename)
     raw_documents = loader.load()
     # Split the text
         documents, embeddings, distance_strategy=DistanceStrategy.DOT_PRODUCT
     )
+    return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2}), vectorstore
+# Generate multiple queries for better retrieval
def generate_multiple_queries(question, hf_model):
    """Generate alternative phrasings of ``question`` for multi-query retrieval.

    The model is prompted with ``MULTI_QUERY_PROMPT`` and its output is split
    on newlines, one candidate query per non-blank line.

    Args:
        question: The user's original question.
        hf_model: Object exposing ``invoke(prompt)`` that returns the
            generated text as a string.

    Returns:
        A list of at most four distinct queries. The original question is
        always the first element. If generation fails, the list contains
        only the original question.
    """
    try:
        result = hf_model.invoke(MULTI_QUERY_PROMPT.format(question=question))
        variations = [line.strip() for line in result.split("\n") if line.strip()]
    except Exception as e:
        # Query expansion is best-effort: report the failure and fall back
        # to plain single-query retrieval instead of aborting the answer.
        print(f"Error generating query variations: {e}")
        return [question]

    # Seed the list with the original question so that truncating to four
    # entries can never drop it, then append each new variation once.
    queries = [question]
    for variation in variations:
        if variation not in queries:
            queries.append(variation)
    return queries[:4]  # Limit to 4 queries max
+# Multi-query retrieval function
def multi_query_retrieve(queries, retriever, max_docs=6):
    """Retrieve documents for several queries and merge the results.

    Documents are de-duplicated by their ``page_content`` and kept in the
    order in which they were first returned.

    Args:
        queries: Iterable of query strings.
        retriever: Object exposing ``get_relevant_documents(query)``.
        max_docs: Maximum number of unique documents to return.

    Returns:
        A list of at most ``max_docs`` unique documents. A query whose
        retrieval raises is reported and skipped; the remaining queries
        are still processed.
    """
    unique_docs = []
    seen_content = set()
    for query in queries:
        try:
            docs = retriever.get_relevant_documents(query)
        except Exception as e:
            # One failing variation must not sink the whole retrieval, but
            # the failure should be visible in the logs.
            print(f"Error retrieving documents for query {query!r}: {e}")
            continue
        for doc in docs:
            if doc.page_content not in seen_content:
                seen_content.add(doc.page_content)
                unique_docs.append(doc)
    return unique_docs[:max_docs]
+# Store Q&A pairs in vector database
def store_qa_pair(question, answer, vectorstore):
    """Store a question-answer pair as a new document in the vector database.

    Args:
        question: The question text.
        answer: The generated answer text.
        vectorstore: Object exposing ``add_documents(list_of_documents)``.

    Returns:
        True if the pair was added. False if the question or answer is
        blank, or if adding the document failed (the error is printed).
    """
    # A pair with a blank side carries no information and would only
    # pollute later retrievals, so it is never indexed.
    if not question or not question.strip():
        return False
    if not answer or not answer.strip():
        return False

    try:
        qa_doc = Document(
            page_content=f"Question: {question}\nAnswer: {answer}",
            metadata={"type": "qa_pair"},
        )
        # Add the Q&A pair to the existing vectorstore
        vectorstore.add_documents([qa_doc])
    except Exception as e:
        print(f"Error storing Q&A pair: {e}")
        return False
    return True
+# Retrieval QA chain with multi-query support
# Retriever/vector store pair built for the most recently used uploaded file,
# keyed by file path. It holds at most one entry so memory stays bounded.
# Without it every question would rebuild the index, and any Q&A pairs added
# to the previous index would be thrown away.
_uploaded_file_cache = {}


class _MultiQueryRetriever:
    """Retriever wrapper that expands one query into several variations."""

    def __init__(self, retriever, vectorstore, hf_model):
        self.retriever = retriever
        self.vectorstore = vectorstore
        self.hf_model = hf_model

    def get_relevant_documents(self, query):
        """Return the merged documents retrieved for all query variations."""
        queries = generate_multiple_queries(query, self.hf_model)
        return multi_query_retrieve(queries, self.retriever)


class _MultiQueryRetrievalQA:
    """Minimal QA chain: retrieve context, fill the prompt, call the LLM."""

    def __init__(self, llm, retriever, prompt):
        self.llm = llm
        self.retriever = retriever
        self.prompt = prompt

    def invoke(self, input_dict):
        """Answer ``input_dict["query"]`` using the retrieved context.

        Both ``{"query": ...}`` and the nested ``{"input": {"query": ...}}``
        payload are accepted, because this object has a ``retriever``
        attribute and callers that branch on that attribute send the
        nested form.
        """
        if "query" not in input_dict and isinstance(input_dict.get("input"), dict):
            input_dict = input_dict["input"]
        query = input_dict["query"]
        docs = self.retriever.get_relevant_documents(query)
        context = "\n\n".join(doc.page_content for doc in docs)
        prompt_text = self.prompt.format(context=context, question=query)
        return self.llm.invoke(prompt_text)


def get_retrieval_qa_chain(text_file, hf_model, use_multi_query=False):
    """Build the question-answering chain for ``text_file``.

    Args:
        text_file: Path of the document to answer from. The default
            document is used when this equals ``default_text_file`` or is
            empty/None.
        hf_model: LLM object used to generate the answer (and, in
            multi-query mode, the query variations).
        use_multi_query: When True, return the custom multi-query chain
            instead of a ``RetrievalQA`` chain.

    Returns:
        A ``(chain, vectorstore)`` tuple. ``chain`` exposes ``invoke`` and
        ``vectorstore`` is the store backing the chain's retriever.
    """
    if not text_file or text_file == default_text_file:
        retriever, vectorstore = default_retriever, default_vectorstore
    else:
        if text_file not in _uploaded_file_cache:
            # Build first, then replace the cached entry, so a failed load
            # does not evict the index that is still usable.
            built = prepare_vector_store_retriever(text_file)
            _uploaded_file_cache.clear()
            _uploaded_file_cache[text_file] = built
        retriever, vectorstore = _uploaded_file_cache[text_file]

    if use_multi_query:
        multi_retriever = _MultiQueryRetriever(retriever, vectorstore, hf_model)
        chain = _MultiQueryRetrievalQA(hf_model, multi_retriever, QA_PROMPT)
        return chain, vectorstore

    chain = RetrievalQA.from_chain_type(
        llm=hf_model,
        retriever=retriever,
        chain_type_kwargs={"prompt": QA_PROMPT},
    )
    return chain, vectorstore
 # Generates response using the question answering chain defined earlier
+def generate(question, answer, text_file, max_new_tokens, use_multi_query, store_qa):
     streamer = TextIteratorStreamer(
         tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
     )
     )
     hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
+    qa_chain, vectorstore = get_retrieval_qa_chain(text_file, hf_model, use_multi_query)
     query = f"{question}"
     if len(tokenizer.tokenize(query)) >= 512:
         query = "Repeat 'Your question is too long!'"
+    def run_chain():
+        result = qa_chain.invoke({"input": {"query": query}} if hasattr(qa_chain, 'retriever') else {"query": query})
+        return result
+    thread = Thread(target=run_chain)
     thread.start()
     response = ""
     for token in streamer:
         response += token
         yield response.strip()
+    # Store Q&A pair if requested
+    if store_qa and response.strip() and "Your question is too long!" not in response:
+        store_qa_pair(question, response.strip(), vectorstore)
+# replaces the retriever in the question answering chain whenever a new file is uploaded
 def upload_file(file):
+    if file is not None:
+        # Save uploaded file to temporary location
+        temp_path = os.path.join(tempfile.gettempdir(), file.name)
+        with open(temp_path, 'wb') as f:
+            f.write(file.read())
+        return file.name, temp_path
+    return None, None
 with gr.Blocks() as demo:
     gr.Markdown(
         """
   # Retrieval Augmented Generation with Phi-2: Question Answering demo
+  ### This demo uses the Phi-2 language model and Retrieval Augmented Generation (RAG). It allows you to upload a txt or PDF file and ask the model questions related to the content of that file.
+  ### Features:
+  - Support for both PDF and text files
+  - Multi-query RAG for improved retrieval
+  - Store Q&A pairs in vector database for future reference
   ### If you don't have one, there is a txt file already loaded, the new Oppenheimer movie's entire wikipedia page. The movie came out very recently in July, 2023, so the Phi-2 model is not aware of it.
   The context size of the Phi-2 model is 2048 tokens, so even this medium size wikipedia page (11.5k tokens) does not fit in the context window.
   Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to the our query and inject it into our prompt.
+  The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one file at a time.
   """
     )
     default_text_file = "Oppenheimer-movie-wiki.txt"
+    default_retriever, default_vectorstore = prepare_vector_store_retriever(default_text_file)
     text_file = gr.State(default_text_file)
     gr.Markdown(
+        "## Upload a txt or PDF file or Use the Default 'Oppenheimer-movie-wiki.txt' that has already been loaded"
     )
     file_name = gr.Textbox(
+        label="Loaded file", value=default_text_file, lines=1, interactive=False
     )
     upload_button = gr.UploadButton(
+        label="Click to upload a text or PDF file", file_types=[".txt", ".pdf"], file_count="single"
     )
     upload_button.upload(upload_file, upload_button, [file_name, text_file])
+    gr.Markdown("## RAG Settings")
+    with gr.Row():
+        use_multi_query = gr.Checkbox(
+            label="Use Multi-Query RAG",
+            value=False,
+            info="Generate multiple query variations for better retrieval"
+        )
+        store_qa = gr.Checkbox(
+            label="Store Q&A pairs",
+            value=True,
+            info="Add question-answer pairs to vector database"
+        )
     gr.Markdown("## Enter your question")
     tokens_slider = gr.Slider(
         8,
         with gr.Column():
             clear = gr.ClearButton([ques, ans])
+    btn.click(fn=generate, inputs=[ques, ans, text_file, tokens_slider, use_multi_query, store_qa], outputs=[ans])
     examples = gr.Examples(
         examples=[
             "Who portrayed J. Robert Oppenheimer in the new Oppenheimer movie?",

requirements.txt CHANGED Viewed

@@ -9,4 +9,7 @@ langchain-community==0.0.13
 unstructured==0.12.2
 huggingface_hub>=0.20.0
 gradio
-nltk

 unstructured==0.12.2
 huggingface_hub>=0.20.0
 gradio
+nltk
+pypdf2
+pdfplumber
+python-multipart