TI_RAG_Demo_L3.1

Sleeping

App Files Community

arjunanand13 committed on Jun 28, 2024

Commit

fc583d6

verified ·

1 Parent(s): 7b1bacb

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -29

app.py CHANGED Viewed

@@ -3,11 +3,11 @@ import json
 from torch import cuda, bfloat16
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
 from langchain.llms import HuggingFacePipeline
 import gradio as gr
-import os
-import faiss
-import numpy as np
 from langchain.embeddings import HuggingFaceEmbeddings
 class Chatbot:
     def __init__(self):
@@ -37,19 +37,14 @@ class Chatbot:
         )
         self.llm = HuggingFacePipeline(pipeline=self.generate_text)
-        # Initialize the embedding model
-        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cuda"})
         try:
-            # Initialize FAISS with GPU support
-            cpu_index = faiss.read_index('faiss_index_new_model3.index')
-            res = faiss.StandardGpuResources()  # Use this to allocate the GPU resources
-            co = faiss.GpuClonerOptions()
-            co.useFloat16 = True  # Enable float16 for better performance
-            self.vectorstore = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)
             print("Loaded embedding successfully")
-        except Exception as e:
-            print("FAISS could not be imported or index could not be loaded.")
             raise e
         self.chain = ConversationalRetrievalChain.from_llm(self.llm, self.vectorstore.as_retriever(), return_source_documents=True)
@@ -63,10 +58,10 @@ class Chatbot:
             return False
     def format_prompt(self, query):
-        prompt = f"""
         You are a knowledgeable assistant with access to a comprehensive database.
         I need you to answer my question and provide related information in a specific format.
-        I have provided four relatable json files, choose the most suitable chunks for answering the query.
         Here's what I need:
         Include a final answer without additional comments, sign-offs, or extra phrases. Be direct and to the point.
@@ -86,20 +81,10 @@ class Chatbot:
     def qa_infer(self, query):
         content = ""
         formatted_prompt = self.format_prompt(query)
-        # Embed the query
-        query_embedding = self.embeddings.embed_query(formatted_prompt)
-        # Perform the search
-        distances, indices = self.vectorstore.search(np.array([query_embedding]), k=5)
-        # Retrieve the top documents
-        for idx in indices[0]:
-            doc = self.vectorstore.get_document(idx)
             content += "-" * 50 + "\n"
             content += doc.page_content + "\n"
-        result = self.chain({"question": formatted_prompt, "chat_history": self.chat_history})
         print(content)
         print("#" * 100)
         print(result['answer'])
@@ -158,4 +143,4 @@ class Chatbot:
 # Instantiate and launch the chatbot
 chatbot = Chatbot()
-chatbot.launch_interface()

 from torch import cuda, bfloat16
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
 from langchain.llms import HuggingFacePipeline
+from langchain.vectorstores import FAISS
+from langchain.chains import ConversationalRetrievalChain
 import gradio as gr
 from langchain.embeddings import HuggingFaceEmbeddings
+import os
 class Chatbot:
     def __init__(self):
         )
         self.llm = HuggingFacePipeline(pipeline=self.generate_text)
         try:
+            # self.vectorstore = FAISS.load_local('faiss_index', HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cuda"}))
+            self.vectorstore = FAISS.load_local('faiss_index_new_model3.index', HuggingFaceEmbeddings(model_name="flax-sentence-embeddings/all_datasets_v3_MiniLM-L12", model_kwargs={"device": "cuda"}))
+            # cpu_index = faiss.read_index('faiss_index_new_model3.index')
+            # gpu_index = faiss.index_cpu_to_gpu(gpu_resource, 0, cpu_index)
             print("Loaded embedding successfully")
+        except ImportError as e:
+            print("FAISS could not be imported. Make sure FAISS is installed correctly.")
             raise e
         self.chain = ConversationalRetrievalChain.from_llm(self.llm, self.vectorstore.as_retriever(), return_source_documents=True)
             return False
     def format_prompt(self, query):
+        prompt=f"""
         You are a knowledgeable assistant with access to a comprehensive database.
         I need you to answer my question and provide related information in a specific format.
+        I have provided four relatable json files , choose the most suitable chunks for answering the query
         Here's what I need:
         Include a final answer without additional comments, sign-offs, or extra phrases. Be direct and to the point.
     def qa_infer(self, query):
         content = ""
         formatted_prompt = self.format_prompt(query)
+        result = self.chain({"question": formatted_prompt, "chat_history": self.chat_history})
+        for doc in result['source_documents']:
             content += "-" * 50 + "\n"
             content += doc.page_content + "\n"
         print(content)
         print("#" * 100)
         print(result['answer'])
 # Instantiate and launch the chatbot
 chatbot = Chatbot()
+chatbot.launch_interface()