import os

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables.passthrough import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Required packages:
#   pip install -q langchain langchain_openai faiss-cpu tiktoken
#   pip install -q -U langchain-community


class RAGModel:
    """Retrieval-Augmented Generation (RAG) pipeline over an IMDB CSV dataset.

    On construction this loads the dataset, splits it into chunks, embeds the
    chunks (with a local disk cache so re-runs don't re-bill the embeddings
    API), persists them in a FAISS index, and wires up a LangChain runnable:
    retriever -> prompt -> chat model -> string parser.
    """

    def __init__(self, api_key):
        """Build the full RAG pipeline.

        Args:
            api_key: OpenAI API key, used for both embeddings and chat.

        Side effects: reads ``imdb_datasets.csv`` from the working directory,
        writes an embedding cache under ``./cache/`` and a FAISS index under
        ``./faiss_index``, and makes OpenAI API calls to embed the documents.
        """
        self.api_key = api_key

        # Load the dataset whose rows will serve as retrieval context.
        csv_file = "imdb_datasets.csv"
        loader = CSVLoader(csv_file)
        csv_data = loader.load()

        # 1. Split the dataset into overlapping chunks sized for embedding.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunked_documents = text_splitter.split_documents(csv_data)
        # Confirms we actually split the data into chunks.
        print(f"Number of documents: {len(chunked_documents)}")

        # 2. Create the embedding model, wrapped in a file-backed cache keyed
        #    by model name so identical texts are only embedded once.
        embedding_model = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=self.api_key)
        print("Created embeddings")
        local_store = LocalFileStore("./cache/")
        cached_embedder = CacheBackedEmbeddings.from_bytes_store(
            embedding_model, local_store, namespace=embedding_model.model
        )
        print("Created cache backed embeddings")

        # 3. Store the chunk embeddings in a FAISS vector store and persist it.
        self.vector_store = FAISS.from_documents(chunked_documents, cached_embedder)
        self.vector_store.save_local("faiss_index")

        # 4. Expose the vector store as a retriever for the chain below.
        retriever = self.vector_store.as_retriever()

        # 5. Prompt with slots for the retrieved context and the user question.
        prompt_template = ChatPromptTemplate.from_messages(
            [
                ("system", "You are an excellent movie critic who always includes great movie recommendations in your response. If the answer is not in the context let the user know "),
                ("human", "Using this context: {context}, please answer this question: {question}"),
            ]
        )
        chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, api_key=api_key)
        parser = StrOutputParser()

        # 6. LCEL chain: the raw question is passed through to the prompt while
        #    the retriever fetches matching documents as context.
        self.runnable_chain = (
            {
                "context": retriever,
                "question": RunnablePassthrough(),
            }
            | prompt_template
            | chat_model
            | parser
        )

    def query(self, question) -> str:
        """Answer ``question`` using the RAG chain and return the reply text.

        Args:
            question: Natural-language question about the indexed movies.

        Returns:
            The model's answer as a string.
        """
        print(f"Querying the RAG instance with the question: {question}")
        # StrOutputParser already yields a plain string, so no joining of
        # chunks is needed here.
        return self.runnable_chain.invoke(question)


def main():
    """Interactive loop: read questions from stdin until the user types 'exit'."""
    api_key = os.getenv("OPENAI_API_KEY")
    rag = RAGModel(api_key=api_key)
    while True:
        question = input("Enter your question (or type 'exit' to quit): ")
        if question.lower() == "exit":
            break
        answer = rag.query(question)
        print("Answer:", answer)


if __name__ == "__main__":
    main()