| | import os |
| | from langchain_core.runnables.base import RunnableSequence |
| | from langchain_core.runnables.passthrough import RunnablePassthrough |
| | from langchain_core.output_parsers import StrOutputParser |
| | from langchain_core.prompts import ChatPromptTemplate |
| | from langchain_openai import ChatOpenAI |
| | from langchain_community.document_loaders import CSVLoader |
| | from langchain.text_splitter import RecursiveCharacterTextSplitter |
| | from langchain_openai import OpenAIEmbeddings |
| | from langchain.embeddings import CacheBackedEmbeddings |
| | from langchain.storage import LocalFileStore |
| | from langchain_community.vectorstores import FAISS |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
class RAGModel:
    """Retrieval-augmented generation (RAG) pipeline over a movie CSV dataset.

    On construction the model:
      1. loads the CSV and splits each row into overlapping chunks,
      2. embeds the chunks with OpenAI embeddings, cached on local disk,
      3. builds and persists a FAISS vector index,
      4. wires a retriever + prompt + chat-model + parser LCEL chain.

    Queries then run through :meth:`query`.
    """

    def __init__(self, api_key, csv_file="imdb_datasets.csv", index_path="faiss_index"):
        """Build the full RAG pipeline (performs network and disk I/O).

        Args:
            api_key: OpenAI API key, used for both embeddings and chat.
            csv_file: Path to the source CSV dataset. Default keeps the
                previously hard-coded file name for backward compatibility.
            index_path: Directory where the FAISS index is persisted.
                Default keeps the previously hard-coded "faiss_index".
        """
        self.api_key = api_key

        # Load the raw rows; CSVLoader turns each CSV row into one Document.
        loader = CSVLoader(csv_file)
        csv_data = loader.load()

        # Overlapping chunks keep long rows within the embedding context
        # window while preserving continuity across chunk boundaries.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunked_documents = text_splitter.split_documents(csv_data)
        print(f"Number of documents: {len(chunked_documents)}")

        embedding_model = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=self.api_key)
        print("Created embeddings")

        # Cache embeddings on disk so repeated runs over the same chunks
        # do not re-call (and re-bill) the embeddings API.
        local_store = LocalFileStore("./cache/")
        cached_embedder = CacheBackedEmbeddings.from_bytes_store(
            embedding_model, local_store, namespace=embedding_model.model
        )
        print("Created cache backed embeddings")

        self.vector_store = FAISS.from_documents(chunked_documents, cached_embedder)
        self.vector_store.save_local(index_path)

        retriever = self.vector_store.as_retriever()

        prompt_template = ChatPromptTemplate.from_messages(
            [
                ("system", "You are an excellent movie critic who always includes great movie recommendations in your response. If the answer is not in the context let the user know "),
                ("human", "Using this context: {context}, please answer this question: {question}"),
            ]
        )

        # Use the stored attribute for the key (was the raw `api_key`
        # parameter) so both OpenAI clients are configured consistently.
        chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, api_key=self.api_key)
        parser = StrOutputParser()

        # LCEL chain: the retriever fills {context} from the vector store,
        # RunnablePassthrough forwards the raw question into {question}.
        self.runnable_chain = (
            {
                "context": retriever,
                "question": RunnablePassthrough(),
            }
            | prompt_template
            | chat_model
            | parser
        )

    def query(self, question) -> str:
        """Answer *question* via the retrieval chain and return the answer text."""
        print(f"Querying the RAG instance with the question: {question}")
        # StrOutputParser makes .invoke() return a single str already; the
        # original ''.join(result) iterated it character-by-character and
        # rebuilt the identical string — a confusing no-op, removed.
        return self.runnable_chain.invoke(question)
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |