# rag.py — converted Hugging Face file-page header to comments so the file parses:
# Author: Navya-Sree · Commit: "Create rag.py" (cfdaece, verified) · 1.78 kB
import os

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# We'll assume you have a documentation text file. If not, we can use some sample Python docs.
# Let's create a sample if the file doesn't exist, or load it.
def load_documents():
    """Load the documentation corpus used for retrieval.

    If ``python_docs.txt`` does not exist yet, a small sample document
    about Python functions is written first so the pipeline works on a
    fresh checkout; the file is then loaded via LangChain's TextLoader.

    Returns:
        list: LangChain ``Document`` objects loaded from ``python_docs.txt``.
    """
    doc_path = "python_docs.txt"
    if not os.path.exists(doc_path):
        # Seed a minimal sample doc so first-time runs don't fail on a
        # missing corpus. Explicit encoding avoids platform-default drift.
        with open(doc_path, "w", encoding="utf-8") as f:
            f.write("""
Functions in Python are defined using the def keyword.
For example: def hello_world(): print("Hello, world!")
Functions can take parameters and return values.
""")
    loader = TextLoader(doc_path)
    return loader.load()
def create_vector_store(documents):
    """Chunk *documents* and index them in an in-memory Chroma store.

    Documents are split into chunks of at most 1000 characters with no
    overlap, embedded with OpenAI embeddings, and loaded into Chroma.

    Returns:
        Chroma: the populated vector store, ready for similarity search.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)
    return Chroma.from_documents(documents=chunks, embedding=OpenAIEmbeddings())
def retrieve_relevant_docs(vectorstore, query, k=3):
    """Return the text of the top-*k* documents most similar to *query*.

    Runs a similarity search against *vectorstore* and joins the page
    contents of the hits into a single newline-separated context string.
    """
    hits = vectorstore.similarity_search(query, k=k)
    return "\n".join(hit.page_content for hit in hits)
# Module-level initialization: build the retrieval index exactly once at
# import time so that each query only pays for a similarity search.
# NOTE(review): this performs file I/O and embedding API calls on import —
# confirm that eager initialization is acceptable for the deployment context.
documents = load_documents()
vectorstore = create_vector_store(documents)
def get_rag_context(query):
    """Fetch retrieval-augmented context for *query* from the shared index."""
    context = retrieve_relevant_docs(vectorstore, query)
    return context