# Ai-tutor / rag.py
# Last commit: "Update rag.py" (ae4babb, verified) by oluinioluwa814; 888 bytes.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from pypdf import PdfReader
from langchain_community.document_loaders import PyPDFLoader
# Retrieval index for the most recently loaded PDF. It stays None until
# load_pdf() builds one; retrieve_context() returns "" while it is unset.
vector_store = None

# Embedding model shared by every index that load_pdf() builds; it is
# constructed once, when this module is imported.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
def load_pdf(file):
    """Index the text of a PDF so ``retrieve_context`` can search it.

    Extracts the text of every page, splits it into overlapping chunks
    (500 characters, 50 overlap), embeds the chunks with the module-level
    ``embeddings`` model and stores the resulting FAISS index in the
    module-level ``vector_store``, replacing any previously built index.

    Args:
        file: Passed straight to ``pypdf.PdfReader``; anything that
            constructor accepts.

    Raises:
        ValueError: If no text could be extracted from the PDF (for
            example, a document that only contains scanned images). The
            previously built index, if any, is left untouched.
    """
    global vector_store

    reader = PdfReader(file)
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next; ``or ""`` covers pages for which
    # the extractor yields no text.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)
    if not text.strip():
        # Fail with a clear message instead of handing an empty document
        # list to FAISS.from_documents.
        raise ValueError("No extractable text found in the PDF.")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = splitter.create_documents([text])
    vector_store = FAISS.from_documents(docs, embeddings)
def retrieve_context(query):
    """Return the stored chunks most similar to ``query`` as one string.

    Asks the module-level ``vector_store`` for the three closest chunks
    and joins their text with newlines. Returns an empty string when the
    store is unset (or otherwise falsy), i.e. before any PDF was indexed.
    """
    if not vector_store:
        return ""

    matches = vector_store.similarity_search(query, k=3)
    return "\n".join(match.page_content for match in matches)