Spaces:
Sleeping
Sleeping
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import SentenceTransformerEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from pypdf import PdfReader | |
| from langchain_community.document_loaders import PyPDFLoader | |
# Embedding model shared by every index built in this module.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# FAISS index for the most recently loaded PDF; stays None until load_pdf()
# has stored one, which retrieve_context() treats as "nothing to search".
vector_store = None
def load_pdf(file):
    """Build the module-level FAISS index from the text of a PDF.

    Text is extracted from every page of ``file``, split into chunks of
    500 characters with a 50-character overlap, embedded with the
    module-level ``embeddings`` object and stored in the global
    ``vector_store``, replacing whatever index was there before.

    Args:
        file: The PDF source, passed unchanged to ``PdfReader``.

    Returns:
        None. The result is published through the global ``vector_store``,
        which is reset to ``None`` when the PDF yields no indexable text
        (for example a scan without a text layer).
    """
    global vector_store

    reader = PdfReader(file)

    # A page without a text layer may yield an empty (or missing) result;
    # normalise to "" so the join below can never fail on a non-string.
    page_texts = [page.extract_text() or "" for page in reader.pages]

    # Separate pages with a newline so the last word of one page is not
    # fused with the first word of the next.
    text = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text])

    if not docs:
        # FAISS needs at least one embedded document to size its index, so
        # an empty chunk list would raise. Clear the store instead so that
        # no stale index from a previously loaded PDF keeps being searched.
        vector_store = None
        return

    vector_store = FAISS.from_documents(docs, embeddings)
def retrieve_context(query):
    """Return the text of the three indexed chunks most similar to ``query``.

    Args:
        query: The search text handed to ``vector_store.similarity_search``.

    Returns:
        The ``page_content`` of the matching chunks joined with newlines,
        or an empty string when the global ``vector_store`` is unset.
    """
    # Guard clause: nothing has been indexed yet, so there is no context.
    if not vector_store:
        return ""

    matches = vector_store.similarity_search(query, k=3)
    return "\n".join(match.page_content for match in matches)