Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import tempfile | |
| import torch | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import SentenceTransformersTokenTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings | |
| from langchain.vectorstores import FAISS | |
# Sentence-embedding model used both for token-aware splitting and for
# embedding the resulting chunks.
model_name = "dangvantuan/sentence-camembert-large"

# Keyword arguments handed to HuggingFaceEmbeddings below: the model is
# placed on the CPU and encoding is asked to return tensors.
model_kwargs = dict(device="cpu")
encode_kwargs = dict(convert_to_tensor=True)

# Splits documents into chunks of 380 tokens with a 100-token overlap,
# counted with the tokenizer of `model_name`.
splitter = SentenceTransformersTokenTextSplitter(
    chunk_overlap=100,
    tokens_per_chunk=380,
    model_name=model_name,
)

# Embedding function passed to the FAISS vector store.
embeddings_fun = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)
def read_pdf(file):
    """Load an uploaded PDF into a list of documents.

    The upload's bytes are written to a temporary file because
    ``PyPDFLoader`` is given a filesystem path, not a file object.

    Args:
        file: An uploaded-file object exposing ``getvalue()`` that returns
            the raw PDF bytes (here, the value returned by
            ``st.file_uploader``).

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns for the PDF.
    """
    import os  # local import: only needed here, to remove the temp file

    # delete=False so the file survives leaving the `with` block and can be
    # reopened by name; leaving the block closes it, which guarantees all
    # bytes are flushed to disk before the loader reads the path.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp:
        temp.write(file.getvalue())

    try:
        return PyPDFLoader(temp.name).load()
    finally:
        # We created the file with delete=False, so we must clean it up
        # ourselves, including when loading fails.
        os.unlink(temp.name)
# Streamlit page: upload a PDF, type a question, and display the chunk of
# the PDF that is most similar to the question.
st.title('PDF Text Extractor')
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
query = st.text_input("Entrer une question")
st.text('La reponse à votre question:')

if uploaded_file is None:
    st.write("file not uploaded correctly")
elif not query.strip():
    # No question yet: skip PDF parsing and embedding instead of running a
    # similarity search on an empty string.
    st.write("Entrer une question pour lancer la recherche")
else:
    raw_documents = read_pdf(uploaded_file)
    documents = splitter.split_documents(raw_documents)
    if not documents:
        # A PDF without extractable text (e.g. scanned images) produces no
        # chunks, and the vector index cannot be built from an empty list.
        st.write("Aucun texte n'a pu être extrait de ce PDF")
    else:
        faiss_db = FAISS.from_documents(documents, embeddings_fun)
        docs = faiss_db.similarity_search(query)
        if docs:
            # Show only the best match.
            st.write(docs[0].page_content)
        else:
            st.write("Aucun passage pertinent trouvé")