Spaces:

medelharchaoui
/

SemanticSearch

Runtime error

Update app.py

9111075 almost 3 years ago

1.61 kB

	import streamlit as st
	import tempfile
	import torch
	from langchain.document_loaders import PyPDFLoader

	from langchain.text_splitter import SentenceTransformersTokenTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
	from langchain.vectorstores import FAISS


	# One sentence-embedding checkpoint is shared by the splitter and the
	# embedder below, so both are configured from the same model name.
	model_name = "dangvantuan/sentence-camembert-large"
	model_kwargs = dict(device='cpu')  # forwarded to the embedding model
	encode_kwargs = dict(convert_to_tensor=True)  # forwarded to the encode call

	# Token-based splitter: 380 tokens per chunk, 100 tokens of overlap
	# between consecutive chunks.
	splitter = SentenceTransformersTokenTextSplitter(
	    chunk_overlap=100,
	    tokens_per_chunk=380,
	    model_name=model_name,
	)

	# Embedding function handed to the FAISS vector store further down.
	embeddings_fun = HuggingFaceEmbeddings(
	    encode_kwargs=encode_kwargs,
	    model_kwargs=model_kwargs,
	    model_name=model_name,
	)


	def read_pdf(file):
	    """Load an uploaded PDF into a list of documents.

	    The upload only exists in memory, while ``PyPDFLoader`` takes a file
	    path, so the bytes are spooled to a temporary file first.

	    Args:
	        file: Uploaded file object exposing ``getvalue()`` that returns
	            the raw PDF bytes (here, the value returned by
	            ``st.file_uploader``).

	    Returns:
	        Whatever ``PyPDFLoader(path).load()`` returns for the PDF.
	    """
	    import os  # local import: keeps this fix self-contained in the block

	    with tempfile.NamedTemporaryFile(delete=False) as temp:
	        temp.write(file.getvalue())
	    # Leaving the ``with`` block closes the handle, which flushes the
	    # bytes to disk before the loader opens the path (and lets the path
	    # be reopened on platforms that forbid a second open).

	    try:
	        return PyPDFLoader(temp.name).load()
	    finally:
	        # delete=False means nobody else removes the file; without this
	        # every upload would leave a stray temp file behind.
	        os.unlink(temp.name)

	# --- Page flow: upload a PDF, type a question, show the closest chunk ---
	st.title('PDF Text Extractor')

	uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

	query = st.text_input("Entrer une question")

	st.text('La reponse à votre question:')

	if uploaded_file is None:
	    st.write("file not uploaded correctly")
	elif not query.strip():
	    # The script runs top to bottom on each interaction, so a PDF can be
	    # present before any question is typed. Skip the expensive
	    # indexing and search until there is something to search for.
	    st.write("Veuillez entrer une question")
	else:
	    raw_documents = read_pdf(uploaded_file)
	    documents = splitter.split_documents(raw_documents)

	    if not documents:
	        # A PDF with no extractable text yields nothing to index;
	        # building the vector store from an empty list would fail.
	        st.write("Aucun texte n'a pu être extrait du PDF")
	    else:
	        faiss_db = FAISS.from_documents(documents, embeddings_fun)
	        docs = faiss_db.similarity_search(query)

	        if docs:
	            # Only the best match is displayed.
	            st.write(docs[0].page_content)
	        else:
	            st.write("Aucun résultat trouvé")