Spaces:

ali121300
/

st_1

Sleeping

App Files Files Community

st_1 / app.py

ali121300

Update app.py

7ec96df verified about 2 years ago

raw

history blame contribute delete

2.59 kB


	import streamlit as st
	from PyPDF2 import PdfReader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS


	#########################################################

	def get_pdf_text(pdf_docs):
	    """Concatenate the extracted text of every page of every given PDF.

	    Args:
	        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.
	            ``None`` or an empty iterable produces an empty string.

	    Returns:
	        The page texts joined in document order, then page order, with
	        no separator inserted between pages or documents.
	    """
	    # Join once instead of growing a string with repeated ``+=``.
	    # ``or ""`` keeps a page whose extraction yields nothing (None) from
	    # raising TypeError during concatenation; it is a no-op for real text.
	    return "".join(
	        page.extract_text() or ""
	        for pdf in (pdf_docs or ())
	        for page in PdfReader(pdf).pages
	    )
	#################################################################
	def wrap_text_preserve_newlines(text, width=300):
	    """Wrap each line of ``text`` to ``width`` columns, keeping its newlines.

	    The text is split on ``"\n"``, every resulting line is wrapped on its
	    own with ``textwrap.fill``, and the pieces are joined back with
	    ``"\n"``. Empty lines therefore stay empty lines.

	    Args:
	        text: The string to wrap.
	        width: Maximum line width handed to ``textwrap.fill``.

	    Returns:
	        The wrapped text as a single string.
	    """
	    # Imported here because the module's import block does not bring in
	    # ``textwrap``; without it this function raised NameError when called.
	    import textwrap

	    return "\n".join(
	        textwrap.fill(line, width=width) for line in text.split("\n")
	    )
	########################################################################
	# Text Splitter
	def text_split(ducuments):
	    """Split documents into chunks with ``CharacterTextSplitter``.

	    The splitter is configured with ``chunk_size=300`` and
	    ``chunk_overlap=20``.

	    Args:
	        ducuments: The documents handed to ``split_documents``. The
	            misspelled parameter name is kept as-is so the function's
	            signature does not change for callers.

	    Returns:
	        Whatever ``CharacterTextSplitter.split_documents`` returns for
	        the given documents.
	    """
	    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=20)
	    # The body used to read an undefined name ``documents`` and raised
	    # NameError on every call; it now uses the actual parameter.
	    return splitter.split_documents(ducuments)
	######################################################################
	# Embeddings
	def embeddings_f():
	    """Create and return a ``HuggingFaceEmbeddings`` built with its defaults."""
	    return HuggingFaceEmbeddings()
	##########################################################################
	#upload pdf
	def pdf_upload():
	    """Render a multi-file uploader widget and return what it yields."""
	    return st.file_uploader(
	        "Upload your PDF Files",
	        accept_multiple_files=True,
	    )
	##############################################################################
	def show(text):
	    """Write ``text`` to the page and return the result of ``st.write``."""
	    return st.write(text)




	def main():
	    """Build the page: a sidebar PDF uploader and two text-extraction buttons.

	    The sidebar holds the uploader and a "process" button that extracts
	    the text of the uploaded PDFs, displays it, and saves it to
	    ``T1.txt``. The "upload show" button extracts the same text and
	    displays it through ``show``.
	    """
	    st.set_page_config(page_title="Multiple pdf chat", page_icon=":books:")

	    # Build the sidebar first so ``pdf_docs`` is bound before the
	    # "upload show" branch reads it. Previously that branch ran ahead of
	    # the assignment and raised UnboundLocalError when the button was
	    # clicked.
	    with st.sidebar:
	        st.subheader("your document")
	        pdf_docs = pdf_upload()
	        if st.button("process"):
	            with st.spinner("processing"):
	                raw_text = get_pdf_text(pdf_docs)
	                st.write(raw_text)
	                # Context manager closes the file even if the write
	                # fails; explicit UTF-8 avoids encode errors for
	                # non-ASCII PDF text on platforms with a narrower
	                # default encoding.
	                with open("T1.txt", "w", encoding="utf-8") as out_file:
	                    out_file.write(raw_text)

	    if st.button("upload show"):
	        with st.spinner("showing"):
	            show(get_pdf_text(pdf_docs))
	###############



	# Build the app only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()