st_1 / app.py
ali121300's picture
Update app.py
7ec96df verified
import textwrap

import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
#########################################################
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to
            ``PdfReader``. ``None`` or an empty iterable is treated as
            "no documents".

    Returns:
        One string holding the text of all pages in document order, then
        page order, with no separator inserted between pages. Returns an
        empty string when there are no documents.
    """
    page_texts = []
    # `or ()` lets callers hand over the uploader result unchecked, even
    # when nothing has been uploaded and the value is None.
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: a page without a text layer may yield a falsy
            # result; treat it as empty text so the join never fails.
            page_texts.append(page.extract_text() or "")
    # A single join avoids repeatedly re-copying a growing string.
    return "".join(page_texts)
#################################################################
def wrap_text_preserve_newlines(text, width=300):
    """Wrap each line of ``text`` to ``width`` columns, keeping line breaks.

    The text is split on ``'\\n'`` and every resulting line is wrapped
    independently with ``textwrap.fill``, so the newlines already present
    in the input survive; only over-long lines gain extra breaks.

    Args:
        text: The string to wrap.
        width: Maximum line length handed to ``textwrap.fill``.

    Returns:
        The wrapped lines joined back together with ``'\\n'``.
    """
    # Relies on the module-level `import textwrap`.
    return "\n".join(
        textwrap.fill(line, width=width) for line in text.split("\n")
    )
########################################################################
# Text Splitter
def text_split(ducuments, chunk_size=300, chunk_overlap=20):
    """Split documents into overlapping character-based chunks.

    Args:
        ducuments: The documents to split; forwarded unchanged to
            ``CharacterTextSplitter.split_documents``. The misspelled
            parameter name is kept so existing keyword callers still work.
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``.

    Returns:
        Whatever ``split_documents`` returns for the given input.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # Use the actual parameter; the body previously read an undefined
    # name (`documents`) and raised NameError on every call.
    return text_splitter.split_documents(ducuments)
######################################################################
# Embeddings
def embeddings_f():
    """Create and return a ``HuggingFaceEmbeddings`` built with no arguments."""
    return HuggingFaceEmbeddings()
##########################################################################
#upload pdf
def pdf_upload():
    """Render the multi-file PDF uploader and return the user's selection.

    Returns:
        The value returned by ``st.file_uploader`` for this widget.
    """
    return st.file_uploader(
        "Upload your PDF Files",
        # Everything uploaded here is later handed to PdfReader, so reject
        # other file types at the widget instead of failing during parsing.
        type="pdf",
        accept_multiple_files=True,
    )
##############################################################################
def show(text):
    """Render ``text`` with ``st.write`` and return that call's result."""
    return st.write(text)
def main():
    """Build the Streamlit page for uploading PDFs and viewing their text.

    The sidebar holds the uploader and a "process" button that extracts
    the text of the uploaded PDFs, displays it and saves it to ``T1.txt``.
    The main area holds an "upload show" button that extracts and displays
    the same text without saving it.
    """
    st.set_page_config(page_title="Multiple pdf chat", page_icon=":books:")

    # The sidebar is built first so `pdf_docs` is bound before any button
    # handler reads it; previously the "upload show" handler ran ahead of
    # the assignment and raised UnboundLocalError.
    with st.sidebar:
        st.subheader("your document")
        pdf_docs = pdf_upload()
        if st.button("process"):
            with st.spinner("processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)
                st.write(raw_text)
                # The context manager closes the file even if the write
                # fails; explicit UTF-8 keeps non-ASCII PDF text from
                # breaking on platforms with a narrower default encoding.
                with open("T1.txt", "w", encoding="utf-8") as file:
                    file.write(raw_text)

    if st.button("upload show"):
        with st.spinner("showing"):
            # get pdf text
            raw_text = get_pdf_text(pdf_docs)
            show(raw_text)
###############
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()