|
|
import textwrap

import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
|
|
|
|
| |
| |
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each handed to ``PdfReader``.
            ``None`` or an empty iterable is treated as "no documents".

    Returns:
        A single string with the text of all pages, in document order and
        then page order, joined without a separator. Empty when there are
        no documents.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Fall back to "" so a page whose extraction yields nothing
            # (e.g. None) cannot raise a TypeError during concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
| |
def wrap_text_preserve_newlines(text, width=300):
    """Wrap each line of *text* to *width* columns, keeping its newlines.

    The text is split on ``"\\n"``, every resulting line is wrapped on its
    own with ``textwrap.fill``, and the pieces are joined back with
    ``"\\n"``, so existing line breaks (including blank lines) survive.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The wrapped text as a single string.
    """
    # Relies on the module-level ``import textwrap``; the original file
    # used the module without importing it.
    return "\n".join(
        textwrap.fill(line, width=width) for line in text.split("\n")
    )
| |
| |
def text_split(ducuments):
    """Split documents into chunks with ``CharacterTextSplitter``.

    The splitter is configured with ``chunk_size=300`` and
    ``chunk_overlap=20``.

    Args:
        ducuments: The documents to split; passed unchanged to
            ``split_documents``. The misspelled parameter name is kept so
            existing keyword callers continue to work.

    Returns:
        Whatever ``CharacterTextSplitter.split_documents`` returns for the
        given documents.
    """
    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=20)
    # Use the actual parameter: the previous body read an undefined name
    # ``documents`` and therefore raised NameError on every call.
    return text_splitter.split_documents(ducuments)
| |
| |
def embeddings_f():
    """Create and return a ``HuggingFaceEmbeddings`` built with defaults."""
    return HuggingFaceEmbeddings()
| |
| |
def pdf_upload():
    """Render the multi-file PDF uploader and return its current value.

    Returns:
        The value returned by ``st.file_uploader`` for a multi-file
        uploader.
    """
    # Limit the picker to .pdf files: the widget is labelled for PDFs, and
    # accepting arbitrary files only defers the failure to the PDF parser.
    return st.file_uploader(
        "Upload your PDF Files",
        type="pdf",
        accept_multiple_files=True,
    )
| |
def show(text):
    """Render *text* with ``st.write`` and return that call's result."""
    return st.write(text)
| |
| |
|
|
|
|
def main():
    """Build the Streamlit page for uploading PDFs and viewing their text.

    The sidebar holds the uploader and a "process" button that extracts the
    text of the uploaded PDFs, displays it and saves it to ``T1.txt``. The
    main area holds an "upload show" button that extracts and displays the
    same text without saving it.
    """
    st.set_page_config(page_title="Multiple pdf chat", page_icon=":books:")

    # The sidebar is built first so that ``pdf_docs`` is bound before the
    # "upload show" handler reads it; previously that handler ran ahead of
    # the assignment and raised UnboundLocalError when clicked.
    # NOTE(review): the source had lost its indentation; the "process"
    # button is assumed to live inside the sidebar block - confirm.
    with st.sidebar:
        st.subheader("your document")
        pdf_docs = pdf_upload()
        if st.button("process"):
            with st.spinner("processing"):
                raw_text = get_pdf_text(pdf_docs)
                st.write(raw_text)
                # Context manager closes the file even if the write fails;
                # explicit UTF-8 avoids platform-dependent encode errors on
                # non-ASCII characters in the extracted text.
                with open("T1.txt", "w", encoding="utf-8") as out_file:
                    out_file.write(raw_text)

    if st.button("upload show"):
        with st.spinner("showing"):
            raw_text = get_pdf_text(pdf_docs)
            show(raw_text)
| |
| |
| |
|
|
| |
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
|
|