Spaces:
Build error
Build error
| from pypdf import PdfReader | |
| import streamlit as st | |
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (in
            this script, the files returned by the Streamlit uploader).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        str: The text of all pages, in document order then page order,
        joined without any separator.
    """
    if not pdf_docs:
        return ""
    # Join once instead of growing a string with ``+=`` page by page.
    # ``or ""`` is defensive: a page whose extraction yields nothing then
    # contributes an empty string instead of breaking the join.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Checkpoint used for summarisation.
SUMMARY_MODEL_NAME = "facebook/bart-large-cnn"


@st.cache_resource
def _load_summarizer():
    """Load the tokenizer and seq2seq model once and reuse them across reruns.

    Streamlit re-executes the whole script on every interaction; caching
    keeps the checkpoint from being reloaded each time.

    Returns:
        A ``(tokenizer, model)`` tuple for ``SUMMARY_MODEL_NAME``.
    """
    tokenizer = AutoTokenizer.from_pretrained(SUMMARY_MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARY_MODEL_NAME)
    return tokenizer, model


raw_text = ""
submitted = False

with st.sidebar:
    st.title("Menu:")
    pdf_docs = st.file_uploader(
        "Upload your PDF Files and Click on the Submit & Process Button",
        accept_multiple_files=True,
    )
    if st.button("Submit & Process"):
        submitted = True
        with st.spinner("Processing..."):
            raw_text = get_pdf_text(pdf_docs)

if raw_text.strip():
    # Only summarise when there is text; otherwise every rerun would load
    # the model and run generation on an empty string.
    tokenizer, model = _load_summarizer()

    # Tokenize the text. Input is truncated to 1024 tokens, so only the
    # beginning of a long document is summarised.
    inputs = tokenizer(
        raw_text, return_tensors="pt", max_length=1024, truncation=True
    )

    # Generate the summary with beam search, passing the attention mask
    # produced by the tokenizer alongside the input ids.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=4,
        min_length=30,
        max_length=200,
        early_stopping=True,
    )

    # Decode the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    st.write("\n\nSummary:\n", summary)
    st.write("\n\n\nOriginal text:\n", raw_text)
elif submitted:
    # The button was pressed but nothing usable came back (no files, or
    # PDFs without an extractable text layer).
    st.warning("No extractable text was found in the uploaded files.")