# text-sum / app.py
# swamisharan's picture
# Update app.py
# 904134e verified
import streamlit as st
from PyPDF2 import PdfReader
from gensim import corpora
from gensim.models import TfidfModel
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import requests
from io import BytesIO
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
def _cache_model_loader(loader):
    """Wrap *loader* with whichever Streamlit resource cache is available.

    ``st.cache`` is deprecated in favour of ``st.cache_resource`` for global,
    unserialisable objects such as ML models.  Streamlit releases that predate
    the new API still get the legacy decorator with the original arguments.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(loader)
    return st.cache(allow_output_mutation=True)(loader)


@_cache_model_loader
def load_model():
    """Load the pre-trained BART summarisation model and its tokenizer.

    The result is cached by Streamlit, so the checkpoint is only loaded once
    instead of on every script rerun.

    Returns:
        A ``(model, tokenizer)`` tuple for ``facebook/bart-large-cnn``.
    """
    checkpoint = 'facebook/bart-large-cnn'
    tokenizer = BartTokenizer.from_pretrained(checkpoint)
    model = BartForConditionalGeneration.from_pretrained(checkpoint)
    return model, tokenizer
def read_pdf_from_url(url):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: HTTP(S) address of a PDF document.

    Returns:
        The extracted text of every page, concatenated in page order.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server responds with an HTTP error status.
    """
    # Without a timeout a stalled server would hang the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx instead of handing an error page to the PDF parser.
    response.raise_for_status()
    pdf = PdfReader(BytesIO(response.content))
    # A page without a text layer may yield nothing; treat that as "".
    return "".join(page.extract_text() or "" for page in pdf.pages)
def generate_summary(model, tokenizer, text):
    """Summarise *text* with the given pre-trained model and tokenizer.

    Args:
        model: Object exposing ``generate(input_ids, ...)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.
        text: The document to summarise.

    Returns:
        A list with one decoded summary string per generated sequence.
    """
    # Request truncation explicitly: ``max_length`` on its own only works via
    # the tokenizer's implicit fallback (and a warning), while inputs longer
    # than 1024 tokens must be cut to the limit applied here.
    inputs = tokenizer(
        [text],
        max_length=1024,
        truncation=True,
        return_tensors='pt',
    )
    summary_ids = model.generate(
        inputs['input_ids'],
        num_beams=40,
        max_length=1024,
        early_stopping=False,
    )
    return [
        tokenizer.decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for ids in summary_ids
    ]
def main():
    """Render the Streamlit page: example text, URL input and the summary.

    Shows a sample summary, offers a button that pre-fills the URL of a
    bundled example PDF, and, once a URL is present, downloads the PDF,
    summarises its text and displays the result.
    """
    st.title("Pdf Summarizer")
    st.write("""Exp.-Book--The Art of War
Summary--The art of war is governed by five constant factors, to be taken into account in one's deliberations. These are: (1) The Moral Law; (2) Heaven; (3) Earth; (4) The Commander; (5) Method and discipline. By means of these seven considerations I can forecast victory or defeat. All warfare is based on deception. When able to attack, we must seem unable. When we are near, we must make the enemy believe we are far away. If your opponent is of choleric temper, seek to irritate him. If he is taking no rest, give him no rest. If his forces are united, separate them. Attack where he is unprepared, where you are not expected to win. These military devices, leading to victory, must not be divulged beforehand. It is by this attention to this point that I can foresee who is likely to win or lose a battle. The general who loses a battle makes many calculations but few beforehand. This version was generated automatically at www.suntzusaid.com/SunTzu/Art-of-War-By-Sun-Tzu.html.""")
    default_url = "https://huggingface.co/spaces/swamisharan/text-sum/resolve/main/The%20Art%20of%20War.pdf?download=true"
    button_clicked = st.button("The Art of War-Sun Tzu")
    pdf_url = st.text_input("Enter the URL of the PDF file:", value=default_url if button_clicked else "")
    if not pdf_url:
        return

    # Fetch the document first so a bad URL fails fast, before the large
    # model is loaded.
    try:
        text = read_pdf_from_url(pdf_url)
    except requests.RequestException as exc:
        st.error(f"Could not download the PDF: {exc}")
        return

    # Nothing to summarise (e.g. the PDF has no text layer).
    if not text.strip():
        st.warning("No extractable text was found in this PDF.")
        return

    model, tokenizer = load_model()
    summary = generate_summary(model, tokenizer, text)
    # generate_summary returns a list of strings; show the text itself
    # rather than the list's Python repr.
    st.write(f"Summary: {' '.join(summary)}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()