File size: 2,883 Bytes
1f526ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07e3d6d
1f526ca
 
 
 
 
 
 
 
 
 
 
 
ece589c
904134e
 
ece589c
 
 
1f526ca
 
 
 
 
 
ece589c
1f526ca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
from PyPDF2 import PdfReader
from gensim import corpora
from gensim.models import TfidfModel
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import requests
from io import BytesIO
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline

def _model_cache():
    """Return the Streamlit caching decorator used to wrap ``load_model``."""
    # ``st.cache`` is deprecated in newer Streamlit releases in favour of
    # ``st.cache_resource``; prefer the newer API when this Streamlit has it
    # and fall back to the legacy call otherwise so older installs still work.
    if hasattr(st, "cache_resource"):
        return st.cache_resource
    return st.cache(allow_output_mutation=True)


@_model_cache()
def load_model():
    """Load the pre-trained BART summarization model and its tokenizer.

    The result is cached by Streamlit so the weights are not reloaded on
    every script rerun.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from the
        ``facebook/bart-large-cnn`` checkpoint.
    """
    checkpoint = 'facebook/bart-large-cnn'
    tokenizer = BartTokenizer.from_pretrained(checkpoint)
    model = BartForConditionalGeneration.from_pretrained(checkpoint)
    return model, tokenizer

def read_pdf_from_url(url, timeout=30):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: Address of the PDF file to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The text extracted from every page, concatenated in page order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx responses here instead of handing an error page to the
    # PDF parser, which would report a confusing parse failure.
    response.raise_for_status()
    pdf = PdfReader(BytesIO(response.content))
    # ``or ""`` keeps the join safe should a page yield no text at all.
    return "".join(page.extract_text() or "" for page in pdf.pages)

def generate_summary(model, tokenizer, text):
    """Summarize *text* with the given model and tokenizer.

    Args:
        model: Object whose ``generate`` method produces summary token ids
            from the encoded input ids.
        tokenizer: Callable that encodes the text into tensors and exposes
            ``decode`` to turn generated ids back into strings.
        text: The document text to summarize.

    Returns:
        A list of decoded summary strings, one per generated sequence. The
        list is empty when *text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the expensive 40-beam generation entirely.
    if not text or not text.strip():
        return []

    # Ask for truncation explicitly so over-long documents are clipped to
    # ``max_length`` tokens by design rather than by an implicit fallback.
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors='pt')
    summary_ids = model.generate(
        inputs['input_ids'],
        num_beams=40,
        max_length=1024,
        early_stopping=False,
    )
    return [
        tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        for ids in summary_ids
    ]

def main():
    """Render the Streamlit page and summarize the PDF at the entered URL.

    The page shows an example summary, a button that pre-fills the URL box
    with the bundled "The Art of War" PDF, and a text box for any other PDF
    URL. When a URL is present the PDF is downloaded, its text extracted and
    summarized, and the result is written to the page. Download or parse
    failures are reported on the page instead of crashing the app.
    """
    st.title("Pdf Summarizer")
    st.write("""Exp.-Book--The Art of War
                    Summary--The art of war is governed by five constant factors, to be taken into account in one's deliberations. These are: (1) The Moral Law; (2) Heaven; (3) Earth; (4) The Commander; (5) Method and discipline. By means of these seven considerations I can forecast victory or defeat. All warfare is based on deception. When able to attack, we must seem unable. When we are near, we  must make the enemy believe we are far away. If your opponent is of choleric temper, seek to irritate him. If he is taking no rest, give him no rest. If his forces are united, separate them. Attack where he is unprepared, where you are not expected to win. These military devices, leading to victory, must not be divulged beforehand. It is by this attention to this point that I can foresee who is likely to win or lose a battle. The general who loses a battle makes many calculations but few beforehand. This version was generated automatically at www.suntzusaid.com/SunTzu/Art-of-War-By-Sun-Tzu.html.""")
    default_url = "https://huggingface.co/spaces/swamisharan/text-sum/resolve/main/The%20Art%20of%20War.pdf?download=true"
    button_clicked = st.button("The Art of War-Sun Tzu")
    pdf_url = st.text_input("Enter the URL of the PDF file:", value=default_url if button_clicked else "")
    if not pdf_url:
        return

    # Imported locally so the parse-error type is in scope without touching
    # the module-level import block; PyPDF2 is already a dependency.
    from PyPDF2.errors import PdfReadError

    # Fetch the document before loading the model so a bad URL fails fast.
    try:
        text = read_pdf_from_url(pdf_url)
    except (requests.RequestException, PdfReadError) as exc:
        st.error(f"Could not read a PDF from that URL: {exc}")
        return
    if not text.strip():
        st.warning("No extractable text was found in the PDF.")
        return

    model, tokenizer = load_model()
    summary = generate_summary(model, tokenizer, text)
    # generate_summary returns a list of strings; join it so the page shows
    # plain text rather than a Python list repr.
    st.write(f"Summary: {' '.join(summary)}")


# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == '__main__':
    main()