swamisharan committed on
Commit
1f526ca
·
verified ·
1 Parent(s): 6a0efd8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from gensim import corpora
4
+ from gensim.models import TfidfModel
5
+ import nltk
6
+ nltk.download('punkt')
7
+ from nltk.tokenize import word_tokenize
8
+ import requests
9
+ from io import BytesIO
10
+ from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
11
+
12
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the BART summarisation checkpoint and its tokenizer.

    Both objects come from the ``facebook/bart-large-cnn`` checkpoint. The
    function is wrapped in ``st.cache`` so Streamlit can reuse the result
    instead of reloading the weights.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    # NOTE(review): st.cache appears to be deprecated in newer Streamlit
    # releases in favour of st.cache_resource; confirm the target version.
    checkpoint = 'facebook/bart-large-cnn'
    bart_tokenizer = BartTokenizer.from_pretrained(checkpoint)
    bart_model = BartForConditionalGeneration.from_pretrained(checkpoint)
    return bart_model, bart_tokenizer
18
+
19
def read_pdf_from_url(url):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: Address of the PDF document to fetch.

    Returns:
        The text extracted from every page, concatenated in page order.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Bound the wait so an unresponsive host cannot hang the app forever.
    response = requests.get(url, timeout=30)
    # Fail early on 4xx/5xx rather than feeding an error page to the parser.
    response.raise_for_status()
    pdf = PdfReader(BytesIO(response.content))
    # The accumulator used to start with a hard-coded URL string, which was
    # prepended to every document; the result now contains page text only.
    # "or ''" keeps join() safe if a page yields no text.
    return "".join(page.extract_text() or "" for page in pdf.pages)
27
+
28
def generate_summary(model, tokenizer, text):
    """Summarise *text* with a pre-trained sequence-to-sequence model.

    Args:
        model: Object exposing ``generate(input_ids, ...)``; the app passes
            the BART model returned by ``load_model``.
        tokenizer: Callable tokenizer that also exposes ``decode``.
        text: The document to summarise.

    Returns:
        A list of decoded summary strings, one per generated sequence.
    """
    # truncation=True is required for max_length to be enforced; without it
    # a long document is encoded in full and exceeds the model's input size.
    inputs = tokenizer(
        [text],
        max_length=1024,
        truncation=True,
        return_tensors='pt',
    )
    summary_ids = model.generate(
        inputs['input_ids'],
        num_beams=40,
        max_length=1024,
        early_stopping=False,
    )
    return [
        tokenizer.decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for ids in summary_ids
    ]
34
+
35
def main():
    """Render the Streamlit page: ask for a PDF URL and show its summary.

    Nothing beyond the title and input box is shown until a URL is entered.
    Once one is provided, the model is loaded, the PDF text is fetched and
    the generated summary is written to the page.
    """
    st.title("PDF Summarizer")
    source_url = st.text_input("Enter the URL of the PDF file:")
    # Guard clause: an empty input box means there is nothing to summarise.
    if not source_url:
        return

    model, tokenizer = load_model()
    summary = generate_summary(model, tokenizer, read_pdf_from_url(source_url))
    st.write(f"Summary: {summary}")


# Run the app only when this file is executed as a script.
if __name__ == '__main__':
    main()