Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from PyPDF2 import PdfReader | |
| from gensim import corpora | |
| from gensim.models import TfidfModel | |
| import nltk | |
| nltk.download('punkt') | |
| from nltk.tokenize import word_tokenize | |
| import requests | |
| from io import BytesIO | |
| from transformers import BartTokenizer, BartForConditionalGeneration, pipeline | |
@st.cache_resource
def load_model():
    """Load the pre-trained BART summarization model and its tokenizer.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is wrapped in ``st.cache_resource``: the checkpoint is loaded
    once and the same objects are handed back on later reruns instead of
    being re-created each time.

    Returns:
        tuple: ``(model, tokenizer)`` for the ``facebook/bart-large-cnn``
        checkpoint.
    """
    # Single source of truth so model and tokenizer cannot drift apart.
    checkpoint = 'facebook/bart-large-cnn'
    tokenizer = BartTokenizer.from_pretrained(checkpoint)
    model = BartForConditionalGeneration.from_pretrained(checkpoint)
    return model, tokenizer
def read_pdf_from_url(url):
    """Download a PDF from *url* and return the text of all its pages.

    Args:
        url: Address of the PDF file to fetch.

    Returns:
        str: The extracted text of every page, concatenated in page order
        with no separator between pages.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # Bound the wait so an unresponsive host cannot hang the app forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to PdfReader.
    response.raise_for_status()
    pdf = PdfReader(BytesIO(response.content))
    # `or ""` keeps the join safe if a page yields no text at all.
    return "".join(page.extract_text() or "" for page in pdf.pages)
def generate_summary(model, tokenizer, text):
    """Summarize *text* with a pre-trained sequence-to-sequence model.

    Args:
        model: Object exposing ``generate(input_ids, ...)``.
        tokenizer: Callable that encodes a list of strings into a mapping
            containing ``'input_ids'`` and that exposes ``decode(...)``.
        text: The document to summarize.

    Returns:
        list[str]: One decoded summary per generated sequence.
    """
    # max_length alone only states the limit; truncation=True makes the
    # tokenizer explicitly cut long documents down to 1024 tokens instead
    # of relying on an implicit fallback that emits a warning.
    inputs = tokenizer(
        [text],
        max_length=1024,
        truncation=True,
        return_tensors='pt',
    )
    summary_ids = model.generate(
        inputs['input_ids'],
        num_beams=40,
        max_length=1024,
        early_stopping=False,
    )
    return [
        tokenizer.decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for ids in summary_ids
    ]
def main():
    """Render the Streamlit page and summarize the PDF at the entered URL.

    Shows a title and an example summary, offers a button that pre-fills
    the URL field with a sample PDF, and, once a URL is present, downloads
    the PDF, summarizes its text and writes the summary to the page.
    """
    st.title("Pdf Summarizer")
    st.write("""Exp.-Book--The Art of War
Summary--The art of war is governed by five constant factors, to be taken into account in one's deliberations. These are: (1) The Moral Law; (2) Heaven; (3) Earth; (4) The Commander; (5) Method and discipline. By means of these seven considerations I can forecast victory or defeat. All warfare is based on deception. When able to attack, we must seem unable. When we are near, we must make the enemy believe we are far away. If your opponent is of choleric temper, seek to irritate him. If he is taking no rest, give him no rest. If his forces are united, separate them. Attack where he is unprepared, where you are not expected to win. These military devices, leading to victory, must not be divulged beforehand. It is by this attention to this point that I can foresee who is likely to win or lose a battle. The general who loses a battle makes many calculations but few beforehand. This version was generated automatically at www.suntzusaid.com/SunTzu/Art-of-War-By-Sun-Tzu.html.""")
    default_url = "https://huggingface.co/spaces/swamisharan/text-sum/resolve/main/The%20Art%20of%20War.pdf?download=true"
    button_clicked = st.button("The Art of War-Sun Tzu")
    # The sample URL is only used as the field's default on the run in
    # which the button was clicked.
    pdf_url = st.text_input("Enter the URL of the PDF file:", value=default_url if button_clicked else "")
    if pdf_url:
        model, tokenizer = load_model()
        text = read_pdf_from_url(pdf_url)
        summary = generate_summary(model, tokenizer, text)
        # generate_summary returns a list of strings; join it so the page
        # shows plain text rather than a Python list repr like ['...'].
        summary_text = summary if isinstance(summary, str) else " ".join(summary)
        st.write(f"Summary: {summary_text}")
# Call main() only when this module is run as the main program,
# not when it is imported.
if __name__ == '__main__':
    main()