| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.summarizers.text_rank import TextRankSummarizer | |
| from sumy.nlp.stemmers import Stemmer | |
| from sumy.utils import get_stop_words | |
| import gradio as gr | |
| import nltk | |
| import time | |
def textrank_summarizer(text_corpus):
    """Summarize English text with sumy's TextRank summarizer.

    Args:
        text_corpus: Plain text to summarize. ``None`` or whitespace-only
            input produces an empty summary.

    Returns:
        The sentences returned by the summarizer (25 are requested), joined
        by single spaces, or ``""`` when there is nothing to summarize.

    Side effects:
        Prints the elapsed summarization time to stdout.
    """
    # The Clear button writes None into the input box, so a later click on
    # Summarize can arrive here without any text. Bail out before the parser
    # is handed a non-string or an empty document.
    if text_corpus is None or not str(text_corpus).strip():
        return ""

    # perf_counter is monotonic, so the measured duration is not affected by
    # wall-clock adjustments during the run.
    start_time = time.perf_counter()

    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    summarizer = TextRankSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")

    sentences = summarizer(parser.document, 25)

    # Join with a space so adjacent sentences do not run together
    # ("First.Second." -> "First. Second.").
    summary = " ".join(str(sentence) for sentence in sentences)

    elapsed = time.perf_counter() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")
    return summary
def clear_everything(text_corpus, summary):
    """Reset the input and output text areas.

    Both current values are accepted because the click handler passes them
    in, but they are ignored.

    Returns:
        A ``(None, None)`` tuple, one value per output component.
    """
    cleared_corpus = None
    cleared_summary = None
    return cleared_corpus, cleared_summary
# Soft theme with a purple / cyan / slate palette. The font list is 'Syne'
# followed by three 'Poppins' entries, in that order.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont(font_name)
        for font_name in ("Syne", "Poppins", "Poppins", "Poppins")
    ],
)
# Build the Gradio UI: a static header, an input text area with
# Clear / Summarize buttons, and a read-only output text area.
with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
    # Static page header.
    gr.HTML(
        value ='''
        <h1 style="text-align: center;">TextRank Summarizer</h1>
        <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        ''')
    with gr.Row():
        with gr.Column():
            # Input: the text to be summarized.
            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
        # Output: read-only area that receives the summary.
        # NOTE(review): assumed to sit beside the input column (inside the
        # outer Row) - confirm the intended layout.
        summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
    # Summarize: pass the input text to textrank_summarizer and write its
    # return value to the output area. The handler is registered under the
    # API name "textrank_summarizer".
    summarize_btn.click(
        textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="textrank_summarizer",
        show_progress="full",
    )
    # Clear: clear_everything returns None for both components, resetting
    # the input and output areas.
    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)
# Download the NLTK 'punkt' and 'punkt_tab' resources quietly before serving
# (presumably required by the sumy Tokenizer - confirm).
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)

# Enable the request queue, then start the server.
app.queue(default_concurrency_limit=25)
app.launch(show_api=True, max_threads=500, ssr_mode=False)