Spaces:

raannakasturi
/

TextRankSummarizer

Running

App Files Community

raannakasturi committed on Dec 14, 2024

Commit

ba5f64c

verified ·

1 Parent(s): defd5de

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -71

app.py CHANGED Viewed

@@ -1,71 +1,68 @@
-from sumy.parsers.plaintext import PlaintextParser
-from sumy.nlp.tokenizers import Tokenizer
-from sumy.summarizers.text_rank import TextRankSummarizer
-from sumy.summarizers.luhn import LuhnSummarizer
-from sumy.summarizers.lex_rank import LexRankSummarizer
-from sumy.summarizers.lsa import LsaSummarizer
-from sumy.nlp.stemmers import Stemmer
-from sumy.utils import get_stop_words
-import gradio as gr
-import nltk
-import time
-def textrank_summarizer(text_corpus):
-    start_time = time.time()
-    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
-    stemmer = Stemmer("english")
-    summarizer = TextRankSummarizer(stemmer)
-    summarizer.stop_words = get_stop_words("english")
-    sentences = summarizer(parser.document, 15)
-    summary = ""
-    for sentence in sentences:
-        summary += str(sentence) + ""
-    end_time = time.time()
-    print(f"Time taken: {end_time - start_time:.2f} seconds")
-    return summary
-def clear_everything(text_corpus, summary):
-    return None, None
-theme = gr.themes.Soft(
-    primary_hue="purple",
-    secondary_hue="cyan",
-    neutral_hue="slate",
-    font=[
-        gr.themes.GoogleFont('Syne'),
-        gr.themes.GoogleFont('Poppins'),
-        gr.themes.GoogleFont('Poppins'),
-        gr.themes.GoogleFont('Poppins')
-    ],
-)
-with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
-    gr.HTML(
-        value ='''
-        <h1 style="text-align: center;">TextRank Summarizer</h1>
-        <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
-        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
-        ''')
-    with gr.Row():
-        with gr.Column():
-            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
-            with gr.Row():
-                clear_btn = gr.Button(value="Clear", variant='stop')
-                summarize_btn = gr.Button(value="Summarize", variant='primary')
-        summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
-    summarize_btn.click(
-        textrank_summarizer,
-        inputs=[text_corpus],
-        outputs=[summary],
-        concurrency_limit=25,
-        scroll_to_output=True,
-        show_api=True,
-        api_name="textrank_summarizer",
-        show_progress="full",
-    )
-    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)
-nltk.download('punkt', quiet=True)
-nltk.download('punkt_tab', quiet=True)
-app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)

+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.summarizers.text_rank import TextRankSummarizer
+from sumy.nlp.stemmers import Stemmer
+from sumy.utils import get_stop_words
+import gradio as gr
+import nltk
+import time
+def textrank_summarizer(text_corpus):
+    start_time = time.time()
+    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
+    stemmer = Stemmer("english")
+    summarizer = TextRankSummarizer(stemmer)
+    summarizer.stop_words = get_stop_words("english")
+    sentences = summarizer(parser.document, 10)
+    summary = ""
+    for sentence in sentences:
+        summary += str(sentence) + ""
+    end_time = time.time()
+    print(f"Time taken: {end_time - start_time:.2f} seconds")
+    return summary
+def clear_everything(text_corpus, summary):
+    return None, None
+theme = gr.themes.Soft(
+    primary_hue="purple",
+    secondary_hue="cyan",
+    neutral_hue="slate",
+    font=[
+        gr.themes.GoogleFont('Syne'),
+        gr.themes.GoogleFont('Poppins'),
+        gr.themes.GoogleFont('Poppins'),
+        gr.themes.GoogleFont('Poppins')
+    ],
+)
+with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
+    gr.HTML(
+        value ='''
+        <h1 style="text-align: center;">TextRank Summarizer</h1>
+        <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
+        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
+        ''')
+    with gr.Row():
+        with gr.Column():
+            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
+            with gr.Row():
+                clear_btn = gr.Button(value="Clear", variant='stop')
+                summarize_btn = gr.Button(value="Summarize", variant='primary')
+        summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
+    summarize_btn.click(
+        textrank_summarizer,
+        inputs=[text_corpus],
+        outputs=[summary],
+        concurrency_limit=25,
+        scroll_to_output=True,
+        show_api=True,
+        api_name="textrank_summarizer",
+        show_progress="full",
+    )
+    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)
+nltk.download('punkt', quiet=True)
+nltk.download('punkt_tab', quiet=True)
+app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)