Spaces:
Sleeping
Sleeping
| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.parsers.html import HtmlParser | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.nlp.stemmers import Stemmer | |
| from sumy.utils import get_stop_words | |
| import gradio as gr | |
| import nltk | |
| nltk.download('punkt_tab') | |
# Maps each method name offered in the UI to the (module path, class name) of
# the sumy summarizer implementing it. Modules are imported lazily inside
# ``summarize`` so that only the selected algorithm is loaded.
_SUMMARIZER_CLASSES = {
    "LSA": ("sumy.summarizers.lsa", "LsaSummarizer"),
    "text-rank": ("sumy.summarizers.text_rank", "TextRankSummarizer"),
    "lex-rank": ("sumy.summarizers.lex_rank", "LexRankSummarizer"),
    "edmundson": ("sumy.summarizers.edmundson", "EdmundsonSummarizer"),
    "luhn": ("sumy.summarizers.luhn", "LuhnSummarizer"),
    "kl-sum": ("sumy.summarizers.kl", "KLSummarizer"),
    "random": ("sumy.summarizers.random", "RandomSummarizer"),
    "reduction": ("sumy.summarizers.reduction", "ReductionSummarizer"),
}

# Input kinds accepted by ``summarize``.
_INPUT_TYPES = ("URL", "text")


def summarize(method, language, sentence_count, input_type, input_):
    """Summarize a web page or a plain-text string with a sumy summarizer.

    Args:
        method: Name of the summarization algorithm; one of the keys of
            ``_SUMMARIZER_CLASSES`` (e.g. ``"LSA"``, ``"luhn"``).
        language: Language name passed to sumy's ``Tokenizer``, ``Stemmer``
            and ``get_stop_words``.
        sentence_count: Number of sentences requested from the summarizer.
        input_type: ``"URL"`` to parse ``input_`` with ``HtmlParser.from_url``
            or ``"text"`` to parse it with ``PlaintextParser.from_string``.
        input_: The URL or the raw text to summarize.

    Returns:
        The selected sentences joined by single spaces.

    Raises:
        ValueError: If ``method`` or ``input_type`` is not a supported value
            (for instance ``None`` when nothing was selected in the UI).
    """
    import importlib

    # Validate the selections up front: without this, an unknown value left
    # the summarizer class or the parser unbound and surfaced as an opaque
    # UnboundLocalError much later in the function.
    if method not in _SUMMARIZER_CLASSES:
        raise ValueError(
            f"Unsupported summarization method {method!r}; "
            f"expected one of {sorted(_SUMMARIZER_CLASSES)}."
        )
    if input_type not in _INPUT_TYPES:
        raise ValueError(
            f"Unsupported input type {input_type!r}; "
            f"expected one of {list(_INPUT_TYPES)}."
        )

    module_path, class_name = _SUMMARIZER_CLASSES[method]
    summarizer_cls = getattr(importlib.import_module(module_path), class_name)

    tokenizer = Tokenizer(language)
    if input_type == "URL":
        parser = HtmlParser.from_url(input_, tokenizer)
    else:
        parser = PlaintextParser.from_string(input_, tokenizer)

    summarizer = summarizer_cls(Stemmer(language))
    stop_words = get_stop_words(language)
    if method == "edmundson":
        # The Edmundson summarizer is configured through three word lists
        # instead of a single ``stop_words`` attribute.
        summarizer.null_words = stop_words
        summarizer.bonus_words = parser.significant_words
        summarizer.stigma_words = parser.stigma_words
    else:
        summarizer.stop_words = stop_words

    summary_sentences = summarizer(parser.document, sentence_count)
    return " ".join(str(sentence) for sentence in summary_sentences)
# Heading shown at the top of the Gradio page.
title = "sumy library space for automatic text summarization"

# Markdown blurb rendered under the title; the value starts and ends with a
# newline, exactly as the equivalent triple-quoted literal would.
description = (
    "\n"
    "This is a space for [sumy](https://github.com/miso-belica/sumy), an automatic text summarization library.\n"
    "The summary can be extracted either from an HTML page or plain text. You can find a list of available summarization\n"
    "methods [here](https://github.com/miso-belica/sumy/blob/main/docs/summarizators.md).\n"
)

# Summarization algorithms offered in the method dropdown.
methods = [
    "LSA",
    "luhn",
    "edmundson",
    "text-rank",
    "lex-rank",
    "random",
    "reduction",
    "kl-sum",
]

# Languages offered in the language dropdown.
supported_languages = [
    "english",
    "french",
    "arabic",
    "chinese",
    "czech",
    "german",
    "italian",
    "hebrew",
    "japanese",
    "portuguese",
    "slovak",
    "spanish",
    "ukrainian",
    "greek",
]
# URL used both as the initial textbox content and in the first example row.
_EXAMPLE_URL = "https://en.wikipedia.org/wiki/Automatic_summarization"

# Plain-text passage used by the second example row.
_EXAMPLE_TEXT = (
    "Automatic text summarization is a computational process that condenses a larger text into a shorter version while retaining key information. "
    "It helps extract the most important or relevant details from the original content. "
    "There are two main approaches: extractive summarization, which selects key sentences from the text, and abstractive summarization, which generates new sentences to convey the same meaning. "
    "This technique is widely used in applications like news aggregation, document summarization, and AI-powered assistants. "
    "By reducing information overload, automatic summarization enhances readability and efficiency in processing large volumes of text."
)

# Build the web UI. The input components are listed in the same order as the
# parameters of ``summarize``: method, language, sentence count, input type,
# and the URL/text itself. Each example row follows that same order.
iface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Dropdown(choices=methods),
        gr.Dropdown(choices=supported_languages),
        gr.Number(value=5),
        gr.Radio(choices=["URL", "text"], value="URL"),
        gr.Textbox(value=_EXAMPLE_URL),
    ],
    outputs="text",
    title=title,
    description=description,
    examples=[
        ["luhn", "english", 2, "URL", _EXAMPLE_URL],
        ["LSA", "english", 1, "text", _EXAMPLE_TEXT],
    ],
)

# Start serving the interface.
iface.launch()