Spaces:
Sleeping
Sleeping
| import nltk | |
# Fetch the NLTK data used by the frequency-based summarizer.
# "punkt_tab" is the tokenizer resource loaded by recent NLTK releases, while
# "punkt" is kept for older ones; requesting an id that the installed release
# does not know only reports an error and returns False, it does not raise.
for _resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_resource)
# Extractive, frequency-based summarizer
def summarizer_FS(Text):
    """Return an extractive summary made of the best-scoring sentences of ``Text``.

    Tokens are lowercased, English stopwords are dropped, and every remaining
    token is weighted by its count divided by the highest count. A sentence
    scores the sum of the weights of its tokens; the five best-scoring
    sentences are joined with single spaces, highest score first.

    Args:
        Text: The English text to summarize.

    Returns:
        The summary string, or an empty string when ``Text`` is empty,
        whitespace-only, or contains no token outside the stopword list.
    """
    # Nothing to summarize: avoid calling max() on an empty mapping below.
    if not Text or not Text.strip():
        return ""

    # Split the text into sentences.
    sentences_list = nltk.sent_tokenize(Text)

    # A set gives constant-time membership tests for the stopwords.
    stopwords = set(nltk.corpus.stopwords.words('english'))

    # Tokenize each sentence once, lowercased, so that counting and scoring
    # use exactly the same tokens (the stopword list is lowercase too).
    tokens_per_sentence = [
        nltk.word_tokenize(sent.lower()) for sent in sentences_list
    ]

    # Count the occurrences of every non-stopword token.
    word_frequencies = {}
    for tokens in tokens_per_sentence:
        for word in tokens:
            if word not in stopwords:
                word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        return ""

    # Normalise the counts by the highest count.
    freq_maximum = max(word_frequencies.values())
    word_weights = {
        word: count / freq_maximum for word, count in word_frequencies.items()
    }

    # Score each sentence; identical sentences share one accumulated entry.
    sentence_scores = {}
    for sent, tokens in zip(sentences_list, tokens_per_sentence):
        for word in tokens:
            if word in word_weights:
                sentence_scores[sent] = (
                    sentence_scores.get(sent, 0) + word_weights[word]
                )

    # Keep the five best-scoring sentences, highest score first.
    summary_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )[:5]

    return ' '.join(summary_sentences)
| # Importer pipeline | |
| from transformers import pipeline | |
# Summarization model: a transformers pipeline for the "summarization" task
# using the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Model-based summarizer
def summarizer_t(input):
    """Summarize ``input`` with the module-level ``summarizer`` pipeline.

    Args:
        input: The text to summarize. The name shadows the builtin but is
            kept so existing callers keep working.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when ``input`` is ``None``, empty, or whitespace-only.
    """
    # Do not send blank text to the model.
    if input is None or not input.strip():
        return ""

    # truncation=True clips text that exceeds the model's maximum input
    # length instead of letting the pipeline fail on long inputs.
    output = summarizer(input, truncation=True)
    return output[0]['summary_text']
| # Déployer | |
| import gradio as gr | |
| import os | |
# Build the Blocks container that hosts both summarizers.
demo = gr.Blocks()

# Tab 1: frequency-based summarizer built on NLTK.
summarizer1 = gr.Interface(
    fn=summarizer_FS,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with NLTK",
    description="Summarize any text from scratch under the hood!",
)

# Tab 2: summarizer backed by the facebook/bart-large-cnn pipeline.
summarizer2 = gr.Interface(
    fn=summarizer_t,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with bart-large-cnn ",
    description="Summarize any text using facebook/bart-large-cnn model under the hood!",
)

# Expose the two interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [summarizer1, summarizer2],
        ["Summarize from scratch", "Summarize using bart"],
    )

# Start the web app.
demo.launch()