import nltk
nltk.download('punkt')
nltk.download('stopwords')
# Frequency-based extractive summarizer (pure NLTK, no model needed)
def summarizer_FS(Text, num_sentences=5):
    """Return an extractive summary of *Text*.

    Each sentence is scored as the sum of the normalized frequencies of
    its lower-cased, non-stopword, alphanumeric tokens; the highest
    scoring sentences are joined into the summary.

    Parameters
    ----------
    Text : str
        The text to summarize.
    num_sentences : int, optional
        Maximum number of sentences kept in the summary (default 5,
        matching the original hard-coded behavior).

    Returns
    -------
    str
        The summary, or an empty string when the text contains no
        scorable words (empty input, stopwords/punctuation only).
    """
    # Split the article into sentences.
    sentences_list = nltk.sent_tokenize(Text)
    # A set gives O(1) membership tests (the original used a list).
    stop_words = set(nltk.corpus.stopwords.words('english'))

    # Count word frequencies from the LOWER-CASED text so the keys match
    # the lower-cased lookups done during sentence scoring below (the
    # original counted case-sensitively, so capitalized occurrences were
    # never found when scoring). isalnum() drops punctuation tokens,
    # which the original counted as words.
    word_frequencies = {}
    for word in nltk.word_tokenize(Text.lower()):
        if word not in stop_words and word.isalnum():
            word_frequencies[word] = word_frequencies.get(word, 0) + 1

    # Guard: max() on an empty dict raises ValueError.
    if not word_frequencies:
        return ''

    # Normalize frequencies to [0, 1] by the most frequent word.
    freq_maximum = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / freq_maximum

    # Score each sentence as the sum of its words' weighted frequencies.
    sentence_scores = {}
    for sent in sentences_list:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies:
                sentence_scores[sent] = (
                    sentence_scores.get(sent, 0) + word_frequencies[word]
                )

    # Keep the top-scoring sentences and join them into the summary.
    summary_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )[:num_sentences]
    return ' '.join(summary_sentences)
# Import the Hugging Face pipeline factory
from transformers import pipeline
# Summarization pipeline backed by the facebook/bart-large-cnn model
# (the model is fetched by transformers when the pipeline is built)
summarizer = pipeline(task="summarization",
model="facebook/bart-large-cnn")
# Transformer-based summarization wrapper
def summarizer_t(input):
    """Summarize *input* text with the bart-large-cnn pipeline.

    Parameters
    ----------
    input : str
        Text to summarize.

    Returns
    -------
    str
        The generated summary text.
    """
    # The pipeline returns a list with one result dict per input.
    results = summarizer(input)
    first = results[0]
    return first['summary_text']
# Deployment: expose both summarizers as tabs in a Gradio app.
import gradio as gr
import os

# Top-level container for the tabbed interface.
demo = gr.Blocks()

# Tab 1: the from-scratch NLTK summarizer.
summarizer1 = gr.Interface(
    fn=summarizer_FS,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with NLTK",
    description="Summarize any text from scratch under the hood!",
)

# Tab 2: the transformer-based summarizer.
summarizer2 = gr.Interface(
    fn=summarizer_t,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with bart-large-cnn ",
    description="Summarize any text using facebook/bart-large-cnn model under the hood!",
)

# Build the two interfaces into tabs inside the Blocks context.
with demo:
    gr.TabbedInterface(
        [summarizer1, summarizer2],
        ["Summarize from scratch", "Summarize using bart"],
    )

# Start the web app.
demo.launch()