# NOTE: residue from the web file viewer, kept as comments so the file parses.
# abdwahdia's picture
# Create app.py
# 4456d86 verified
import nltk
# Fetch the NLTK resources needed for tokenization and stop-word filtering.
# 'punkt_tab' is the tokenizer data loaded by NLTK >= 3.9; 'punkt' is kept
# for older releases that still load the original resource.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Frequency-based extractive summarizer
def summarizer_FS(Text, num_sentences=5):
    """Return an extractive summary made of the best-scoring sentences.

    Every word is weighted by its count divided by the count of the most
    frequent word. Stop words and punctuation-only tokens are ignored, and
    matching is case-insensitive. A sentence's score is the sum of the
    weights of its words.

    Args:
        Text: The text to summarize.
        num_sentences: Maximum number of sentences to keep (default 5).

    Returns:
        The selected sentences joined by single spaces, highest score
        first, or an empty string when there is nothing to summarize.
    """
    from collections import Counter

    # Nothing to do for missing/blank text or a non-positive sentence budget.
    if not Text or not Text.strip() or num_sentences <= 0:
        return ""

    # Split the text into sentences.
    sentences = nltk.sent_tokenize(Text)
    # A set gives O(1) membership tests instead of scanning a list per token.
    stop_words = set(nltk.corpus.stopwords.words('english'))

    # Tokenize each sentence once, lowercased, so the tokens used to count
    # frequencies are exactly the tokens used to score the sentences.
    tokenized = [(sentence, nltk.word_tokenize(sentence.lower()))
                 for sentence in sentences]

    # Count the meaningful words.
    word_counts = Counter()
    for _, tokens in tokenized:
        for token in tokens:
            if token in stop_words:
                continue
            # Skip tokens without any letter or digit (pure punctuation).
            if not any(ch.isalnum() for ch in token):
                continue
            word_counts[token] += 1

    # Text made only of stop words / punctuation: nothing to rank.
    if not word_counts:
        return ""

    # Normalize counts by the highest count.
    highest = max(word_counts.values())
    weights = {word: count / highest for word, count in word_counts.items()}

    # Score each sentence; only sentences with at least one weighted word
    # get an entry. Repeated identical sentences accumulate on one key.
    sentence_scores = {}
    for sentence, tokens in tokenized:
        for token in tokens:
            weight = weights.get(token)
            if weight is not None:
                sentence_scores[sentence] = (
                    sentence_scores.get(sentence, 0) + weight
                )

    # Keep the top-scoring sentences (ties keep their original order).
    summary_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )[:num_sentences]
    return ' '.join(summary_sentences)
# Import the pipeline factory
from transformers import pipeline
# Summarization pipeline using the facebook/bart-large-cnn model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Summarizer backed by the transformers pipeline
def summarizer_t(input):
    """Summarize ``input`` with the module-level ``summarizer`` pipeline.

    Args:
        input: The text to summarize. The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty
        string when ``input`` is missing or blank.
    """
    # Do not send missing/blank text to the model.
    if not input or not input.strip():
        return ""
    # truncation=True asks the tokenizer to cut inputs that exceed the
    # model's maximum input length instead of letting the call fail.
    output = summarizer(input, truncation=True)
    return output[0]['summary_text']
# Deployment (Gradio UI)
import gradio as gr
import os
# Top-level Blocks container for the UI
demo = gr.Blocks()

# Interface wired to summarizer_FS (one text input, one text output)
summarizer1 = gr.Interface(
    fn=summarizer_FS,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with NLTK",
    # Fixed user-facing typo: "scracth" -> "scratch".
    description="Summarize any text from scratch under the hood!",
)
# Interface wired to summarizer_t (one text input, one text output)
summarizer2 = gr.Interface(
    summarizer_t,
    [gr.Textbox(label="Text to summarize", lines=6)],
    [gr.Textbox(label="Result", lines=3)],
    title="Text summarization with bart-large-cnn ",
    description="Summarize any text using facebook/bart-large-cnn model under the hood!",
)
# Show both interfaces as tabs inside the Blocks container
with demo:
    gr.TabbedInterface(
        [summarizer1, summarizer2],
        # Fixed user-facing typo in the first tab label: "scrath" -> "scratch".
        ["Summarize from scratch", "Summarize using bart"],
    )

# Start the Gradio app
demo.launch()