abdwahdia commited on
Commit
4456d86
·
verified ·
1 Parent(s): fe7996a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Fetch the NLTK resources the frequency-based summarizer relies on:
# the Punkt sentence tokenizer and the English stopword list.
import nltk

for _resource in ("punkt", "stopwords"):
    nltk.download(_resource)
# Frequency-based extractive summarizer (no model, NLTK only).
def summarizer_FS(Text):
    """Summarize *Text* by scoring sentences with normalized word frequencies.

    Tokenizes the text into sentences, builds a frequency table of
    non-stopword tokens (case-folded, so counting matches the lowercased
    tokens used during scoring), normalizes each frequency by the maximum,
    scores every sentence as the sum of its word weights, and returns the
    5 highest-scoring sentences joined into a single string.

    Returns an empty string when the text contains no scorable words
    (empty input or stopwords/punctuation only).
    """
    # Split the text into sentences.
    sentences_list = nltk.sent_tokenize(Text)
    # English stopwords to exclude; a set gives O(1) membership tests.
    # Renamed from `stopwords` to avoid shadowing nltk.corpus.stopwords.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    # Word -> raw count of occurrences.
    word_frequencies = {}
    # BUG FIX: the original counted tokens with their original case but
    # scored sentences with lowercased tokens, so capitalized words never
    # contributed to any sentence score. Lowercase here for consistency.
    for word in nltk.word_tokenize(Text.lower()):
        if word not in stop_words:
            word_frequencies[word] = word_frequencies.get(word, 0) + 1
    # Guard: without this, max() on an empty table raises ValueError.
    if not word_frequencies:
        return ''
    # Normalize every count by the most frequent word's count.
    freq_maximum = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / freq_maximum
    # Sentence -> accumulated weight of the words it contains.
    sentence_scores = {}
    for sent in sentences_list:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_frequencies[word]
    # Keep the 5 best-scoring sentences (original comment said 10 but the
    # code always took 5) and join them into the summary.
    summary_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:5]
    summary = ' '.join(summary_sentences)
    return summary
# Bring in the Hugging Face pipeline factory.
from transformers import pipeline

# Abstractive summarization model: BART fine-tuned on CNN/DailyMail.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Thin wrapper around the transformers pipeline.
def summarizer_t(input):
    """Run the BART summarization pipeline on *input* and return only the
    generated summary string (the pipeline returns a list of dicts)."""
    result = summarizer(input)
    return result[0]['summary_text']
# Deploy both summarizers as a tabbed Gradio app.
import gradio as gr
import os

# Container for the tabbed interface.
demo = gr.Blocks()

# Tab 1: the from-scratch NLTK frequency summarizer.
summarizer1 = gr.Interface(
    fn=summarizer_FS,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with NLTK",
    # Typo fix: "scracth" -> "scratch".
    description="Summarize any text from scratch under the hood!",
)
# Tab 2: the BART abstractive summarizer.
summarizer2 = gr.Interface(
    fn=summarizer_t,
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    # Removed the stray trailing space from the displayed title.
    title="Text summarization with bart-large-cnn",
    description="Summarize any text using facebook/bart-large-cnn model under the hood!",
)

with demo:
    gr.TabbedInterface(
        [summarizer1, summarizer2],
        # Typo fix: "scrath" -> "scratch".
        ["Summarize from scratch", "Summarize using bart"],
    )

demo.launch()