chinhon committed on
Commit
73e689e
·
1 Parent(s): c4ba590

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+
4
+ from transformers import (
5
+ AutoTokenizer,
6
+ AutoModelForSeq2SeqLM,
7
+ )
8
+
9
# Text-cleaning helper shared by all three headline generators.
def clean_text(text):
    """Normalize raw commentary text before tokenization.

    Removes non-ASCII characters (e.g. Chinese text), URLs, and the literal
    "ADVERTISEMENT" marker, then collapses every whitespace run (spaces,
    tabs, newlines) into a single space and trims the result.
    """
    # Drop every character outside ASCII — removes Chinese characters etc.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Strip URLs (anything starting with "http" up to the next whitespace).
    text = re.sub(r"http\S+", "", text)
    # Replace advertisement markers with a space so adjacent words don't fuse.
    text = re.sub(r"ADVERTISEMENT", " ", text)
    # Collapse all whitespace in one pass. The original substituted r"\n\n"
    # AFTER r"\n" — dead code, since no "\n" could remain — and its " +"
    # collapse missed "\r" and other whitespace; \s+ covers every case.
    text = re.sub(r"\s+", " ", text).strip()
    return text
24
+
25
# Headline generator 1 of 3: fine-tuned pegasus-large checkpoint.
modchoice_1 = "chinhon/pegasus-large-commentaries_hd"


def commentaries_headline1(text):
    """Return one suggested headline for *text* using pegasus-large.

    The tokenizer and model are loaded on every call; after the first call
    the downloaded weights come from the local HuggingFace cache.
    """
    cleaned = clean_text(text)

    hd_tokenizer = AutoTokenizer.from_pretrained(modchoice_1)
    hd_model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)

    # NOTE(review): as_target_tokenizer() is normally meant for tokenizing
    # labels, not model inputs — kept as-is to preserve original behavior.
    with hd_tokenizer.as_target_tokenizer():
        encoded = hd_tokenizer(
            cleaned, truncation=True, padding="longest", return_tensors="pt"
        )

    generated = hd_model.generate(**encoded)
    decoded = hd_tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]
45
+
46
+
47
# Gradio sub-interface wrapping generator 1 (pegasus-large).
headline1 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-large"),
    fn=commentaries_headline1,
)
52
+
53
# Headline generator 2 of 3: fine-tuned pegasus-multi_news checkpoint.
modchoice_2 = "chinhon/pegasus-multi_news-commentaries_hdwriter"


def commentaries_headline2(text):
    """Return one suggested headline for *text* using pegasus-multi_news.

    The tokenizer and model are loaded on every call; after the first call
    the downloaded weights come from the local HuggingFace cache.
    """
    cleaned = clean_text(text)

    mn_tokenizer = AutoTokenizer.from_pretrained(modchoice_2)
    mn_model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)

    # NOTE(review): as_target_tokenizer() is normally meant for tokenizing
    # labels, not model inputs — kept as-is to preserve original behavior.
    with mn_tokenizer.as_target_tokenizer():
        encoded = mn_tokenizer(
            cleaned, truncation=True, padding="longest", return_tensors="pt"
        )

    generated = mn_model.generate(**encoded)
    decoded = mn_tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]
72
+
73
# Gradio sub-interface wrapping generator 2 (pegasus-multi_news).
headline2 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-multi_news"),
    fn=commentaries_headline2,
)
78
+
79
+
80
# Headline generator 3 of 3: fine-tuned pegasus-newsroom checkpoint.
modchoice_3 = "chinhon/pegasus-newsroom-commentaries_hdwriter"


def commentaries_headline3(text):
    """Return one suggested headline for *text* using pegasus-newsroom.

    The tokenizer and model are loaded on every call; after the first call
    the downloaded weights come from the local HuggingFace cache.
    """
    input_text = clean_text(text)

    tokenizer_3 = AutoTokenizer.from_pretrained(modchoice_3)
    model_3 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)

    # NOTE(review): as_target_tokenizer() is normally meant for tokenizing
    # labels, not model inputs — kept as-is to match the sibling generators.
    with tokenizer_3.as_target_tokenizer():
        batch = tokenizer_3(
            input_text, truncation=True, padding="longest", return_tensors="pt"
        )

    # BUG FIX: the original passed max_length=100 to batch_decode(), where it
    # has no effect — it is a generation parameter. Apply the intended
    # 100-token cap where it actually belongs, in generate().
    translated = model_3.generate(**batch, max_length=100)

    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)

    return summary_3[0]
101
+
102
+
103
# Gradio sub-interface wrapping generator 3 (pegasus-newsroom).
headline3 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-newsroom"),
    fn=commentaries_headline3,
)
108
+
109
# Top-level UI: run all three generators side-by-side on one shared textbox
# so the user can compare the three suggested headlines at a glance.
gradio_ui = gr.Parallel(
    # Positional order fixes the on-screen order of the three outputs.
    headline1,
    headline2,
    headline3,
    theme="huggingface",
    title="Commentaries Headlines Generator",
    inputs=gr.inputs.Textbox(
        lines=20,
        label="Paste parts of your commentary here, and choose from 3 suggested headlines",
    ),
)

# enable_queue serializes requests — model inference is slow and memory-heavy.
gradio_ui.launch(enable_queue=True)