chinhon committed on
Commit
c4ba590
·
1 Parent(s): b184233

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -121
app.py DELETED
@@ -1,121 +0,0 @@
1
- import gradio as gr
2
- import re
3
-
4
- from gradio.mix import Parallel
5
- from transformers import (
6
- AutoTokenizer,
7
- AutoModelForSeq2SeqLM,
8
- )
9
-
10
#define function for text cleaning
def clean_text(text: str) -> str:
    """Normalize raw commentary text before tokenization.

    Drops non-ASCII characters (e.g. Chinese text), strips URLs, blanks
    out the literal "ADVERTISEMENT" marker, and collapses all whitespace
    runs (spaces, tabs, newlines) into single spaces.

    Args:
        text: Raw input string pasted by the user.

    Returns:
        The cleaned, stripped string (may be empty).
    """
    text = text.encode("ascii", errors="ignore").decode(
        "ascii"
    )  # remove non-ascii, Chinese characters
    text = re.sub(r"http\S+", "", text)  # strip URLs
    text = re.sub(r"ADVERTISEMENT", " ", text)
    # Collapse every whitespace run to a single space and trim the ends.
    # The original chained \n, \n\n, \t and " +" substitutions; the \n\n
    # step was dead code (no newline could survive the preceding \n sub),
    # and a single \s+ pass covers all of them (plus \r, \f) in one go.
    text = re.sub(r"\s+", " ", text).strip()
    return text
25
-
26
# define function for headlines generator 1-3
modchoice_1 = "chinhon/pegasus-large-commentaries_hd"

def commentaries_headline1(text):
    """Generate a headline for *text* with the fine-tuned pegasus-large model.

    Args:
        text: Raw commentary text; it is cleaned via clean_text() first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    # NOTE: loaded on every call — simple but slow; acceptable for a demo app.
    tokenizer_1 = AutoTokenizer.from_pretrained(modchoice_1)
    model_1 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)

    # Encode the *source* text directly. The original wrapped this call in
    # tokenizer_1.as_target_tokenizer(), but that (deprecated) context
    # manager is meant only for tokenizing labels/targets during training;
    # for Pegasus (shared source/target vocab) it was a no-op here, so the
    # plain call is the correct, equivalent usage.
    batch = tokenizer_1(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_1.generate(**batch)

    summary_1 = tokenizer_1.batch_decode(translated, skip_special_tokens=True)

    return summary_1[0]
46
-
47
-
48
# Gradio UI wrapping generator 1 (fine tuned pegasus-large).
headline1 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-large"),
    fn=commentaries_headline1,
)
53
-
54
modchoice_2 = "chinhon/pegasus-multi_news-commentaries_hdwriter"

def commentaries_headline2(text):
    """Generate a headline for *text* with the fine-tuned pegasus-multi_news model.

    Args:
        text: Raw commentary text; it is cleaned via clean_text() first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    # NOTE: loaded on every call — simple but slow; acceptable for a demo app.
    tokenizer_2 = AutoTokenizer.from_pretrained(modchoice_2)
    model_2 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)

    # Encode the *source* text directly. The original wrapped this call in
    # tokenizer_2.as_target_tokenizer(), but that (deprecated) context
    # manager is meant only for tokenizing labels/targets during training;
    # for Pegasus (shared source/target vocab) it was a no-op here, so the
    # plain call is the correct, equivalent usage.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_2.generate(**batch)

    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)

    return summary_2[0]
73
-
74
# Gradio UI wrapping generator 2 (fine tuned pegasus-multi_news).
headline2 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-multi_news"),
    fn=commentaries_headline2,
)
79
-
80
-
81
modchoice_3 = "chinhon/pegasus-newsroom-commentaries_hdwriter"

def commentaries_headline3(text):
    """Generate a headline for *text* with the fine-tuned pegasus-newsroom model.

    Args:
        text: Raw commentary text; it is cleaned via clean_text() first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    # NOTE: loaded on every call — simple but slow; acceptable for a demo app.
    tokenizer_3 = AutoTokenizer.from_pretrained(modchoice_3)
    model_3 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)

    # Encode the *source* text directly. The original wrapped this call in
    # tokenizer_3.as_target_tokenizer(), but that (deprecated) context
    # manager is meant only for tokenizing labels/targets during training;
    # for Pegasus (shared source/target vocab) it was a no-op here, so the
    # plain call is the correct, equivalent usage.
    batch = tokenizer_3(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    # max_length caps the generated headline length. The original passed
    # max_length=100 to batch_decode(), where it is silently ignored —
    # generation length is controlled here in generate(), which is the
    # evident intent of that argument.
    translated = model_3.generate(**batch, max_length=100)

    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)

    return summary_3[0]
102
-
103
-
104
# Gradio UI wrapping generator 3 (fine tuned pegasus-newsroom).
headline3 = gr.Interface(
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-newsroom"),
    fn=commentaries_headline3,
)
109
-
110
#define Gradio interface for 3 parallel apps
# One shared textbox feeds all three model interfaces side by side.
commentary_input = gr.inputs.Textbox(
    lines=20,
    label="Paste parts of your commentary here, and choose from 3 suggested headlines",
)
app = Parallel(
    headline1,
    headline2,
    headline3,
    title="Commentaries Headlines Generator",
    inputs=commentary_input,
    theme="huggingface",
)
app.launch(enable_queue=True)