Spaces:
Runtime error
Runtime error
vteam27
committed on
Commit
·
79868fd
1
Parent(s):
ef6d6f0
Added upload button
Browse files
- app.py +9 -7
- lang_list.py +1 -93
app.py
CHANGED
|
@@ -16,7 +16,9 @@ processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
|
|
| 16 |
# print(translated_text_from_text)
|
| 17 |
|
| 18 |
|
| 19 |
-
def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
|
|
|
|
|
|
|
| 20 |
source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
|
| 21 |
target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
|
| 22 |
text_inputs = processor(text = input_text, src_lang=source_language_code , return_tensors="pt")
|
|
@@ -25,17 +27,17 @@ def run_t2tt(input_text: str, source_language: str, target_language: str) -> str
|
|
| 25 |
return str(output)
|
| 26 |
|
| 27 |
|
| 28 |
-
|
| 29 |
with gr.Blocks() as demo_t2tt:
|
| 30 |
with gr.Row():
|
| 31 |
with gr.Column():
|
| 32 |
with gr.Group():
|
|
|
|
| 33 |
input_text = gr.Textbox(label="Input text")
|
| 34 |
with gr.Row():
|
| 35 |
source_language = gr.Dropdown(
|
| 36 |
label="Source language",
|
| 37 |
choices=TEXT_SOURCE_LANGUAGE_NAMES,
|
| 38 |
-
value="
|
| 39 |
)
|
| 40 |
target_language = gr.Dropdown(
|
| 41 |
label="Target language",
|
|
@@ -49,7 +51,7 @@ with gr.Blocks() as demo_t2tt:
|
|
| 49 |
gr.Examples(
|
| 50 |
examples=[
|
| 51 |
[
|
| 52 |
-
"The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since
|
| 53 |
"English",
|
| 54 |
"Punjabi",
|
| 55 |
],
|
|
@@ -69,17 +71,17 @@ with gr.Blocks() as demo_t2tt:
|
|
| 69 |
"English",
|
| 70 |
],
|
| 71 |
],
|
| 72 |
-
inputs=[input_text, source_language, target_language],
|
| 73 |
outputs=output_text,
|
| 74 |
fn=run_t2tt,
|
| 75 |
-
cache_examples=
|
| 76 |
api_name=False,
|
| 77 |
)
|
| 78 |
|
| 79 |
gr.on(
|
| 80 |
triggers=[input_text.submit, btn.click],
|
| 81 |
fn=run_t2tt,
|
| 82 |
-
inputs=[input_text, source_language, target_language],
|
| 83 |
outputs=output_text,
|
| 84 |
api_name="t2tt",
|
| 85 |
)
|
|
|
|
| 16 |
# print(translated_text_from_text)
|
| 17 |
|
| 18 |
|
| 19 |
+
def run_t2tt(file_uploader , input_text: str, source_language: str, target_language: str) -> str:
|
| 20 |
+
if file_uploader is not None:
|
| 21 |
+
input_text = file_uploader.read().decode("utf-8")
|
| 22 |
source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
|
| 23 |
target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
|
| 24 |
text_inputs = processor(text = input_text, src_lang=source_language_code , return_tensors="pt")
|
|
|
|
| 27 |
return str(output)
|
| 28 |
|
| 29 |
|
|
|
|
| 30 |
with gr.Blocks() as demo_t2tt:
|
| 31 |
with gr.Row():
|
| 32 |
with gr.Column():
|
| 33 |
with gr.Group():
|
| 34 |
+
file_uploader = gr.File(label="Upload a text file (Optional)", type="txt")
|
| 35 |
input_text = gr.Textbox(label="Input text")
|
| 36 |
with gr.Row():
|
| 37 |
source_language = gr.Dropdown(
|
| 38 |
label="Source language",
|
| 39 |
choices=TEXT_SOURCE_LANGUAGE_NAMES,
|
| 40 |
+
value="Punjabi",
|
| 41 |
)
|
| 42 |
target_language = gr.Dropdown(
|
| 43 |
label="Target language",
|
|
|
|
| 51 |
gr.Examples(
|
| 52 |
examples=[
|
| 53 |
[
|
| 54 |
+
"The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since independence",
|
| 55 |
"English",
|
| 56 |
"Punjabi",
|
| 57 |
],
|
|
|
|
| 71 |
"English",
|
| 72 |
],
|
| 73 |
],
|
| 74 |
+
inputs=[file_uploader ,input_text, source_language, target_language],
|
| 75 |
outputs=output_text,
|
| 76 |
fn=run_t2tt,
|
| 77 |
+
cache_examples=False,
|
| 78 |
api_name=False,
|
| 79 |
)
|
| 80 |
|
| 81 |
gr.on(
|
| 82 |
triggers=[input_text.submit, btn.click],
|
| 83 |
fn=run_t2tt,
|
| 84 |
+
inputs=[file_uploader, input_text, source_language, target_language],
|
| 85 |
outputs=output_text,
|
| 86 |
api_name="t2tt",
|
| 87 |
)
|
lang_list.py
CHANGED
|
@@ -108,101 +108,9 @@ LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
|
|
| 108 |
# Source langs: S2ST / S2TT / ASR don't need source lang
|
| 109 |
# T2TT / T2ST use this
|
| 110 |
text_source_language_codes = [
|
| 111 |
-
"afr",
|
| 112 |
-
"amh",
|
| 113 |
-
"arb",
|
| 114 |
-
"ary",
|
| 115 |
-
"arz",
|
| 116 |
-
"asm",
|
| 117 |
-
"azj",
|
| 118 |
-
"bel",
|
| 119 |
-
"ben",
|
| 120 |
-
"bos",
|
| 121 |
-
"bul",
|
| 122 |
-
"cat",
|
| 123 |
-
"ceb",
|
| 124 |
-
"ces",
|
| 125 |
-
"ckb",
|
| 126 |
-
"cmn",
|
| 127 |
-
"cym",
|
| 128 |
-
"dan",
|
| 129 |
-
"deu",
|
| 130 |
-
"ell",
|
| 131 |
-
"eng",
|
| 132 |
-
"est",
|
| 133 |
-
"eus",
|
| 134 |
-
"fin",
|
| 135 |
-
"fra",
|
| 136 |
-
"gaz",
|
| 137 |
-
"gle",
|
| 138 |
-
"glg",
|
| 139 |
-
"guj",
|
| 140 |
-
"heb",
|
| 141 |
"hin",
|
| 142 |
-
"hrv",
|
| 143 |
-
"hun",
|
| 144 |
-
"hye",
|
| 145 |
-
"ibo",
|
| 146 |
-
"ind",
|
| 147 |
-
"isl",
|
| 148 |
-
"ita",
|
| 149 |
-
"jav",
|
| 150 |
-
"jpn",
|
| 151 |
-
"kan",
|
| 152 |
-
"kat",
|
| 153 |
-
"kaz",
|
| 154 |
-
"khk",
|
| 155 |
-
"khm",
|
| 156 |
-
"kir",
|
| 157 |
-
"kor",
|
| 158 |
-
"lao",
|
| 159 |
-
"lit",
|
| 160 |
-
"lug",
|
| 161 |
-
"luo",
|
| 162 |
-
"lvs",
|
| 163 |
-
"mai",
|
| 164 |
-
"mal",
|
| 165 |
-
"mar",
|
| 166 |
-
"mkd",
|
| 167 |
-
"mlt",
|
| 168 |
-
"mni",
|
| 169 |
-
"mya",
|
| 170 |
-
"nld",
|
| 171 |
-
"nno",
|
| 172 |
-
"nob",
|
| 173 |
-
"npi",
|
| 174 |
-
"nya",
|
| 175 |
-
"ory",
|
| 176 |
"pan",
|
| 177 |
-
"
|
| 178 |
-
"pes",
|
| 179 |
-
"pol",
|
| 180 |
-
"por",
|
| 181 |
-
"ron",
|
| 182 |
-
"rus",
|
| 183 |
-
"slk",
|
| 184 |
-
"slv",
|
| 185 |
-
"sna",
|
| 186 |
-
"snd",
|
| 187 |
-
"som",
|
| 188 |
-
"spa",
|
| 189 |
-
"srp",
|
| 190 |
-
"swe",
|
| 191 |
-
"swh",
|
| 192 |
-
"tam",
|
| 193 |
-
"tel",
|
| 194 |
-
"tgk",
|
| 195 |
-
"tgl",
|
| 196 |
-
"tha",
|
| 197 |
-
"tur",
|
| 198 |
-
"ukr",
|
| 199 |
-
"urd",
|
| 200 |
-
"uzn",
|
| 201 |
-
"vie",
|
| 202 |
-
"yor",
|
| 203 |
-
"yue",
|
| 204 |
-
"zsm",
|
| 205 |
-
"zul",
|
| 206 |
]
|
| 207 |
TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
|
| 208 |
|
|
|
|
| 108 |
# Source langs: S2ST / S2TT / ASR don't need source lang
|
| 109 |
# T2TT / T2ST use this
|
| 110 |
text_source_language_codes = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
"hin",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
"pan",
|
| 113 |
+
"eng",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
]
|
| 115 |
TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
|
| 116 |
|