Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import nltk | |
| import numpy as np | |
| import re | |
| import warnings | |
| from nltk.tokenize import sent_tokenize | |
| from transformers import ( | |
| MarianTokenizer, | |
| MarianMTModel, | |
| ) | |
# Fetch the Punkt sentence-tokenizer data that sent_tokenize() relies on.
# Recent NLTK releases (3.9+) load the "punkt_tab" resource instead of the
# pickled "punkt" one, so request both to keep sentence splitting working
# regardless of which NLTK version is installed.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
| #define function for text cleaning | |
def clean_text(text):
    """Normalize raw article text before it is split into sentences.

    Steps, in order: drop every non-ASCII character, turn newlines and tabs
    into spaces, remove the CNA "Download our app ..." boilerplate sentence,
    remove URLs, remove "ADVERTISEMENT" markers, then collapse runs of spaces
    and trim the ends.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, single-line string (possibly empty).
    """
    # Remove non-ASCII characters (e.g. Chinese text mixed into the input).
    text = text.encode("ascii", errors="ignore").decode("ascii")

    # Flatten line breaks and tabs first so multi-line boilerplate still matches.
    text = re.sub(r"[\n\t]", " ", text)

    # The boilerplate must be removed BEFORE the generic URL pass: its pattern
    # ends with the Telegram URL, so it can never match once URLs are gone.
    text = re.sub(
        r"Download our app or subscribe to our Telegram channel for the latest "
        r"updates on the (?:coronavirus|COVID-19) outbreak: "
        r"https://cna\.asia/telegram",
        " ",
        text,
    )

    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"ADVERTISEMENT", " ", text)

    # Collapse multiple spaces into one and trim surrounding whitespace.
    return re.sub(" +", " ", text).strip()
| # define function for translation | |
modchoice = "Helsinki-NLP/opus-mt-en-zh"

# Tokenizer/model pairs keyed by checkpoint name. Loading a checkpoint is
# expensive, so it is done once on first use instead of on every request.
_translator_cache = {}


def _get_translator(model_name):
    """Return the (tokenizer, model) pair for *model_name*, loading it once."""
    if model_name not in _translator_cache:
        _translator_cache[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _translator_cache[model_name]


def translate(text):
    """Translate English text with the checkpoint named by ``modchoice``.

    The text is cleaned with ``clean_text``, split into sentences with
    ``sent_tokenize``, translated as one padded batch, and the translated
    sentences are joined with single spaces.

    Args:
        text: The English text to translate.

    Returns:
        The translated text, or the string ``"Error"`` when the input is
        missing or contains nothing translatable after cleaning.
    """
    # Reject missing/empty input before cleaning (clean_text cannot handle
    # None) and before paying for a model load.
    if not text:
        return "Error"

    input_text = clean_text(text)
    # Cleaning can strip everything (e.g. non-ASCII-only or URL-only input);
    # an empty sentence batch must not reach the tokenizer.
    if not input_text:
        return "Error"

    tokenizer, model = _get_translator(modchoice)

    # Call the tokenizer directly; prepare_seq2seq_batch is deprecated.
    batch = tokenizer(
        sent_tokenize(input_text),
        truncation=True,
        padding="longest",
        return_tensors="pt",
    )
    translated = model.generate(**batch)

    tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
    return " ".join(tgt_text)
# Build the web UI: one large textbox for the English input, one textbox for
# the Chinese output, both wired to translate().
# NOTE(review): gr.inputs / gr.outputs, theme="huggingface" and
# launch(enable_queue=...) are legacy Gradio APIs - confirm the Gradio version
# this app is deployed with still accepts them.
english_input = gr.inputs.Textbox(lines=20, label="Paste English text here")
chinese_output = gr.outputs.Textbox(label="Chinese translation")

gradio_ui = gr.Interface(
    fn=translate,
    inputs=english_input,
    outputs=chinese_output,
    title="English-to-Chinese translation",
    description="Translate English text into Chinese using MarianMT's opus-mt-en-zh model.",
    theme="huggingface",
)

# Start the server with request queuing enabled.
gradio_ui.launch(enable_queue=True)