Spaces:
Sleeping
Sleeping
File size: 5,031 Bytes
2370590 97ae3c2 d176bfb 2370590 d176bfb 2370590 d176bfb 3c7157a 781ddba 3c7157a d176bfb 3c7157a c069d29 2370590 3c7157a 2370590 9869f73 2370590 3c7157a 2370590 3c7157a 2370590 3c7157a 2370590 3c7157a 2370590 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import gradio as gr
import torch
from kuidastaltsutadalaamat.trainllm import load_model, load_tokenizer
from kuidastaltsutadalaamat.inference import llm_generate
from kuidastaltsutadalaamat.data import LazyTokenizingInferenceDataset
from kuidastaltsutadalaamat.promptops import *
# No HF Accelerate wrapper — single local device (CPU or one GPU).
accel = None
model_id = "tartuNLP/smugri4-mt"
# Prefer GPU when available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model(model_id, device, accelerator=accel, attention="eager") # "eager" attention implementation so the model also runs on CPU
model.eval()  # inference mode: disables dropout etc.
tokenizer = load_tokenizer(model_id, accelerator=accel)
# Maps the model's internal language/variety tags (as used in prompts) to the
# human-readable labels shown in the UI dropdowns. Labels are unique, so the
# mapping is safely invertible.
lang_raw_to_label = {"English": "English",
"Erzya": "Erzya",
"Estonian": "Estonian",
"Estonian, Alutaguse, Lüg, dictionary": "Alutaguse",
"Estonian, Hiiu, Rei, dictionary": "Hiiu",
"Estonian, Ida, Kod, dictionary": "Ida",
"Estonian, Kesk, Kjn, dictionary": "Keskmurre",
"Estonian, Kihnu, dictionary": "Kihnu",
"Estonian, Lääne, Mar, dictionary": "Lääne",
"Estonian, Muhu, dictionary": "Muhu",
"Estonian, Ranna, Kuu, dictionary": "Rannakeel",
"Estonian, Saare, Khk, dictionary": "Saare",
"Southern Estonian, Mulgi, Krk, dictionary": "Mulgi",
"Southern Estonian, Seto, dictionary": "Seto",
"Southern Estonian, Tartu, Nõo, dictionary": "Tartu",
"Southern Estonian, Võro, Lei, dictionary": "Leivu",
"Southern Estonian, Võro, Lut, dictionary": "Lutsi",
"Southern Estonian, Võro, Sõnaq": "Võro (Sõnaq orth)",
"Southern Estonian, Võro, Uma": "Võro (Umaleht orth)",
"Finnish": "Finnish",
"Kven": "Kven",
"Meänkieli": "Meänkieli",
"Hill Mari": "Hill Mari",
"Meadow Mari": "Meadow Mari",
"Hungarian": "Hungarian",
"Inari Sami": "Inari Sami",
"Pite Sami": "Pite Sami",
"Kildin Sami, Antonova": "Kildin Sami (Antonova orth)",
"Kildin Sami, Kuruch": "Kildin Sami (Kuruch orth)",
"Lule Sami": "Lule Sami",
"Northern Sami": "Northern Sami",
"Skolt Sami": "Skolt Sami",
"Southern Sami": "Southern Sami",
"Ume Sami": "Ume Sami",
"Izhorian, Mehmet": "Ingrian (Ala-Laukaa / simplified)",
"Izhorian, Alamaluuga, speech": "Ingrian (Ala-Laukaa)",
"Izhorian, Soikkola": "Ingrian (Soikkola)",
"Votic, Standard": "Votic",
"Komi-Permyak": "Komi-Permyak",
"Komi-Zyrian": "Komi-Zyrian",
"Latvian": "Latvian",
"Livonian, Standard": "Livonian",
"Livvi, Newwritten": "Livvi",
"Ludian, Miikul": "Ludian (ü)",
"Ludian, Newwritten": "Ludian (y)",
"Mansi, Unk": "Mansi (Northern)",
"Moksha": "Moksha",
"Norwegian": "Norwegian",
"Kazym Khanty, 2013": "Kazym Khanty",
"Priur Khanty": "Priur Khanty",
"Shur Khanty, 2013": "Shur Khanty",
"Sred Khanty": "Sred Khanty",
"Surgut Khanty, 2013": "Surgut Khanty",
"Vakh Khanty, 2013": "Vakh Khanty",
"Proper Karelian, Newwritten": "Proper Karelian",
"Russian": "Russian",
"Swedish": "Swedish",
"Udmurt": "Udmurt",
"Veps, Newwritten": "Veps"
}
# Inverse mapping: UI label -> raw model tag (used when building prompts).
label_to_raw = {raw: label for label, raw in lang_raw_to_label.items()} if False else {label: raw for raw, label in lang_raw_to_label.items()}
# Alphabetically sorted labels for the dropdown choices.
languages_labels = sorted(label_to_raw)
def run_inference(text, from_lang, to_lang, mode):
    """Run the model on *text* and return the generated string.

    For mode == "translate", *from_lang*/*to_lang* are UI labels that are
    mapped back to raw model tags and formatted with the MT prompt; any
    other mode uses the language-identification prompt and ignores the
    language arguments.
    """
    request = {"src_segm": text, "task": mode}
    if mode == "translate":
        request["src_lang"] = label_to_raw[from_lang]
        request["tgt_lang"] = label_to_raw[to_lang]
        fmt = PF_SMUGRI_MT
    else:
        fmt = PF_SMUGRI_LID
    # Single-entry dataset handles prompt formatting and tokenization.
    dataset = LazyTokenizingInferenceDataset([request], tokenizer, fmt)
    generated = llm_generate(model, tokenizer, dataset[0], debug=False, max_len=512)
    return generated[0]
with gr.Blocks() as demo:
    # --- Layout: input text, language pickers, action button, output box ---
    text_input = gr.Textbox(label="Text", lines=6, placeholder="Enter text...")
    with gr.Row():
        from_dropdown = gr.Dropdown(choices=languages_labels, label="From", value=None)
        to_dropdown = gr.Dropdown(choices=languages_labels, label="To", value=None)
    # Disabled until the user has entered text and picked both languages.
    translate_btn = gr.Button("Translate", interactive=False)
    output = gr.Textbox(label="Output", lines=6)

    def toggle_translate(text, f, t):
        # Enable the button only when text is non-blank and both languages are set.
        return gr.update(interactive=bool(text.strip() and f and t))

    # Re-evaluate the button state whenever any of the three inputs changes
    # (one loop instead of three identical wiring statements).
    for component in (text_input, from_dropdown, to_dropdown):
        component.change(toggle_translate,
                         [text_input, from_dropdown, to_dropdown],
                         [translate_btn])

    translate_btn.click(
        fn=lambda text, f, t: run_inference(text, f, t, mode="translate"),
        inputs=[text_input, from_dropdown, to_dropdown],
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch()
|