---
language:
- ug
- en
tags:
- translation
pipeline_tag: translation
---
| | |
# Usage
| |
|
```python
# Minimal example: translate English text to Uyghur with the
# piyazon/uyghur_translate_dev2 sequence-to-sequence checkpoint.
import logging

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Only show errors from the tokenizer base module (hides its warnings).
logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.ERROR)

model_name = "piyazon/uyghur_translate_dev2"
src_lang = "eng_Latn"  # language code of the input text
tgt_lang = "uig_Arab"  # language code the model should generate

# Priority: CUDA > MPS > CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Report the device that was actually selected, not a fixed string.
print(f"Using device: {device}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Tell the tokenizer which language the input is written in.
tokenizer.src_lang = src_lang

text = "Let's answer a question: What is the radius of the Earth? The Earth's average radius is approximately 6371 kilometers, which is the average value of the distance from the equator to the poles."

# 1. PRE-PROCESSING (crucial step)
# Tokenize to PyTorch tensors and move them to the same device as the model.
inputs = tokenizer(
    text,
    return_tensors="pt",
    padding=True,
    truncation=True,
).to(device)

# 2. PREPARE TARGET TOKEN
# The target language code is a vocabulary token; its id is forced as the
# first generated token so the model decodes into the target language.
forced_bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)

# 3. GENERATION
# Inference only, so gradient tracking is disabled.
with torch.no_grad():
    out = model.generate(
        **inputs,
        forced_bos_token_id=forced_bos_token_id,
        max_new_tokens=128,
        num_beams=4,
        no_repeat_ngram_size=3,
    )

# 4. DECODE
# Drop special tokens (language codes, padding, EOS) from the output text.
translation = tokenizer.batch_decode(out, skip_special_tokens=True)[0]

print(translation)
```
| |
|
| |
|