"""Translate sample Nepali and Sinhala sentences to English with NLLB-200.

The script loads the ``facebook/nllb-200-distilled-600M`` checkpoint, then
translates each sample sentence to English and prints the original text
next to its translation.
"""

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch  # PyTorch backend; the tokenizer is asked for "pt" tensors below.

# Checkpoint to load from the Hugging Face Hub.
model_name = "facebook/nllb-200-distilled-600M"

# Language code of the translation target.
target_lang = "eng_Latn"

print(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Model loaded successfully!")

# Source-language code -> sentence to translate.
# NOTE: Nepali is "npi_Deva" in the FLORES-200 code list used by NLLB;
# "nep_Npan" is not a valid code and would not select a source language.
sentences_to_translate = {
    "npi_Deva": "नेपालको राजधानी काठमाडौं हो।",
    "sin_Sinh": "ශ්රී ලංකාවේ අගනුවර කොළඹ වේ.",
}

# Language codes are ordinary tokens in the tokenizer vocabulary, so the ID
# is looked up with convert_tokens_to_ids. The older `lang_code_to_id`
# mapping is no longer available in recent transformers releases.
# The lookup is loop-invariant, so it is done once here.
target_lang_id = tokenizer.convert_tokens_to_ids(target_lang)

print("\n--- Starting Translation ---")

for lang_code, text in sentences_to_translate.items():
    # Fail loudly on an unknown language code rather than translating with
    # an <unk> language tag.
    if tokenizer.convert_tokens_to_ids(lang_code) == tokenizer.unk_token_id:
        raise ValueError(f"Unknown NLLB language code: {lang_code!r}")

    # Tell the tokenizer which language the input text is written in.
    tokenizer.src_lang = lang_code
    inputs = tokenizer(text, return_tensors="pt")

    # Force the decoder to start with the target-language token so the
    # model generates English output.
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=target_lang_id,
        max_length=50,
    )

    # generate() returns a batch; there is a single sentence, so take [0].
    translation = tokenizer.batch_decode(
        translated_tokens, skip_special_tokens=True
    )[0]

    print(f"\nOriginal ({lang_code}): {text}")
    print(f"Translation ({target_lang}): {translation}")

print("\n--- Translation Complete ---")