File size: 1,754 Bytes
b653f91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# src/translate.py
# Command-line script that translates Nepali text to English with a fine-tuned model.
import torch
from transformers import MBartForConditionalGeneration, NllbTokenizer
import argparse
# --- 1. Configuration ---
# Run on the GPU when PyTorch reports one; otherwise stay on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Model weights and tokenizer files are read from the same local directory.
_NEPALI_CHECKPOINT = "models/nllb-finetuned-nepali-en"

# --- 2. Load Models and Tokenizers ---
# Everything is loaded once at import time and looked up by source-language
# key afterwards.
# NOTE(review): the checkpoint name and NllbTokenizer suggest an NLLB model;
# confirm MBartForConditionalGeneration is the intended architecture class.
print(f"Loading models on {DEVICE.upper()}...")

models = {
    "nepali": MBartForConditionalGeneration.from_pretrained(
        _NEPALI_CHECKPOINT
    ).to(DEVICE),
}

tokenizers = {
    "nepali": NllbTokenizer.from_pretrained(_NEPALI_CHECKPOINT),
}

print("All models loaded successfully!")
def translate_text(
    text_to_translate: str,
    source_language: str,
    *,
    max_length: int = 128,
) -> str:
    """Translate a single string of text to English using our fine-tuned models.

    Args:
        text_to_translate: The text to translate.
        source_language: Key selecting the model/tokenizer pair from the
            module-level ``models`` and ``tokenizers`` dicts (e.g. "nepali").
        max_length: Value passed to ``model.generate`` as ``max_length``.
            Defaults to 128.

    Returns:
        The first decoded sequence with special tokens removed. Empty or
        whitespace-only input returns "" without calling the model.

    Raises:
        KeyError: If the text is non-empty and ``source_language`` has no
            entry in ``models`` or ``tokenizers``.
    """
    # Nothing to translate: skip tokenization and generation entirely so a
    # blank input cannot produce spurious model output.
    if not text_to_translate.strip():
        return ""

    model = models[source_language]
    tokenizer = tokenizers[source_language]

    # The source-language code is fixed regardless of ``source_language``;
    # "nepali" is the only language registered in this file.
    # NOTE(review): stock NLLB lists Nepali as "npi_Deva"; confirm that
    # "nep_Npan" is the code this checkpoint was fine-tuned with.
    tokenizer.src_lang = "nep_Npan"

    # truncation=True asks the tokenizer to cap the input at its configured
    # maximum length instead of passing an arbitrarily long sequence on.
    inputs = tokenizer(
        text_to_translate,
        return_tensors="pt",
        truncation=True,
    ).to(DEVICE)

    # Force "eng_Latn" as the first generated token so decoding targets English.
    generated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"),
        max_length=max_length,
    )

    # A single input string yields a batch of one; return its only entry.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# --- 3. Example Usage ---
if __name__ == "__main__":
    # Command-line entry point: read the text and its source language from
    # the arguments, translate it, and print the original next to the result.
    arg_parser = argparse.ArgumentParser(
        description="Translate text using a fine-tuned model.",
    )
    arg_parser.add_argument(
        "--text",
        type=str,
        required=True,
        help="Text to translate.",
    )
    arg_parser.add_argument(
        "--lang",
        type=str,
        required=True,
        choices=["nepali"],
        help="Source language: 'nepali'.",
    )
    args = arg_parser.parse_args()

    english_text = translate_text(args.text, args.lang)

    print(f"\nOriginal ({args.lang}): {args.text}")
    print(f"Translated (en): {english_text}")
|