File size: 4,678 Bytes
5d3912b b8db96b 5d3912b b8db96b 5d3912b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
class ImprovedTranslator:
    """Lazy-loading wrapper around several Hugging Face translation pipelines.

    Pipelines are created on first use and cached in ``self.models`` under
    the keys ``"nllb"``, ``"mbart"``, ``"opus"`` and ``"t5"``. Every
    ``translate_with_*`` method takes the text and a direction label and
    returns the translated string.
    """

    # (source, target) language codes keyed by "is Indonesian -> English".
    # NLLB and mBART-50 each use their own code scheme.
    _NLLB_LANGS = {True: ("ind_Latn", "eng_Latn"), False: ("eng_Latn", "ind_Latn")}
    _MBART_LANGS = {True: ("id_ID", "en_XX"), False: ("en_XX", "id_ID")}

    def __init__(self):
        """Initialize translator with multiple model options."""
        # Pipelines take a CUDA device index, or -1 to run on CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {}
        self.current_model = "nllb"  # Default model

    @staticmethod
    def _is_id_to_en(direction):
        """Return True when *direction* denotes Indonesian -> English.

        Only the leading source-language code is inspected, so the label is
        recognised regardless of which arrow glyph separates the two codes
        (the glyph is easily mangled by encoding round-trips). Every other
        value, including non-strings, is treated as English -> Indonesian.
        """
        return isinstance(direction, str) and direction.strip().upper().startswith("ID")

    def load_model(self, model_type="nllb"):
        """Load and cache the translation pipeline(s) for *model_type*.

        Args:
            model_type: One of "nllb", "mbart", "opus" or "t5". A type that
                is already cached, or any unrecognised value, is a no-op.
        """
        if model_type in self.models:
            return  # Already loaded; keep the cached pipeline.

        if model_type == "nllb":
            # NLLB (No Language Left Behind) - Meta's multilingual model.
            # Half precision is only requested when CUDA is available.
            self.models["nllb"] = pipeline(
                "translation",
                model="facebook/nllb-200-distilled-600M",
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
            )
        elif model_type == "mbart":
            # mBART-50 many-to-many checkpoint.
            self.models["mbart"] = pipeline(
                "translation",
                model="facebook/mbart-large-50-many-to-many-mmt",
                device=self.device
            )
        elif model_type == "opus":
            # Original Helsinki-NLP models (fallback): one pipeline per direction.
            self.models["opus"] = {
                "id_en": pipeline("translation", model="Helsinki-NLP/opus-mt-id-en", device=self.device),
                "en_id": pipeline("translation", model="Helsinki-NLP/opus-mt-en-id", device=self.device)
            }
        elif model_type == "t5":
            # FLAN-T5 base checkpoint.
            # NOTE(review): this class has no translate_with_t5 method, so
            # the pipeline is loaded here but never used by the class.
            self.models["t5"] = pipeline(
                "translation",
                model="google/flan-t5-base",
                device=self.device
            )

    def translate_with_nllb(self, text, direction):
        """Translate *text* with the NLLB pipeline, loading it if needed.

        Args:
            text: Text to translate.
            direction: Direction label; a label starting with "ID" means
                Indonesian -> English, anything else English -> Indonesian.

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        if "nllb" not in self.models:
            self.load_model("nllb")

        src_lang, tgt_lang = self._NLLB_LANGS[self._is_id_to_en(direction)]
        result = self.models["nllb"](
            text,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=512
        )
        return result[0]['translation_text']

    def translate_with_mbart(self, text, direction):
        """Translate *text* with the mBART-50 pipeline, loading it if needed.

        The languages are passed as ``src_lang`` / ``tgt_lang`` codes
        ("id_ID", "en_XX"); mBART does not use a ``>>xx<<`` text prefix.

        Args:
            text: Text to translate.
            direction: Direction label; a label starting with "ID" means
                Indonesian -> English, anything else English -> Indonesian.

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        if "mbart" not in self.models:
            self.load_model("mbart")

        src_lang, tgt_lang = self._MBART_LANGS[self._is_id_to_en(direction)]
        result = self.models["mbart"](text, src_lang=src_lang, tgt_lang=tgt_lang)
        return result[0]['translation_text']

    def translate_with_opus(self, text, direction):
        """Translate *text* with the Helsinki-NLP pipelines, loading if needed.

        Args:
            text: Text to translate.
            direction: Direction label; a label starting with "ID" selects
                the id->en pipeline, anything else the en->id pipeline.

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        if "opus" not in self.models:
            self.load_model("opus")

        pair = "id_en" if self._is_id_to_en(direction) else "en_id"
        return self.models["opus"][pair](text)[0]['translation_text']
# Global translator instance used by the module-level translate() helper.
# Constructing it loads no models: __init__ only records the device and
# creates an empty pipeline cache.
translator = ImprovedTranslator()
def translate(text, direction, model_type="nllb"):
    """Translate *text* with the requested model, falling back to OPUS.

    Args:
        text (str): Text to translate.
        direction (str): Direction label, forwarded unchanged to the
            translator method (Indonesian -> English or the reverse).
        model_type (str): "nllb", "mbart" or "opus". Any other value,
            including "t5", is routed to NLLB.

    Returns:
        str: The translated text. If the requested model fails, the OPUS
        model is tried instead; if that fails too (or OPUS was the
        requested model), a ``"Translation failed: ..."`` message is
        returned rather than an exception being raised.
    """
    try:
        handlers = {
            "nllb": translator.translate_with_nllb,
            "mbart": translator.translate_with_mbart,
            "opus": translator.translate_with_opus,
        }
        # Default to NLLB if unknown model
        handler = handlers.get(model_type, translator.translate_with_nllb)
        return handler(text, direction)
    except Exception as e:
        print(f"Translation error with {model_type}: {e}")
        if model_type == "opus":
            return f"Translation failed: {str(e)}"

    # Fallback to OPUS if other models fail. The fallback is guarded too, so
    # a failing fallback yields the same failure string instead of raising.
    try:
        return translator.translate_with_opus(text, direction)
    except Exception as fallback_error:
        print(f"Translation error with opus fallback: {fallback_error}")
        return f"Translation failed: {str(fallback_error)}"
# Wrapper kept for compatibility with older code
def translate_simple(text, direction):
    """Translate *text* using the default NLLB model.

    Backward-compatible shorthand for ``translate(text, direction, "nllb")``.
    """
    return translate(text, direction, "nllb")