from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Direction label selecting Indonesian -> English. Every other value is
# treated as English -> Indonesian (callers are expected to pass "EN → ID").
_DIRECTION_ID_TO_EN = "ID → EN"

# Maps a model_type accepted by translate() to the ImprovedTranslator method
# that serves it. Types missing here (including "t5", which has a loader but
# no translate method) are served by NLLB.
_TRANSLATE_METHODS = {
    "nllb": "translate_with_nllb",
    "mbart": "translate_with_mbart",
    "opus": "translate_with_opus",
}


class ImprovedTranslator:
    """Indonesian <-> English translator backed by lazily loaded pipelines.

    Pipelines are created on first use and cached in ``self.models`` keyed by
    model type ("nllb", "mbart", "opus", "t5").
    """

    def __init__(self):
        """Initialize translator with multiple model options."""
        # transformers pipelines take a CUDA device index, or -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {}
        self.current_model = "nllb"  # Default model

    def load_model(self, model_type="nllb"):
        """Load the translation pipeline(s) for ``model_type`` if not cached.

        Args:
            model_type (str): "nllb", "mbart", "opus", or "t5". Any other
                value is ignored and nothing is loaded.
        """
        if model_type in self.models:
            return

        if model_type == "nllb":
            # NLLB (No Language Left Behind) - Meta's multilingual model.
            # Half precision is only requested when running on a GPU.
            self.models["nllb"] = pipeline(
                "translation",
                model="facebook/nllb-200-distilled-600M",
                device=self.device,
                torch_dtype=(
                    torch.float16 if torch.cuda.is_available() else torch.float32
                ),
            )
        elif model_type == "mbart":
            # mBART-50 many-to-many multilingual model.
            self.models["mbart"] = pipeline(
                "translation",
                model="facebook/mbart-large-50-many-to-many-mmt",
                device=self.device,
            )
        elif model_type == "opus":
            # Original Helsinki-NLP models (fallback): one single-direction
            # pipeline per translation direction.
            self.models["opus"] = {
                "id_en": pipeline(
                    "translation",
                    model="Helsinki-NLP/opus-mt-id-en",
                    device=self.device,
                ),
                "en_id": pipeline(
                    "translation",
                    model="Helsinki-NLP/opus-mt-en-id",
                    device=self.device,
                ),
            }
        elif model_type == "t5":
            # FLAN-T5 pipeline. NOTE: no translate_with_t5 method exists in
            # this module, so this pipeline is loaded but never used here.
            self.models["t5"] = pipeline(
                "translation",
                model="google/flan-t5-base",
                device=self.device,
            )

    def translate_with_nllb(self, text, direction):
        """Translate ``text`` using the NLLB model.

        Args:
            text (str): Text to translate.
            direction (str): "ID → EN" for Indonesian to English; any other
                value translates English to Indonesian.

        Returns:
            str: The translated text.
        """
        if "nllb" not in self.models:
            self.load_model("nllb")

        # NLLB language codes (FLORES-200 style).
        if direction == _DIRECTION_ID_TO_EN:
            src_lang, tgt_lang = "ind_Latn", "eng_Latn"
        else:
            src_lang, tgt_lang = "eng_Latn", "ind_Latn"

        result = self.models["nllb"](
            text,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=512,
        )
        return result[0]['translation_text']

    def translate_with_mbart(self, text, direction):
        """Translate ``text`` using the mBART-50 many-to-many model.

        Args:
            text (str): Text to translate.
            direction (str): "ID → EN" for Indonesian to English; any other
                value translates English to Indonesian.

        Returns:
            str: The translated text.
        """
        if "mbart" not in self.models:
            self.load_model("mbart")

        # mBART-50 selects languages through src_lang/tgt_lang codes, not a
        # ">>xx<<" text prefix (that is the Marian/OPUS-MT convention and
        # would simply be translated as part of the input here).
        if direction == _DIRECTION_ID_TO_EN:
            src_lang, tgt_lang = "id_ID", "en_XX"
        else:
            src_lang, tgt_lang = "en_XX", "id_ID"

        result = self.models["mbart"](
            text,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
        )
        return result[0]['translation_text']

    def translate_with_opus(self, text, direction):
        """Translate ``text`` using the original Helsinki-NLP models.

        Args:
            text (str): Text to translate.
            direction (str): "ID → EN" for Indonesian to English; any other
                value translates English to Indonesian.

        Returns:
            str: The translated text.
        """
        if "opus" not in self.models:
            self.load_model("opus")

        pair = "id_en" if direction == _DIRECTION_ID_TO_EN else "en_id"
        return self.models["opus"][pair](text)[0]['translation_text']


# Global translator instance
translator = ImprovedTranslator()


def translate(text, direction, model_type="nllb"):
    """Translate ``text`` with the requested model, falling back to OPUS.

    Args:
        text (str): Text to translate.
        direction (str): "ID → EN" or "EN → ID".
        model_type (str): "nllb", "mbart", or "opus". Any other value
            (including "t5") is served by NLLB.

    Returns:
        str: The translated text; "" for an empty or whitespace-only string;
        or a "Translation failed: ..." message when the requested model and
        the OPUS fallback both fail.
    """
    # Nothing to translate: skip model loading and inference entirely.
    if isinstance(text, str) and not text.strip():
        return ""

    method_name = _TRANSLATE_METHODS.get(model_type, "translate_with_nllb")
    try:
        return getattr(translator, method_name)(text, direction)
    except Exception as e:
        print(f"Translation error with {model_type}: {e}")
        if model_type == "opus":
            return f"Translation failed: {str(e)}"

    # Fallback to OPUS if other models fail. Guarded so that a failing
    # fallback is reported the same way as a direct OPUS failure instead of
    # escaping as an exception.
    try:
        return translator.translate_with_opus(text, direction)
    except Exception as fallback_error:
        print(f"Translation error with opus: {fallback_error}")
        return f"Translation failed: {str(fallback_error)}"


# Wrapper for compatibility with legacy code
def translate_simple(text, direction):
    """Simple wrapper for backward compatibility; always uses NLLB."""
    return translate(text, direction, "nllb")