File size: 4,678 Bytes
5d3912b
 
b8db96b
5d3912b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8db96b
5d3912b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

class ImprovedTranslator:
    """Lazy-loading wrapper around several Hugging Face translation pipelines.

    Pipelines are created on first use and cached in ``self.models`` keyed by
    model type ("nllb", "mbart", "opus", "t5"), so each checkpoint is loaded
    at most once per instance.
    """

    # Language codes per model family, ordered as (Indonesian, English).
    _NLLB_CODES = ("ind_Latn", "eng_Latn")
    _MBART_CODES = ("id_ID", "en_XX")

    def __init__(self):
        """Pick the inference device and start with an empty pipeline cache."""
        # Passed as the pipeline ``device``: GPU index 0 when CUDA is
        # available, otherwise -1 (CPU).
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {}
        self.current_model = "nllb"  # Default model

    @staticmethod
    def _is_id_to_en(direction):
        """Return True when ``direction`` means Indonesian -> English.

        Only the source-language prefix is inspected, so "ID → EN",
        "ID -> EN", "id_en" and arrow characters damaged by an encoding
        round-trip are all recognised. Anything else is treated as
        English -> Indonesian.
        """
        return str(direction).strip().upper().startswith("ID")

    def load_model(self, model_type="nllb"):
        """Load and cache the pipeline(s) for ``model_type`` if not cached yet.

        Args:
            model_type: One of "nllb", "mbart", "opus" or "t5". Any other
                value is ignored and nothing is loaded.
        """
        if model_type in self.models:
            return  # Already cached; avoid reloading the checkpoint.

        if model_type == "nllb":
            # NLLB (No Language Left Behind) - Meta's multilingual model.
            # Half precision is only requested when running on CUDA.
            self.models["nllb"] = pipeline(
                "translation",
                model="facebook/nllb-200-distilled-600M",
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
            )

        elif model_type == "mbart":
            # mBART-50 many-to-many; language codes are supplied per call.
            self.models["mbart"] = pipeline(
                "translation",
                model="facebook/mbart-large-50-many-to-many-mmt",
                device=self.device
            )

        elif model_type == "opus":
            # Original Helsinki-NLP (fallback): one single-direction model per
            # direction, stored as a dict keyed by "id_en" / "en_id".
            self.models["opus"] = {
                "id_en": pipeline("translation", model="Helsinki-NLP/opus-mt-id-en", device=self.device),
                "en_id": pipeline("translation", model="Helsinki-NLP/opus-mt-en-id", device=self.device)
            }

        elif model_type == "t5":
            # General-purpose FLAN-T5 checkpoint. NOTE: this class defines no
            # translate_with_t5 method, so the pipeline is only loaded here.
            self.models["t5"] = pipeline(
                "translation",
                model="google/flan-t5-base",
                device=self.device
            )

    def translate_with_nllb(self, text, direction):
        """Translate ``text`` with NLLB-200.

        Args:
            text: Text to translate.
            direction: Direction label such as "ID → EN" or "EN → ID".

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        self.load_model("nllb")

        indonesian, english = self._NLLB_CODES
        if self._is_id_to_en(direction):
            src_lang, tgt_lang = indonesian, english
        else:
            src_lang, tgt_lang = english, indonesian

        result = self.models["nllb"](
            text,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=512
        )
        return result[0]['translation_text']

    def translate_with_mbart(self, text, direction):
        """Translate ``text`` with mBART-50 many-to-many.

        Args:
            text: Text to translate.
            direction: Direction label such as "ID → EN" or "EN → ID".

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        self.load_model("mbart")

        indonesian, english = self._MBART_CODES
        if self._is_id_to_en(direction):
            src_lang, tgt_lang = indonesian, english
        else:
            src_lang, tgt_lang = english, indonesian

        # mBART-50 selects languages through src_lang/tgt_lang codes; a
        # ">>xx<<" text prefix is the Marian/OPUS convention and would only
        # be treated as part of the text to translate.
        result = self.models["mbart"](text, src_lang=src_lang, tgt_lang=tgt_lang)
        return result[0]['translation_text']

    def translate_with_opus(self, text, direction):
        """Translate ``text`` with the single-direction Helsinki-NLP models.

        Args:
            text: Text to translate.
            direction: Direction label such as "ID → EN" or "EN → ID".

        Returns:
            The ``translation_text`` of the first pipeline result.
        """
        self.load_model("opus")

        key = "id_en" if self._is_id_to_en(direction) else "en_id"
        return self.models["opus"][key](text)[0]['translation_text']

# Global translator instance used by the module-level translate() function.
# Constructing it loads no models; pipelines are created lazily on first use.
translator = ImprovedTranslator()

def translate(text, direction, model_type="nllb"):
    """
    Main translation function

    Translates with the requested model. If that model raises, the error is
    printed and the OPUS models are tried as a fallback. If OPUS itself fails
    (directly or as the fallback), a "Translation failed: ..." string is
    returned instead of raising.

    Args:
        text (str): Text to translate. Empty, whitespace-only or None input
            returns "" without loading any model.
        direction (str): "ID → EN" or "EN → ID"
        model_type (str): "nllb", "mbart" or "opus". Any other value,
            including "t5", is translated with NLLB.

    Returns:
        str: The translated text, "" for empty input, or a
        "Translation failed: ..." message when every attempt failed.
    """
    # Nothing to translate: skip model loading and inference entirely.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    try:
        if model_type == "mbart":
            return translator.translate_with_mbart(text, direction)
        if model_type == "opus":
            return translator.translate_with_opus(text, direction)
        # "nllb" and every unrecognised model type default to NLLB.
        return translator.translate_with_nllb(text, direction)
    except Exception as e:
        print(f"Translation error with {model_type}: {e}")
        if model_type == "opus":
            # OPUS is the last resort, so there is nothing left to fall back to.
            return f"Translation failed: {str(e)}"

    # Fallback to OPUS if other models fail. Guarded so that a failing
    # fallback yields the same failure string as a direct OPUS failure
    # instead of propagating an exception to the caller.
    try:
        return translator.translate_with_opus(text, direction)
    except Exception as fallback_error:
        print(f"Translation error with opus: {fallback_error}")
        return f"Translation failed: {str(fallback_error)}"

# Wrapper for compatibility with legacy code
def translate_simple(text, direction):
    """Legacy two-argument entry point: translate using the NLLB model."""
    return translate(text, direction, model_type="nllb")