syempuna committed on
Commit
5d3912b
·
verified ·
1 Parent(s): d00d9dd
Files changed (1) hide show
  1. translator.py +122 -8
translator.py CHANGED
@@ -1,10 +1,124 @@
1
- from transformers import pipeline
 
2
 
3
# Build one Helsinki-NLP OPUS-MT pipeline per direction at import time:
# Indonesian -> English and English -> Indonesian.
translator_id_en = pipeline(task="translation", model="Helsinki-NLP/opus-mt-id-en")
translator_en_id = pipeline(task="translation", model="Helsinki-NLP/opus-mt-en-id")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def translate(text, direction):
    """Translate *text* between Indonesian and English with the OPUS-MT pipelines.

    Args:
        text: Text to translate.
        direction: "ID → EN" selects Indonesian -> English. Any value whose
            first letters are not "ID" (e.g. "EN → ID") selects
            English -> Indonesian.

    Returns:
        The ``translation_text`` of the first pipeline result.
    """
    # Match on the leading language code instead of the full literal so the
    # check does not depend on how the arrow character is spelled or encoded.
    if str(direction).strip().upper().startswith("ID"):
        return translator_id_en(text)[0]['translation_text']
    return translator_en_id(text)[0]['translation_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
 
4
class ImprovedTranslator:
    """Indonesian <-> English translator that loads its models lazily.

    Pipelines are created on first use by :meth:`load_model` and cached in
    ``self.models`` under the keys "nllb", "mbart", "opus" and "t5", so each
    model is built at most once per instance.
    """

    def __init__(self):
        """Pick the device and start with an empty model cache."""
        # transformers pipelines take a CUDA device index, or -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {}
        self.current_model = "nllb"  # Default model

    @staticmethod
    def _is_id_to_en(direction):
        """Return True when *direction* asks for Indonesian -> English.

        Only the leading language code is inspected, so "ID → EN",
        "ID -> EN" or a mis-encoded arrow all match. Everything else is
        treated as English -> Indonesian.
        """
        return str(direction).strip().upper().startswith("ID")

    def _lang_pair(self, direction, id_code, en_code):
        """Return ``(src_lang, tgt_lang)`` for *direction* from the two codes."""
        if self._is_id_to_en(direction):
            return id_code, en_code
        return en_code, id_code

    def load_model(self, model_type="nllb"):
        """Create and cache the pipeline(s) for *model_type* if not yet loaded.

        Args:
            model_type: One of "nllb", "mbart", "opus" or "t5".

        Raises:
            ValueError: If *model_type* is not one of the supported names.
        """
        if model_type in self.models:
            return  # already cached; nothing to do

        if model_type == "nllb":
            # NLLB (No Language Left Behind) - Meta's multilingual model.
            # Half precision is only requested when CUDA is available.
            self.models["nllb"] = pipeline(
                "translation",
                model="facebook/nllb-200-distilled-600M",
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            )
        elif model_type == "mbart":
            # mBART-50 many-to-many; languages are chosen per call.
            self.models["mbart"] = pipeline(
                "translation",
                model="facebook/mbart-large-50-many-to-many-mmt",
                device=self.device,
            )
        elif model_type == "opus":
            # Original Helsinki-NLP models (fallback): one pipeline per direction.
            self.models["opus"] = {
                "id_en": pipeline("translation", model="Helsinki-NLP/opus-mt-id-en", device=self.device),
                "en_id": pipeline("translation", model="Helsinki-NLP/opus-mt-en-id", device=self.device),
            }
        elif model_type == "t5":
            # FLAN-T5 base. NOTE(review): this class has no translate_with_t5
            # method, so the pipeline is loaded but never used here.
            self.models["t5"] = pipeline(
                "translation",
                model="google/flan-t5-base",
                device=self.device,
            )
        else:
            raise ValueError(f"Unknown model type: {model_type!r}")

    def translate_with_nllb(self, text, direction):
        """Translate *text* with NLLB and return the translated string."""
        if "nllb" not in self.models:
            self.load_model("nllb")

        # NLLB language codes: ind_Latn = Indonesian, eng_Latn = English.
        src_lang, tgt_lang = self._lang_pair(direction, "ind_Latn", "eng_Latn")
        result = self.models["nllb"](
            text,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=512,
        )
        return result[0]['translation_text']

    def translate_with_mbart(self, text, direction):
        """Translate *text* with mBART-50 and return the translated string."""
        if "mbart" not in self.models:
            self.load_model("mbart")

        # mBART-50 selects languages through src_lang/tgt_lang codes
        # (id_ID = Indonesian, en_XX = English), not through a ">>xx<<"
        # text prefix; that prefix is the Marian/OPUS multi-target convention.
        src_lang, tgt_lang = self._lang_pair(direction, "id_ID", "en_XX")
        result = self.models["mbart"](text, src_lang=src_lang, tgt_lang=tgt_lang)
        return result[0]['translation_text']

    def translate_with_opus(self, text, direction):
        """Translate *text* with the Helsinki-NLP model for *direction*."""
        if "opus" not in self.models:
            self.load_model("opus")

        key = "id_en" if self._is_id_to_en(direction) else "en_id"
        return self.models["opus"][key](text)[0]['translation_text']
89
 
90
# Shared module-level instance used by translate(); models are loaded
# lazily on the first translation request, not here.
translator = ImprovedTranslator()
92
+
93
def translate(text, direction, model_type="nllb"):
    """Translate *text* with the requested model, falling back to OPUS.

    Args:
        text (str): Text to translate. ``None`` or a blank string is
            returned as ``""`` without invoking any model.
        direction (str): "ID → EN" or "EN → ID".
        model_type (str): "nllb", "mbart" or "opus". Any other value,
            including "t5" (which has no translate method), uses NLLB.

    Returns:
        str: The translated text, or a "Translation failed: ..." message
        when both the requested model and the OPUS fallback fail.
    """
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    try:
        if model_type == "mbart":
            return translator.translate_with_mbart(text, direction)
        if model_type == "opus":
            return translator.translate_with_opus(text, direction)
        # "nllb" and every unknown model type default to NLLB.
        return translator.translate_with_nllb(text, direction)
    except Exception as e:
        print(f"Translation error with {model_type}: {e}")
        if model_type == "opus":
            # OPUS is the fallback itself; there is nothing left to try.
            return f"Translation failed: {str(e)}"

    # Fallback to OPUS if another model failed. The fallback is guarded too,
    # so this function always reports failure as a string and never raises.
    try:
        return translator.translate_with_opus(text, direction)
    except Exception as fallback_error:
        print(f"Translation error with opus: {fallback_error}")
        return f"Translation failed: {str(fallback_error)}"
120
+
121
+ # Wrapper for compatibility with legacy code
122
def translate_simple(text, direction):
    """Translate *text* in *direction* using the default NLLB model.

    Kept as a two-argument entry point for callers written against the
    older ``translate(text, direction)`` signature.
    """
    default_model = "nllb"
    return translate(text, direction, default_model)