|
|
|
|
|
import torch |
|
|
from transformers import MarianMTModel, MarianTokenizer |
|
|
import json |
|
|
import os |
|
|
import time |
|
|
|
|
|
class OptimizedMeetingTranslator:
    """
    Production-ready translator optimized for real-time meeting translation
    (Indonesian -> English), focused on speed and accuracy in a meeting context.
    """

    def __init__(self, model_path="./optimized_marian_meeting_translator"):
        """Initialize the translator and immediately load the model.

        Args:
            model_path (str): Directory with the fine-tuned Marian model,
                tokenizer files and an optional ``model_config.json``.
        """
        # Prefer GPU when available; load_model() also switches to fp16 there.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model_path = model_path
        self.model = None
        self.tokenizer = None
        self.config = None  # optional metadata loaded from model_config.json
        self.load_model()

    def load_model(self):
        """Load the optimized model and tokenizer from ``self.model_path``.

        Raises:
            Exception: re-raises any loading error (missing files, corrupt
                weights, ...) after printing a diagnostic message.
        """
        try:
            self.tokenizer = MarianTokenizer.from_pretrained(self.model_path)
            self.model = MarianMTModel.from_pretrained(self.model_path)
            self.model.to(self.device)
            self.model.eval()

            # fp16 halves memory and speeds up GPU inference; CPU fp16 is
            # slow/unsupported for many ops, so only convert on CUDA.
            if torch.cuda.is_available():
                self.model.half()

            print(f"✅ Model dioptimalkan berhasil dimuat dari {self.model_path}")

            # Optional training-time metadata (BLEU score, speed targets, ...).
            config_path = os.path.join(self.model_path, "model_config.json")
            if os.path.exists(config_path):
                with open(config_path, 'r') as f:
                    self.config = json.load(f)
                # BUGFIX: the old code formatted .get(..., 'N/A') with ':.3f',
                # which raises ValueError when the key is absent (float format
                # spec applied to the string 'N/A'). Format only real numbers.
                bleu = self.config.get('best_bleu_score')
                bleu_text = f"{bleu:.3f}" if isinstance(bleu, (int, float)) else "N/A"
                print(f"📊 BLEU Score: {bleu_text}")
                print(f"⚡ Target Speed: {self.config.get('performance', {}).get('target_speed', 'N/A')}")

        except Exception as e:
            print(f"❌ Error loading optimized model: {e}")
            raise

    def preprocess_text(self, text):
        """Minimal preprocessing to preserve quality: collapse runs of
        whitespace into single spaces and strip the ends."""
        text = ' '.join(text.split())
        return text.strip()

    def translate(self, text, max_length=96):
        """
        Translate Indonesian to English with real-time optimizations.

        Args:
            text (str): Indonesian text to translate.
            max_length (int): Maximum output length (default: 96 for speed).

        Returns:
            dict: {'translation': str, 'time': float, 'success': bool}.
                  On failure the 'translation' field carries the error text
                  and 'success' is False (no exception propagates).

        Raises:
            ValueError: if the model/tokenizer have not been loaded.
        """
        if not self.model or not self.tokenizer:
            raise ValueError("Model belum dimuat. Panggil load_model() terlebih dahulu.")

        start_time = time.time()

        try:
            processed_text = self.preprocess_text(text)

            inputs = self.tokenizer(
                processed_text,
                return_tensors='pt',
                max_length=max_length,
                truncation=True,
                padding=True
            ).to(self.device)

            # Greedy-ish decoding: 2 beams + early stopping trades a little
            # quality for latency; use_cache reuses past key/values.
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_length=max_length,
                    num_beams=2,
                    early_stopping=True,
                    pad_token_id=self.tokenizer.pad_token_id,
                    do_sample=False,
                    use_cache=True
                )

            translation = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            elapsed_time = time.time() - start_time

            return {
                'translation': translation.strip(),
                'time': elapsed_time,
                'success': True
            }

        except Exception as e:
            # Best-effort contract: report failure in-band instead of raising,
            # so a single bad sentence does not abort a whole meeting session.
            elapsed_time = time.time() - start_time
            return {
                'translation': f"Error: {str(e)}",
                'time': elapsed_time,
                'success': False
            }

    def batch_translate(self, texts, max_length=96):
        """Translate multiple texts sequentially and aggregate timings.

        NOTE(review): despite the name this translates one sentence at a
        time; true batched generate() would be faster — confirm before
        relying on it for throughput.

        Returns:
            dict: {'results': list[dict], 'total_time': float,
                   'average_time': float} (average is 0 for empty input).
        """
        results = []
        total_time = 0

        for text in texts:
            result = self.translate(text, max_length)
            results.append(result)
            total_time += result['time']

        return {
            'results': results,
            'total_time': total_time,
            'average_time': total_time / len(texts) if texts else 0
        }

    def get_model_info(self):
        """Return model/performance metadata from the loaded config, or a
        fallback message dict when no config was found."""
        if self.config:
            return {
                'model_name': self.config.get('model_name'),
                'bleu_score': self.config.get('best_bleu_score'),
                'improvement': self.config.get('improvement'),
                'target_speed': self.config.get('performance', {}).get('target_speed'),
                'optimizations': self.config.get('optimizations', [])
            }
        return {'message': 'Model config tidak tersedia'}

    def benchmark(self, test_sentences=None):
        """Benchmark model latency with test sentences.

        Args:
            test_sentences (list[str] | None): sentences to translate; a
                built-in Indonesian meeting set is used when None.

        Returns:
            dict: the batch_translate() result for the benchmark set.
        """
        if test_sentences is None:
            test_sentences = [
                "Selamat pagi, mari kita mulai rapat hari ini.",
                "Apakah ada pertanyaan mengenai proposal tersebut?",
                "Tim development akan handle implementasi fitur baru.",
                "Berdasarkan diskusi, kita putuskan untuk melanjutkan proyek.",
                "Terima kasih atas partisipasi aktif dalam meeting."
            ]

        print("🧪 Benchmarking Optimized Meeting Translator:")
        print("-" * 50)

        results = self.batch_translate(test_sentences)

        for i, (sentence, result) in enumerate(zip(test_sentences, results['results']), 1):
            status = "✅" if result['success'] else "❌"
            print(f"{i}. {status} ({result['time']:.3f}s)")
            print(f" 🇮🇩 {sentence}")
            print(f" 🇺🇸 {result['translation']}")
            print()

        print(f"📊 Benchmark Results:")
        print(f" Average Speed: {results['average_time']:.3f}s per sentence")
        print(f" Total Time: {results['total_time']:.3f}s")
        # Sub-second average latency is the real-time target.
        print(f" Target Achievement: {'✅ ACHIEVED' if results['average_time'] < 1.0 else '❌ NOT ACHIEVED'}")

        return results
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Load the optimized translator (fails fast if the model dir is missing).
    translator = OptimizedMeetingTranslator()

    # Show model metadata, then run the built-in latency benchmark.
    print("📋 Model Information:")
    info = translator.get_model_info()
    for key, value in info.items():
        print(f" {key}: {value}")

    print("\n" + "=" * 50)

    translator.benchmark()
|
|
|