import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tokenizers import Tokenizer

# 1. Load the source/target tokenizers from their tokenizer.json files.
src_tokenizer = Tokenizer.from_file("khmer_tokenizer.json")
tgt_tokenizer = Tokenizer.from_file("vietnamese_tokenizer.json")

# 2. Load the trained translation model.
model = load_model("translate_khmer_to_vi_fix_final.keras")

# Sequence lengths the inputs are padded (or truncated) to before prediction.
# presumably these match the lengths used at training time -- TODO confirm.
max_len_src = 963
max_len_tgt = 268


def translate_sentence(sentence: str) -> str:
    """Run one forward pass of ``model`` on *sentence* and decode the result.

    The sentence is encoded with ``src_tokenizer`` for the first model input
    and with ``tgt_tokenizer`` for the second model input; both id sequences
    are post-padded (to ``max_len_src`` and ``max_len_tgt - 1`` respectively).
    The arg-max over the last axis of the first prediction is decoded with
    ``tgt_tokenizer``.

    Args:
        sentence: Text to translate.

    Returns:
        The string decoded from the arg-max token ids of the prediction.
    """
    source_ids = src_tokenizer.encode(sentence).ids

    # NOTE(review): the prefix concatenated here is an empty string, so the
    # second model input is simply the *source* sentence encoded with the
    # target tokenizer. It looks like a start-of-sequence marker may have been
    # lost from this literal, and a seq2seq decoder is normally driven
    # token-by-token at inference time -- confirm against the training code.
    decoder_ids = tgt_tokenizer.encode("" + sentence).ids

    encoder_input = pad_sequences(
        [source_ids], maxlen=max_len_src, padding='post'
    )
    decoder_input = pad_sequences(
        [decoder_ids], maxlen=max_len_tgt - 1, padding='post'
    )

    output = model.predict([encoder_input, decoder_input])

    # assumes output[0] is (timesteps, vocab_size) -- TODO confirm.
    predicted_ids = np.argmax(output[0], axis=-1)

    # Tokenizer.decode expects a sequence of plain Python ints; handing it a
    # NumPy array can fail with "'ndarray' object cannot be converted to
    # 'Sequence'", so convert explicitly.
    return tgt_tokenizer.decode(predicted_ids.tolist())


khmer_sentence = "លោកនាយករដ្ឋមន្រ្តីទើបតែបានអត្ថាធិប្បាយលើព័ត៌មានមួយចំនួនដែលបានលើកឡើងនៅក្នុងកាសែតអេឡិចត្រូនិក VietNamNet ដែលទាក់ទងនឹង…"
print(translate_sentence(khmer_sentence))