AnTrc2 committed on
Commit
92b86b6
·
verified ·
1 Parent(s): b703a6b

Upload 2 files

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  khmer_to_vi_folder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
37
  khmer_to_vietnamese_fix_final3.keras filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  khmer_to_vi_folder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
37
  khmer_to_vietnamese_fix_final3.keras filter=lfs diff=lfs merge=lfs -text
38
+ translate_khmer_to_vi_fix_final.keras filter=lfs diff=lfs merge=lfs -text
run.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Khmer-to-Vietnamese translation inference script.

Loads the source/target tokenizers and the trained Keras seq2seq model from
the current directory, prints the model summary, and exposes
``translate_sentence`` for single-shot translation.

Required files on disk:
  - khmer_tokenizer.json
  - vietnamese_tokenizer.json
  - translate_khmer_to_vi_fix_final.keras
"""
from tokenizers import Tokenizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# 1. Load the tokenizers from their serialized tokenizer.json files.
src_tokenizer = Tokenizer.from_file("khmer_tokenizer.json")
tgt_tokenizer = Tokenizer.from_file("vietnamese_tokenizer.json")
model = load_model("translate_khmer_to_vi_fix_final.keras")

# Maximum sequence lengths the model was trained with
# (source = Khmer, target = Vietnamese).
max_len_src = 963
max_len_tgt = 268

model.summary()


def translate_sentence(sentence):
    """Translate one Khmer sentence to Vietnamese in a single forward pass.

    Args:
        sentence: Raw Khmer text to translate.

    Returns:
        The decoded Vietnamese string produced by greedy (argmax) selection
        over the model's output distribution.

    NOTE(review): the decoder input is built by encoding "<s>" + sentence
    with the *target* (Vietnamese) tokenizer, i.e. the Khmer source text is
    fed to the Vietnamese tokenizer. This mirrors the original draft exactly,
    but it looks suspicious — confirm against the training pipeline.
    """
    # Encoder side: source token ids, padded to the trained source length.
    sentence_ids = src_tokenizer.encode(sentence).ids
    encoder_input = pad_sequences([sentence_ids], maxlen=max_len_src, padding="post")

    # Decoder side: "<s>"-prefixed ids, padded to max_len_tgt - 1 because the
    # decoder input is the target sequence shifted right by one position.
    decoder_in_ids = tgt_tokenizer.encode("<s>" + sentence).ids
    decoder_input = pad_sequences([decoder_in_ids], maxlen=max_len_tgt - 1, padding="post")

    # Single forward pass; greedy decode each position independently.
    output = model.predict([encoder_input, decoder_input])
    predicted_ids = np.argmax(output[0], axis=-1)
    return tgt_tokenizer.decode(predicted_ids)


# Example usage:
# khmer_sentence = "αž›αŸ„αž€αž“αžΆαž™αž€αžšαžŠαŸ’αž‹αž˜αž“αŸ’αžšαŸ’αžαžΈαž‘αžΎαž”αžαŸ‚αž”αžΆαž“αž’αžαŸ’αžαžΆαž’αž·αž”αŸ’αž”αžΆαž™αž›αžΎαž–αŸαžαŸŒαž˜αžΆαž“αž˜αž½αž™αž…αŸ†αž“αž½αž“αžŠαŸ‚αž›αž”αžΆαž“αž›αžΎαž€αž‘αžΎαž„αž“αŸ…αž€αŸ’αž“αž»αž„αž€αžΆαžŸαŸ‚αžαž’αŸαž‘αž·αž…αžαŸ’αžšαžΌαž“αž·αž€ VietNamNet αžŠαŸ‚αž›αž‘αžΆαž€αŸ‹αž‘αž„αž“αžΉαž„β€¦"
# print(translate_sentence(khmer_sentence))
translate_khmer_to_vi_fix_final.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b021ff9f6fa2fd952e08d5a0f57e6ea81ef03931c539097e92a174e7aa3b20
3
+ size 162507369