mavietduc committed on
Commit
fc13aa3
·
verified ·
1 Parent(s): e7d6656

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +27 -2
  2. config.json +16 -0
  3. pytorch_model.bin +3 -0
  4. spm.model +3 -0
README.md CHANGED
@@ -1,3 +1,28 @@
 
 
 
 
 
 
 
 
1
  ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ license: mit
2
+ language:
3
+ - vi
4
+ - tyz
5
+ tags:
6
+ - translation
7
+ - transformer
8
+ library_name: pytorch
9
  ---
10
+
11
+ # Vi → Tày Transformer (custom)
12
+
13
+ - Kiến trúc Encoder–Decoder Transformer (PyTorch thuần), beam search.
14
+ - Tokenizer SentencePiece (file `spm.model`).
15
+ - Trained for Vietnamese → Tày.
16
+
17
+ ## Cách load
18
+ ```python
19
+ import json, torch, sentencepiece as spm
20
+ from model import ModelConfig, Seq2SeqTransformer, PAD, BOS, EOS, LANG2ID # (bạn định nghĩa trong code của mình)
21
+
22
+ cfg = json.load(open("config.json", "r", encoding="utf-8"))
23
+ cfg = ModelConfig(cfg)
24
+ model = Seq2SeqTransformer(cfg)
25
+ model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
26
+ model.eval()
27
+
28
+ sp = spm.SentencePieceProcessor(model_file="spm.model")
config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 32000,
3
+ "d_model": 384,
4
+ "num_heads": 6,
5
+ "d_ff": 1536,
6
+ "num_encoder_layers": 6,
7
+ "num_decoder_layers": 6,
8
+ "max_pos": 1024,
9
+ "emb_dropout": 0.1,
10
+ "attn_pdrop": 0.1,
11
+ "resid_pdrop": 0.1,
12
+ "layerdrop": 0.1,
13
+ "pad_token_id": 0,
14
+ "tie_embeddings": true,
15
+ "num_langs": 2
16
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c77e4768b976b2ac7642626bc8d3589b3e1558aaa41baf8c8c9c355e3df8832
3
+ size 166757503
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2fa575aee9f09b9d95917125e4f1fe483a6db55527abd8a61bdf7fae54b68a
3
+ size 765927