Translation
English
Chinese
Eval Results (legacy)
radinplaid committed on
Commit
e1fabc3
·
verified ·
1 Parent(s): a767eae

Delete eole_model

Browse files
eole_model/config.json DELETED
@@ -1,150 +0,0 @@
1
- {
2
- "save_data": "zh_en/data_spm",
3
- "src_vocab": "zh-en-benchmark/src.eole.vocab",
4
- "report_every": 100,
5
- "share_vocab": false,
6
- "tgt_vocab": "zh-en-benchmark/tgt.eole.vocab",
7
- "vocab_size_multiple": 8,
8
- "tensorboard_log_dir_dated": "tensorboard/Feb-12_13-34-26",
9
- "src_vocab_size": 32000,
10
- "tensorboard": true,
11
- "n_sample": 0,
12
- "tgt_vocab_size": 32000,
13
- "valid_metrics": [
14
- "BLEU"
15
- ],
16
- "tensorboard_log_dir": "tensorboard",
17
- "seed": 1234,
18
- "overwrite": true,
19
- "transforms": [
20
- "sentencepiece",
21
- "filtertoolong"
22
- ],
23
- "training": {
24
- "accum_count": [
25
- 16
26
- ],
27
- "train_steps": 100000,
28
- "gpu_ranks": [
29
- 0
30
- ],
31
- "save_checkpoint_steps": 2000,
32
- "decay_method": "noam",
33
- "bucket_size": 128000,
34
- "world_size": 1,
35
- "accum_steps": [
36
- 0
37
- ],
38
- "optim": "pagedadamw8bit",
39
- "prefetch_factor": 100,
40
- "compute_dtype": "torch.bfloat16",
41
- "normalization": "tokens",
42
- "label_smoothing": 0.1,
43
- "batch_size_multiple": 8,
44
- "dropout_steps": [
45
- 0
46
- ],
47
- "average_decay": 0.0001,
48
- "dropout": [
49
- 0.1
50
- ],
51
- "batch_type": "tokens",
52
- "valid_batch_size": 8192,
53
- "param_init_method": "xavier_uniform",
54
- "adam_beta2": 0.998,
55
- "model_path": "zh-en-benchmark/model",
56
- "keep_checkpoint": 4,
57
- "num_workers": 0,
58
- "batch_size": 8192,
59
- "attention_dropout": [
60
- 0.1
61
- ],
62
- "warmup_steps": 10000,
63
- "valid_steps": 2000,
64
- "max_grad_norm": 2.0,
65
- "learning_rate": 2.0
66
- },
67
- "data": {
68
- "corpus_1": {
69
- "path_align": null,
70
- "path_src": "zh-en/train.ready.zh",
71
- "path_tgt": "zh-en/train.ready.en",
72
- "transforms": [
73
- "sentencepiece",
74
- "filtertoolong"
75
- ]
76
- },
77
- "valid": {
78
- "path_align": null,
79
- "path_src": "zh-en-benchmark/dev.zho",
80
- "path_tgt": "zh-en-benchmark/dev.eng",
81
- "transforms": [
82
- "sentencepiece",
83
- "filtertoolong"
84
- ]
85
- }
86
- },
87
- "transforms_configs": {
88
- "sentencepiece": {
89
- "tgt_subword_model": "${MODEL_PATH}/tgt.spm.model",
90
- "src_subword_model": "${MODEL_PATH}/src.spm.model"
91
- },
92
- "filtertoolong": {
93
- "tgt_seq_length": 256,
94
- "src_seq_length": 256
95
- }
96
- },
97
- "model": {
98
- "share_decoder_embeddings": true,
99
- "position_encoding_type": "SinusoidalInterleaved",
100
- "add_qkvbias": false,
101
- "architecture": "transformer",
102
- "add_ffnbias": true,
103
- "hidden_size": 1024,
104
- "transformer_ff": 4096,
105
- "mlp_activation_fn": "gelu",
106
- "norm_eps": 1e-06,
107
- "layer_norm": "standard",
108
- "heads": 16,
109
- "add_estimator": false,
110
- "share_embeddings": false,
111
- "decoder": {
112
- "heads": 16,
113
- "decoder_type": "transformer",
114
- "position_encoding_type": "SinusoidalInterleaved",
115
- "add_qkvbias": false,
116
- "layers": 2,
117
- "add_ffnbias": true,
118
- "hidden_size": 1024,
119
- "n_positions": null,
120
- "transformer_ff": 4096,
121
- "rope_config": null,
122
- "mlp_activation_fn": "gelu",
123
- "norm_eps": 1e-06,
124
- "layer_norm": "standard",
125
- "tgt_word_vec_size": 1024
126
- },
127
- "embeddings": {
128
- "word_vec_size": 1024,
129
- "position_encoding_type": "SinusoidalInterleaved",
130
- "tgt_word_vec_size": 1024,
131
- "src_word_vec_size": 1024
132
- },
133
- "encoder": {
134
- "heads": 16,
135
- "position_encoding_type": "SinusoidalInterleaved",
136
- "add_qkvbias": false,
137
- "layers": 8,
138
- "add_ffnbias": true,
139
- "hidden_size": 1024,
140
- "n_positions": null,
141
- "src_word_vec_size": 1024,
142
- "transformer_ff": 4096,
143
- "rope_config": null,
144
- "mlp_activation_fn": "gelu",
145
- "norm_eps": 1e-06,
146
- "layer_norm": "standard",
147
- "encoder_type": "transformer"
148
- }
149
- }
150
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eole_model/model.00.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6b78dad9fa4e560ce0abe0fc7c2b317ebe560fc23aa1897419253a1b334872d
3
- size 820042008
 
 
 
 
eole_model/src.spm.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:23d03d562fc3f8fe57e497dac0ece4827c254675a80c103fc4bb4040638ceb67
3
- size 733978
 
 
 
 
eole_model/tgt.spm.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c373f1d78753313b0dbc337058bf8450e1fdd6fe662a49e0941affce44ec14c5
3
- size 800955
 
 
 
 
eole_model/vocab.json DELETED
The diff for this file is too large to render. See raw diff