azawahry commited on
Commit
9fa26d2
·
1 Parent(s): c69525e

Initial commit

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddc019ba6e602c9f5a017bbc198a03869f04aa594ce3fb9420e22546bddff767
3
+ size 14282885
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf19b41c5bec0a65894cc95fcfb9f59c897c7cab529211199efa8d3f610b5cb2
3
+ size 2451916861
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa71b730bac45661dcfcf9b188b4e05baac1b1e3be15c45fb24037dc1168128
3
+ size 14511
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51c8702e03bc24d0566e4b37e893acfcfe0af87dcc3883f22471d07682d7ac5
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab6591ca0c57bc9dc3e78ae2ed403a4b6285c7dac165767c4dac3b4547fc798
3
+ size 627
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
55
+ ],
56
+ "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
+ "eos_token": "</s>",
59
+ "mask_token": "<mask>",
60
+ "pad_token": "<pad>",
61
+ "sep_token": "</s>",
62
+ "unk_token": "<unk>"
63
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
55
+ ],
56
+ "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
+ "eos_token": "</s>",
59
+ "language_codes": "ML50",
60
+ "mask_token": {
61
+ "__type": "AddedToken",
62
+ "content": "<mask>",
63
+ "lstrip": true,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false
67
+ },
68
+ "model_max_length": 1000000000000000019884624838656,
69
+ "pad_token": "<pad>",
70
+ "sep_token": "</s>",
71
+ "sp_model_kwargs": {},
72
+ "special_tokens_map_file": "special_tokens_map.json",
73
+ "src_lang": null,
74
+ "tgt_lang": null,
75
+ "tokenizer_class": "MBart50Tokenizer",
76
+ "tokenizer_file": null,
77
+ "unk_token": "<unk>"
78
+ }
trainer_state.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.347782850265503,
3
+ "best_model_checkpoint": "/home/azawahry_sunbird_ai/models/m2e/mbart-luganda-peft-1681982976/checkpoint-150",
4
+ "epoch": 0.8625051750310502,
5
+ "global_step": 150,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "eval_BLEU_lug": 36.0264,
13
+ "eval_BLEU_mean": 36.0264,
14
+ "eval_loss": 2.8559389114379883,
15
+ "eval_runtime": 47.4669,
16
+ "eval_samples_per_second": 10.534,
17
+ "eval_steps_per_second": 1.327,
18
+ "step": 10
19
+ },
20
+ {
21
+ "epoch": 0.12,
22
+ "eval_BLEU_lug": 37.8444,
23
+ "eval_BLEU_mean": 37.8444,
24
+ "eval_loss": 2.4016377925872803,
25
+ "eval_runtime": 47.7157,
26
+ "eval_samples_per_second": 10.479,
27
+ "eval_steps_per_second": 1.32,
28
+ "step": 20
29
+ },
30
+ {
31
+ "epoch": 0.17,
32
+ "eval_BLEU_lug": 38.246,
33
+ "eval_BLEU_mean": 38.246,
34
+ "eval_loss": 2.3729116916656494,
35
+ "eval_runtime": 47.5616,
36
+ "eval_samples_per_second": 10.513,
37
+ "eval_steps_per_second": 1.325,
38
+ "step": 30
39
+ },
40
+ {
41
+ "epoch": 0.23,
42
+ "eval_BLEU_lug": 38.4637,
43
+ "eval_BLEU_mean": 38.4637,
44
+ "eval_loss": 2.367535352706909,
45
+ "eval_runtime": 47.7816,
46
+ "eval_samples_per_second": 10.464,
47
+ "eval_steps_per_second": 1.318,
48
+ "step": 40
49
+ },
50
+ {
51
+ "epoch": 0.29,
52
+ "eval_BLEU_lug": 38.4294,
53
+ "eval_BLEU_mean": 38.4294,
54
+ "eval_loss": 2.3612842559814453,
55
+ "eval_runtime": 48.137,
56
+ "eval_samples_per_second": 10.387,
57
+ "eval_steps_per_second": 1.309,
58
+ "step": 50
59
+ },
60
+ {
61
+ "epoch": 0.35,
62
+ "eval_BLEU_lug": 39.2099,
63
+ "eval_BLEU_mean": 39.2099,
64
+ "eval_loss": 2.3571784496307373,
65
+ "eval_runtime": 46.703,
66
+ "eval_samples_per_second": 10.706,
67
+ "eval_steps_per_second": 1.349,
68
+ "step": 60
69
+ },
70
+ {
71
+ "epoch": 0.4,
72
+ "eval_BLEU_lug": 38.6832,
73
+ "eval_BLEU_mean": 38.6832,
74
+ "eval_loss": 2.35516095161438,
75
+ "eval_runtime": 47.312,
76
+ "eval_samples_per_second": 10.568,
77
+ "eval_steps_per_second": 1.332,
78
+ "step": 70
79
+ },
80
+ {
81
+ "epoch": 0.46,
82
+ "eval_BLEU_lug": 39.1361,
83
+ "eval_BLEU_mean": 39.1361,
84
+ "eval_loss": 2.353118896484375,
85
+ "eval_runtime": 46.6434,
86
+ "eval_samples_per_second": 10.72,
87
+ "eval_steps_per_second": 1.351,
88
+ "step": 80
89
+ },
90
+ {
91
+ "epoch": 0.52,
92
+ "eval_BLEU_lug": 38.9296,
93
+ "eval_BLEU_mean": 38.9296,
94
+ "eval_loss": 2.352966785430908,
95
+ "eval_runtime": 46.5171,
96
+ "eval_samples_per_second": 10.749,
97
+ "eval_steps_per_second": 1.354,
98
+ "step": 90
99
+ },
100
+ {
101
+ "epoch": 0.58,
102
+ "eval_BLEU_lug": 39.1036,
103
+ "eval_BLEU_mean": 39.1036,
104
+ "eval_loss": 2.35172438621521,
105
+ "eval_runtime": 46.318,
106
+ "eval_samples_per_second": 10.795,
107
+ "eval_steps_per_second": 1.36,
108
+ "step": 100
109
+ },
110
+ {
111
+ "epoch": 0.63,
112
+ "eval_BLEU_lug": 39.2866,
113
+ "eval_BLEU_mean": 39.2866,
114
+ "eval_loss": 2.349985361099243,
115
+ "eval_runtime": 46.4059,
116
+ "eval_samples_per_second": 10.774,
117
+ "eval_steps_per_second": 1.358,
118
+ "step": 110
119
+ },
120
+ {
121
+ "epoch": 0.69,
122
+ "eval_BLEU_lug": 39.2961,
123
+ "eval_BLEU_mean": 39.2961,
124
+ "eval_loss": 2.348721742630005,
125
+ "eval_runtime": 46.3187,
126
+ "eval_samples_per_second": 10.795,
127
+ "eval_steps_per_second": 1.36,
128
+ "step": 120
129
+ },
130
+ {
131
+ "epoch": 0.75,
132
+ "eval_BLEU_lug": 39.2138,
133
+ "eval_BLEU_mean": 39.2138,
134
+ "eval_loss": 2.3493213653564453,
135
+ "eval_runtime": 46.9837,
136
+ "eval_samples_per_second": 10.642,
137
+ "eval_steps_per_second": 1.341,
138
+ "step": 130
139
+ },
140
+ {
141
+ "epoch": 0.81,
142
+ "eval_BLEU_lug": 39.2112,
143
+ "eval_BLEU_mean": 39.2112,
144
+ "eval_loss": 2.348573684692383,
145
+ "eval_runtime": 47.285,
146
+ "eval_samples_per_second": 10.574,
147
+ "eval_steps_per_second": 1.332,
148
+ "step": 140
149
+ },
150
+ {
151
+ "epoch": 0.86,
152
+ "eval_BLEU_lug": 39.3584,
153
+ "eval_BLEU_mean": 39.3584,
154
+ "eval_loss": 2.347782850265503,
155
+ "eval_runtime": 46.3784,
156
+ "eval_samples_per_second": 10.781,
157
+ "eval_steps_per_second": 1.358,
158
+ "step": 150
159
+ }
160
+ ],
161
+ "max_steps": 865,
162
+ "num_train_epochs": 5,
163
+ "total_flos": 7.715115655849574e+16,
164
+ "trial_name": null,
165
+ "trial_params": null
166
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c131184b3d322e267d04a1d39eb3dd354b9e594f7d06a637b051e9e67613de9d
3
+ size 3771