Suparnpreet commited on
Commit
1010106
·
verified ·
1 Parent(s): 895315d

Upload folder using huggingface_hub

Browse files
._config.json ADDED
Binary file (4.1 kB). View file
 
._generation_config.json ADDED
Binary file (4.1 kB). View file
 
._model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801dbdad4d308346f2906e65245e4a34276edd53895dc22506cbb9a1d2084d1
3
+ size 4096
._optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801dbdad4d308346f2906e65245e4a34276edd53895dc22506cbb9a1d2084d1
3
+ size 4096
._rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801dbdad4d308346f2906e65245e4a34276edd53895dc22506cbb9a1d2084d1
3
+ size 4096
._scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801dbdad4d308346f2906e65245e4a34276edd53895dc22506cbb9a1d2084d1
3
+ size 4096
._source.spm ADDED
Binary file (4.1 kB). View file
 
._special_tokens_map.json ADDED
Binary file (4.1 kB). View file
 
._target.spm ADDED
Binary file (4.1 kB). View file
 
._tokenizer_config.json ADDED
Binary file (4.1 kB). View file
 
._trainer_state.json ADDED
Binary file (4.1 kB). View file
 
._training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801dbdad4d308346f2906e65245e4a34276edd53895dc22506cbb9a1d2084d1
3
+ size 4096
._vocab.json ADDED
Binary file (4.1 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ source.spm filter=lfs diff=lfs merge=lfs -text
37
+ target.spm filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_num_labels": 3,
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "classif_dropout": 0.0,
12
+ "classifier_dropout": 0.0,
13
+ "d_model": 512,
14
+ "decoder_attention_heads": 8,
15
+ "decoder_ffn_dim": 2048,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 6,
18
+ "decoder_start_token_id": 58100,
19
+ "decoder_vocab_size": 58101,
20
+ "dropout": 0.1,
21
+ "dtype": "float32",
22
+ "encoder_attention_heads": 8,
23
+ "encoder_ffn_dim": 2048,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 6,
26
+ "eos_token_id": 0,
27
+ "forced_eos_token_id": 0,
28
+ "gradient_checkpointing": false,
29
+ "id2label": {
30
+ "0": "LABEL_0",
31
+ "1": "LABEL_1",
32
+ "2": "LABEL_2"
33
+ },
34
+ "init_std": 0.02,
35
+ "is_encoder_decoder": true,
36
+ "label2id": {
37
+ "LABEL_0": 0,
38
+ "LABEL_1": 1,
39
+ "LABEL_2": 2
40
+ },
41
+ "max_length": null,
42
+ "max_position_embeddings": 512,
43
+ "model_type": "marian",
44
+ "normalize_before": false,
45
+ "normalize_embedding": false,
46
+ "num_beams": null,
47
+ "num_hidden_layers": 6,
48
+ "pad_token_id": 58100,
49
+ "scale_embedding": true,
50
+ "share_encoder_decoder_embeddings": true,
51
+ "static_position_embeddings": true,
52
+ "transformers_version": "4.57.3",
53
+ "use_cache": true,
54
+ "vocab_size": 58101
55
+ }
generation_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 58100
5
+ ]
6
+ ],
7
+ "decoder_start_token_id": 58100,
8
+ "eos_token_id": [
9
+ 0
10
+ ],
11
+ "forced_eos_token_id": 0,
12
+ "max_length": 512,
13
+ "num_beams": 4,
14
+ "pad_token_id": 58100,
15
+ "renormalize_logits": true,
16
+ "transformers_version": "4.57.3"
17
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e98ee8feed1fa7392d59e5b3515cf0fdfcc8bd30b436178cf9622765271dc69
3
+ size 295806292
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f51c8af8547505510d472f25993936c24a774e300283d2cfec75cd90cdeb85
3
+ size 591305163
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d07eb4c944a6b3868be18a8724f682beab13a83c89a2061d77cc22af3eee9d
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0f179fcd2b46d1c9c2abe0b5e3e7342bff81cbf3ed4bedcf58303839e5d71c
3
+ size 1465
source.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678f2a1177d8389f67b66299762dcc4fc567e89b07e212ba91b0c56daecf47ce
3
+ size 768489
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
target.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd1f495eea99c8e21ae086d9146e0fa7b096c3dfdd9ba07ab8b631889df5c9b
3
+ size 796845
tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "58100": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": false,
29
+ "eos_token": "</s>",
30
+ "extra_special_tokens": {},
31
+ "model_max_length": 512,
32
+ "pad_token": "<pad>",
33
+ "separate_vocabs": false,
34
+ "source_lang": "en",
35
+ "sp_model_kwargs": {},
36
+ "target_lang": "de",
37
+ "tokenizer_class": "MarianTokenizer",
38
+ "unk_token": "<unk>"
39
+ }
trainer_state.json ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 21930,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.11399908800729594,
14
+ "grad_norm": 1.4812531471252441,
15
+ "learning_rate": 1.9544915640674875e-05,
16
+ "loss": 0.1287,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.22799817601459188,
21
+ "grad_norm": 2.198870897293091,
22
+ "learning_rate": 1.9088919288645692e-05,
23
+ "loss": 0.1045,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.34199726402188785,
28
+ "grad_norm": 1.3517733812332153,
29
+ "learning_rate": 1.863292293661651e-05,
30
+ "loss": 0.0895,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.45599635202918376,
35
+ "grad_norm": 1.352580189704895,
36
+ "learning_rate": 1.8176926584587324e-05,
37
+ "loss": 0.0766,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.5699954400364797,
42
+ "grad_norm": 1.960016131401062,
43
+ "learning_rate": 1.772093023255814e-05,
44
+ "loss": 0.0713,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.6839945280437757,
49
+ "grad_norm": 0.5876455903053284,
50
+ "learning_rate": 1.7264933880528958e-05,
51
+ "loss": 0.0657,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.7979936160510716,
56
+ "grad_norm": 0.9299254417419434,
57
+ "learning_rate": 1.6808937528499772e-05,
58
+ "loss": 0.0623,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.9119927040583675,
63
+ "grad_norm": 1.3892526626586914,
64
+ "learning_rate": 1.635294117647059e-05,
65
+ "loss": 0.0621,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 1.0259917920656634,
70
+ "grad_norm": 1.7030184268951416,
71
+ "learning_rate": 1.5896944824441403e-05,
72
+ "loss": 0.0585,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 1.1399908800729595,
77
+ "grad_norm": 0.9616603255271912,
78
+ "learning_rate": 1.5440948472412224e-05,
79
+ "loss": 0.0448,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 1.2539899680802553,
84
+ "grad_norm": 1.245125412940979,
85
+ "learning_rate": 1.4984952120383038e-05,
86
+ "loss": 0.0429,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 1.3679890560875512,
91
+ "grad_norm": 1.2846943140029907,
92
+ "learning_rate": 1.4528955768353854e-05,
93
+ "loss": 0.0408,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 1.4819881440948472,
98
+ "grad_norm": 1.0010948181152344,
99
+ "learning_rate": 1.407295941632467e-05,
100
+ "loss": 0.0386,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 1.5959872321021433,
105
+ "grad_norm": 1.1298205852508545,
106
+ "learning_rate": 1.3616963064295488e-05,
107
+ "loss": 0.0364,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 1.7099863201094392,
112
+ "grad_norm": 0.5932161211967468,
113
+ "learning_rate": 1.3160966712266304e-05,
114
+ "loss": 0.0344,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 1.823985408116735,
119
+ "grad_norm": 1.195202112197876,
120
+ "learning_rate": 1.270497036023712e-05,
121
+ "loss": 0.033,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 1.937984496124031,
126
+ "grad_norm": 1.2507325410842896,
127
+ "learning_rate": 1.2248974008207935e-05,
128
+ "loss": 0.0321,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 2.0519835841313268,
133
+ "grad_norm": 2.5148258209228516,
134
+ "learning_rate": 1.1792977656178753e-05,
135
+ "loss": 0.0278,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 2.165982672138623,
140
+ "grad_norm": 0.7329283356666565,
141
+ "learning_rate": 1.1336981304149568e-05,
142
+ "loss": 0.0233,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 2.279981760145919,
147
+ "grad_norm": 0.42915332317352295,
148
+ "learning_rate": 1.0880984952120384e-05,
149
+ "loss": 0.0226,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 2.3939808481532148,
154
+ "grad_norm": 0.7090346813201904,
155
+ "learning_rate": 1.04249886000912e-05,
156
+ "loss": 0.0223,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 2.5079799361605106,
161
+ "grad_norm": 0.6480665802955627,
162
+ "learning_rate": 9.968992248062017e-06,
163
+ "loss": 0.0204,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 2.621979024167807,
168
+ "grad_norm": 1.7029985189437866,
169
+ "learning_rate": 9.512995896032832e-06,
170
+ "loss": 0.0221,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 2.7359781121751023,
175
+ "grad_norm": 0.9722331166267395,
176
+ "learning_rate": 9.056999544003648e-06,
177
+ "loss": 0.0208,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 2.8499772001823986,
182
+ "grad_norm": 0.3082011938095093,
183
+ "learning_rate": 8.601003191974465e-06,
184
+ "loss": 0.0204,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 2.9639762881896945,
189
+ "grad_norm": 0.33128827810287476,
190
+ "learning_rate": 8.145006839945281e-06,
191
+ "loss": 0.0207,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 3.0779753761969904,
196
+ "grad_norm": 0.7698410749435425,
197
+ "learning_rate": 7.689010487916098e-06,
198
+ "loss": 0.0171,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 3.191974464204286,
203
+ "grad_norm": 0.7875366806983948,
204
+ "learning_rate": 7.233014135886913e-06,
205
+ "loss": 0.0156,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 3.305973552211582,
210
+ "grad_norm": 0.6971263289451599,
211
+ "learning_rate": 6.77701778385773e-06,
212
+ "loss": 0.0157,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 3.4199726402188784,
217
+ "grad_norm": 0.464100182056427,
218
+ "learning_rate": 6.321021431828546e-06,
219
+ "loss": 0.0143,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 3.5339717282261742,
224
+ "grad_norm": 0.5803436636924744,
225
+ "learning_rate": 5.8650250797993626e-06,
226
+ "loss": 0.0147,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 3.64797081623347,
231
+ "grad_norm": 0.7879688739776611,
232
+ "learning_rate": 5.409028727770178e-06,
233
+ "loss": 0.0137,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 3.761969904240766,
238
+ "grad_norm": 1.0888534784317017,
239
+ "learning_rate": 4.953032375740995e-06,
240
+ "loss": 0.0139,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 3.875968992248062,
245
+ "grad_norm": 0.3893286883831024,
246
+ "learning_rate": 4.49703602371181e-06,
247
+ "loss": 0.0137,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 3.989968080255358,
252
+ "grad_norm": 0.4917721748352051,
253
+ "learning_rate": 4.041039671682627e-06,
254
+ "loss": 0.0136,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 4.1039671682626535,
259
+ "grad_norm": 1.25238835811615,
260
+ "learning_rate": 3.585043319653443e-06,
261
+ "loss": 0.0117,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 4.21796625626995,
266
+ "grad_norm": 0.9064317941665649,
267
+ "learning_rate": 3.129046967624259e-06,
268
+ "loss": 0.0108,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 4.331965344277246,
273
+ "grad_norm": 0.4522368311882019,
274
+ "learning_rate": 2.6730506155950754e-06,
275
+ "loss": 0.0105,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 4.4459644322845415,
280
+ "grad_norm": 1.0190762281417847,
281
+ "learning_rate": 2.217054263565892e-06,
282
+ "loss": 0.0108,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 4.559963520291838,
287
+ "grad_norm": 0.23909200727939606,
288
+ "learning_rate": 1.7610579115367079e-06,
289
+ "loss": 0.0114,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 4.673962608299133,
294
+ "grad_norm": 0.8679990768432617,
295
+ "learning_rate": 1.3050615595075241e-06,
296
+ "loss": 0.0105,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 4.7879616963064295,
301
+ "grad_norm": 0.4421948790550232,
302
+ "learning_rate": 8.490652074783402e-07,
303
+ "loss": 0.0109,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 4.901960784313726,
308
+ "grad_norm": 1.2229384183883667,
309
+ "learning_rate": 3.930688554491564e-07,
310
+ "loss": 0.0103,
311
+ "step": 21500
312
+ }
313
+ ],
314
+ "logging_steps": 500,
315
+ "max_steps": 21930,
316
+ "num_input_tokens_seen": 0,
317
+ "num_train_epochs": 5,
318
+ "save_steps": 500,
319
+ "stateful_callbacks": {
320
+ "TrainerControl": {
321
+ "args": {
322
+ "should_epoch_stop": false,
323
+ "should_evaluate": false,
324
+ "should_log": false,
325
+ "should_save": true,
326
+ "should_training_stop": true
327
+ },
328
+ "attributes": {}
329
+ }
330
+ },
331
+ "total_flos": 2352410932543488.0,
332
+ "train_batch_size": 16,
333
+ "trial_name": null,
334
+ "trial_params": null
335
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f608511ca661166de5dce1702aea3cbe71cb65cd4a9a74595d843d5f73fdf73c
3
+ size 6033
vocab.json ADDED
The diff for this file is too large to render. See raw diff