viahes commited on
Commit
bf12fbc
·
verified ·
1 Parent(s): bc02d93

Upload folder using huggingface_hub

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "<mask>": 60000,
3
  "<text>": 60001
4
  }
 
1
  {
2
+ "<lang:ca>": 60002,
3
+ "<lang:es>": 60003,
4
  "<mask>": 60000,
5
  "<text>": 60001
6
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./NAS-bilingue",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
@@ -42,8 +42,8 @@
42
  "pad_token_id": 0,
43
  "scale_embedding": false,
44
  "torch_dtype": "float32",
45
- "transformers_version": "4.26.1",
46
  "unk_token_id": 3,
47
  "use_cache": true,
48
- "vocab_size": 60002
49
  }
 
1
  {
2
+ "_name_or_path": "./pre-nasca",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
 
42
  "pad_token_id": 0,
43
  "scale_embedding": false,
44
  "torch_dtype": "float32",
45
+ "transformers_version": "4.27.4",
46
  "unk_token_id": 3,
47
  "use_cache": true,
48
+ "vocab_size": 60004
49
  }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "decoder_start_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 0,
7
+ "transformers_version": "4.27.4"
8
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a31455ecbf8c898491f4668dc2b32fbe7f917b872651863d86ef4759dfdffeb5
3
- size 1661268813
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df303c733e557cd238216950c2f323fed37a202206277c807099d6026e1b433
3
+ size 1661277005
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 512
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": 8,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
@@ -54,17 +40,17 @@
54
  "special": true
55
  },
56
  {
57
- "id": 1633,
58
- "content": "ES",
59
  "single_word": false,
60
- "lstrip": false,
61
  "rstrip": false,
62
- "normalized": false,
63
  "special": true
64
  },
65
  {
66
- "id": 9559,
67
- "content": "CA",
68
  "single_word": false,
69
  "lstrip": false,
70
  "rstrip": false,
@@ -72,17 +58,17 @@
72
  "special": true
73
  },
74
  {
75
- "id": 60000,
76
- "content": "<mask>",
77
  "single_word": false,
78
- "lstrip": true,
79
  "rstrip": false,
80
- "normalized": true,
81
  "special": true
82
  },
83
  {
84
- "id": 60001,
85
- "content": "<text>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
40
  "special": true
41
  },
42
  {
43
+ "id": 60000,
44
+ "content": "<mask>",
45
  "single_word": false,
46
+ "lstrip": true,
47
  "rstrip": false,
48
+ "normalized": true,
49
  "special": true
50
  },
51
  {
52
+ "id": 60001,
53
+ "content": "<text>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 60002,
62
+ "content": "<lang:ca>",
63
  "single_word": false,
64
+ "lstrip": false,
65
  "rstrip": false,
66
+ "normalized": false,
67
  "special": true
68
  },
69
  {
70
+ "id": 60003,
71
+ "content": "<lang:es>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -33,8 +33,7 @@
33
  "rstrip": false,
34
  "single_word": false
35
  },
36
- "model_max_length": 512,
37
- "name_or_path": "./NAS-bilingue",
38
  "pad_token": {
39
  "__type": "AddedToken",
40
  "content": "<pad>",
 
33
  "rstrip": false,
34
  "single_word": false
35
  },
36
+ "model_max_length": 1000000000000000019884624838656,
 
37
  "pad_token": {
38
  "__type": "AddedToken",
39
  "content": "<pad>",
trainer_state.json ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03879310344827587,
3
+ "best_model_checkpoint": "NAS-Bi-output/20231214-103634/checkpoint-96",
4
+ "epoch": 3.84,
5
+ "global_step": 120,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 1.3774193548387096e-06,
13
+ "loss": 4.5801,
14
+ "step": 2
15
+ },
16
+ {
17
+ "epoch": 0.13,
18
+ "learning_rate": 1.3548387096774193e-06,
19
+ "loss": 4.5691,
20
+ "step": 4
21
+ },
22
+ {
23
+ "epoch": 0.19,
24
+ "learning_rate": 1.332258064516129e-06,
25
+ "loss": 4.5408,
26
+ "step": 6
27
+ },
28
+ {
29
+ "epoch": 0.26,
30
+ "learning_rate": 1.3096774193548385e-06,
31
+ "loss": 4.3296,
32
+ "step": 8
33
+ },
34
+ {
35
+ "epoch": 0.32,
36
+ "learning_rate": 1.2870967741935482e-06,
37
+ "loss": 4.0271,
38
+ "step": 10
39
+ },
40
+ {
41
+ "epoch": 0.38,
42
+ "learning_rate": 1.264516129032258e-06,
43
+ "loss": 4.3243,
44
+ "step": 12
45
+ },
46
+ {
47
+ "epoch": 0.45,
48
+ "learning_rate": 1.2419354838709678e-06,
49
+ "loss": 4.1559,
50
+ "step": 14
51
+ },
52
+ {
53
+ "epoch": 0.51,
54
+ "learning_rate": 1.2193548387096774e-06,
55
+ "loss": 4.0442,
56
+ "step": 16
57
+ },
58
+ {
59
+ "epoch": 0.58,
60
+ "learning_rate": 1.1967741935483871e-06,
61
+ "loss": 4.1364,
62
+ "step": 18
63
+ },
64
+ {
65
+ "epoch": 0.64,
66
+ "learning_rate": 1.1741935483870968e-06,
67
+ "loss": 3.9896,
68
+ "step": 20
69
+ },
70
+ {
71
+ "epoch": 0.7,
72
+ "learning_rate": 1.1516129032258065e-06,
73
+ "loss": 3.9312,
74
+ "step": 22
75
+ },
76
+ {
77
+ "epoch": 0.77,
78
+ "learning_rate": 1.129032258064516e-06,
79
+ "loss": 4.3079,
80
+ "step": 24
81
+ },
82
+ {
83
+ "epoch": 0.83,
84
+ "learning_rate": 1.1064516129032257e-06,
85
+ "loss": 3.873,
86
+ "step": 26
87
+ },
88
+ {
89
+ "epoch": 0.9,
90
+ "learning_rate": 1.0838709677419354e-06,
91
+ "loss": 4.1228,
92
+ "step": 28
93
+ },
94
+ {
95
+ "epoch": 0.96,
96
+ "learning_rate": 1.061290322580645e-06,
97
+ "loss": 3.8045,
98
+ "step": 30
99
+ },
100
+ {
101
+ "epoch": 1.02,
102
+ "learning_rate": 1.0387096774193547e-06,
103
+ "loss": 3.8668,
104
+ "step": 32
105
+ },
106
+ {
107
+ "epoch": 1.02,
108
+ "eval_loss": 3.629098415374756,
109
+ "eval_rougeLsum": 0.04047338477718224,
110
+ "eval_runtime": 1.6835,
111
+ "eval_samples_per_second": 5.94,
112
+ "eval_steps_per_second": 1.188,
113
+ "step": 32
114
+ },
115
+ {
116
+ "epoch": 1.09,
117
+ "learning_rate": 1.0161290322580644e-06,
118
+ "loss": 3.86,
119
+ "step": 34
120
+ },
121
+ {
122
+ "epoch": 1.15,
123
+ "learning_rate": 9.935483870967741e-07,
124
+ "loss": 3.5875,
125
+ "step": 36
126
+ },
127
+ {
128
+ "epoch": 1.22,
129
+ "learning_rate": 9.709677419354838e-07,
130
+ "loss": 3.8769,
131
+ "step": 38
132
+ },
133
+ {
134
+ "epoch": 1.28,
135
+ "learning_rate": 9.483870967741934e-07,
136
+ "loss": 3.58,
137
+ "step": 40
138
+ },
139
+ {
140
+ "epoch": 1.34,
141
+ "learning_rate": 9.258064516129032e-07,
142
+ "loss": 3.6624,
143
+ "step": 42
144
+ },
145
+ {
146
+ "epoch": 1.41,
147
+ "learning_rate": 9.032258064516129e-07,
148
+ "loss": 3.7948,
149
+ "step": 44
150
+ },
151
+ {
152
+ "epoch": 1.47,
153
+ "learning_rate": 8.806451612903226e-07,
154
+ "loss": 3.6461,
155
+ "step": 46
156
+ },
157
+ {
158
+ "epoch": 1.54,
159
+ "learning_rate": 8.580645161290323e-07,
160
+ "loss": 3.8532,
161
+ "step": 48
162
+ },
163
+ {
164
+ "epoch": 1.6,
165
+ "learning_rate": 8.354838709677419e-07,
166
+ "loss": 3.8078,
167
+ "step": 50
168
+ },
169
+ {
170
+ "epoch": 1.66,
171
+ "learning_rate": 8.129032258064516e-07,
172
+ "loss": 3.6687,
173
+ "step": 52
174
+ },
175
+ {
176
+ "epoch": 1.73,
177
+ "learning_rate": 7.903225806451613e-07,
178
+ "loss": 3.6238,
179
+ "step": 54
180
+ },
181
+ {
182
+ "epoch": 1.79,
183
+ "learning_rate": 7.677419354838709e-07,
184
+ "loss": 3.3701,
185
+ "step": 56
186
+ },
187
+ {
188
+ "epoch": 1.86,
189
+ "learning_rate": 7.451612903225806e-07,
190
+ "loss": 3.6845,
191
+ "step": 58
192
+ },
193
+ {
194
+ "epoch": 1.92,
195
+ "learning_rate": 7.225806451612903e-07,
196
+ "loss": 3.5206,
197
+ "step": 60
198
+ },
199
+ {
200
+ "epoch": 1.98,
201
+ "learning_rate": 7e-07,
202
+ "loss": 3.6524,
203
+ "step": 62
204
+ },
205
+ {
206
+ "epoch": 2.05,
207
+ "learning_rate": 6.774193548387097e-07,
208
+ "loss": 3.5519,
209
+ "step": 64
210
+ },
211
+ {
212
+ "epoch": 2.05,
213
+ "eval_loss": 3.363889694213867,
214
+ "eval_rougeLsum": 0.03520105471324984,
215
+ "eval_runtime": 0.7217,
216
+ "eval_samples_per_second": 13.857,
217
+ "eval_steps_per_second": 2.771,
218
+ "step": 64
219
+ },
220
+ {
221
+ "epoch": 2.11,
222
+ "learning_rate": 6.548387096774192e-07,
223
+ "loss": 3.534,
224
+ "step": 66
225
+ },
226
+ {
227
+ "epoch": 2.18,
228
+ "learning_rate": 6.32258064516129e-07,
229
+ "loss": 3.4723,
230
+ "step": 68
231
+ },
232
+ {
233
+ "epoch": 2.24,
234
+ "learning_rate": 6.096774193548387e-07,
235
+ "loss": 3.4788,
236
+ "step": 70
237
+ },
238
+ {
239
+ "epoch": 2.3,
240
+ "learning_rate": 5.870967741935484e-07,
241
+ "loss": 3.5552,
242
+ "step": 72
243
+ },
244
+ {
245
+ "epoch": 2.37,
246
+ "learning_rate": 5.64516129032258e-07,
247
+ "loss": 3.6136,
248
+ "step": 74
249
+ },
250
+ {
251
+ "epoch": 2.43,
252
+ "learning_rate": 5.419354838709677e-07,
253
+ "loss": 3.4261,
254
+ "step": 76
255
+ },
256
+ {
257
+ "epoch": 2.5,
258
+ "learning_rate": 5.193548387096774e-07,
259
+ "loss": 3.2605,
260
+ "step": 78
261
+ },
262
+ {
263
+ "epoch": 2.56,
264
+ "learning_rate": 4.967741935483871e-07,
265
+ "loss": 3.4041,
266
+ "step": 80
267
+ },
268
+ {
269
+ "epoch": 2.62,
270
+ "learning_rate": 4.741935483870967e-07,
271
+ "loss": 3.4199,
272
+ "step": 82
273
+ },
274
+ {
275
+ "epoch": 2.69,
276
+ "learning_rate": 4.5161290322580644e-07,
277
+ "loss": 3.5787,
278
+ "step": 84
279
+ },
280
+ {
281
+ "epoch": 2.75,
282
+ "learning_rate": 4.2903225806451613e-07,
283
+ "loss": 3.2749,
284
+ "step": 86
285
+ },
286
+ {
287
+ "epoch": 2.82,
288
+ "learning_rate": 4.064516129032258e-07,
289
+ "loss": 3.2766,
290
+ "step": 88
291
+ },
292
+ {
293
+ "epoch": 2.88,
294
+ "learning_rate": 3.8387096774193545e-07,
295
+ "loss": 3.5915,
296
+ "step": 90
297
+ },
298
+ {
299
+ "epoch": 2.94,
300
+ "learning_rate": 3.6129032258064514e-07,
301
+ "loss": 3.4801,
302
+ "step": 92
303
+ },
304
+ {
305
+ "epoch": 3.01,
306
+ "learning_rate": 3.3870967741935483e-07,
307
+ "loss": 3.4917,
308
+ "step": 94
309
+ },
310
+ {
311
+ "epoch": 3.07,
312
+ "learning_rate": 3.161290322580645e-07,
313
+ "loss": 3.4979,
314
+ "step": 96
315
+ },
316
+ {
317
+ "epoch": 3.07,
318
+ "eval_loss": 3.2409491539001465,
319
+ "eval_rougeLsum": 0.03879310344827587,
320
+ "eval_runtime": 0.7453,
321
+ "eval_samples_per_second": 13.417,
322
+ "eval_steps_per_second": 2.683,
323
+ "step": 96
324
+ },
325
+ {
326
+ "epoch": 3.14,
327
+ "learning_rate": 2.935483870967742e-07,
328
+ "loss": 3.4648,
329
+ "step": 98
330
+ },
331
+ {
332
+ "epoch": 3.2,
333
+ "learning_rate": 2.7096774193548384e-07,
334
+ "loss": 3.4253,
335
+ "step": 100
336
+ },
337
+ {
338
+ "epoch": 3.26,
339
+ "learning_rate": 2.4838709677419353e-07,
340
+ "loss": 3.3574,
341
+ "step": 102
342
+ },
343
+ {
344
+ "epoch": 3.33,
345
+ "learning_rate": 2.2580645161290322e-07,
346
+ "loss": 3.4262,
347
+ "step": 104
348
+ },
349
+ {
350
+ "epoch": 3.39,
351
+ "learning_rate": 2.032258064516129e-07,
352
+ "loss": 3.2748,
353
+ "step": 106
354
+ },
355
+ {
356
+ "epoch": 3.46,
357
+ "learning_rate": 1.8064516129032257e-07,
358
+ "loss": 3.4467,
359
+ "step": 108
360
+ },
361
+ {
362
+ "epoch": 3.52,
363
+ "learning_rate": 1.5806451612903226e-07,
364
+ "loss": 3.3972,
365
+ "step": 110
366
+ },
367
+ {
368
+ "epoch": 3.58,
369
+ "learning_rate": 1.3548387096774192e-07,
370
+ "loss": 3.0868,
371
+ "step": 112
372
+ },
373
+ {
374
+ "epoch": 3.65,
375
+ "learning_rate": 1.1290322580645161e-07,
376
+ "loss": 3.4098,
377
+ "step": 114
378
+ },
379
+ {
380
+ "epoch": 3.71,
381
+ "learning_rate": 9.032258064516128e-08,
382
+ "loss": 3.3889,
383
+ "step": 116
384
+ },
385
+ {
386
+ "epoch": 3.78,
387
+ "learning_rate": 6.774193548387096e-08,
388
+ "loss": 3.4246,
389
+ "step": 118
390
+ },
391
+ {
392
+ "epoch": 3.84,
393
+ "learning_rate": 4.516129032258064e-08,
394
+ "loss": 3.1682,
395
+ "step": 120
396
+ }
397
+ ],
398
+ "max_steps": 124,
399
+ "num_train_epochs": 4,
400
+ "total_flos": 4160840836055040.0,
401
+ "trial_name": null,
402
+ "trial_params": null
403
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81e3b61835cb9e885736d6b9d47f384fbb2e248cd8a93217b0f801e836862e29
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5ba13682c76280a9fb401c44134bf15d7c6a0453d3e2424624c79587d7b9a3
3
+ size 3707