freQuensy23 committed on
Commit
a1c53d5
·
verified ·
1 Parent(s): 5efeaf4

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "child_abuse",
14
+ "1": "law",
15
+ "2": "rape",
16
+ "3": "suicide",
17
+ "4": "zoo",
18
+ "5": "med",
19
+ "6": "discrimination",
20
+ "7": "financial",
21
+ "8": "cannibalism",
22
+ "9": "necro",
23
+ "10": "confidentiality",
24
+ "11": "politics",
25
+ "12": "religious",
26
+ "13": "incest"
27
+ },
28
+ "initializer_range": 0.02,
29
+ "intermediate_size": 4096,
30
+ "label2id": {
31
+ "child_abuse": 0,
32
+ "law": 1,
33
+ "rape": 2,
34
+ "suicide": 3,
35
+ "zoo": 4,
36
+ "med": 5,
37
+ "discrimination": 6,
38
+ "financial": 7,
39
+ "cannibalism": 8,
40
+ "necro": 9,
41
+ "confidentiality": 10,
42
+ "politics": 11,
43
+ "religious": 12,
44
+ "incest": 13
45
+ },
46
+ "layer_norm_eps": 1e-05,
47
+ "max_position_embeddings": 514,
48
+ "model_type": "xlm-roberta",
49
+ "num_attention_heads": 16,
50
+ "num_hidden_layers": 24,
51
+ "output_past": true,
52
+ "pad_token_id": 1,
53
+ "position_embedding_type": "absolute",
54
+ "problem_type": "multi_label_classification",
55
+ "torch_dtype": "float32",
56
+ "transformers_version": "4.51.3",
57
+ "type_vocab_size": 1,
58
+ "use_cache": true,
59
+ "vocab_size": 250002
60
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c1b840e29c1b16e2546478720eb1b650f0bf46c4af4c4a0f5720ffa7f50fa8
3
+ size 2239667872
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca8d7aee44c40793046bba245fe9922f17eb4b2d79c92c4f49493e249974025
3
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0091a328b3441d754e481db5a390d7f3b8dabc6016869fd13ba350d23ddc4cd
3
+ size 17082832
trainer_state.json ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 30.979827089337174,
6
+ "eval_steps": 1200,
7
+ "global_step": 21500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.7204610951008645,
14
+ "grad_norm": 0.19696219265460968,
15
+ "learning_rate": 0.0004967269843558504,
16
+ "loss": 0.115,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.440922190201729,
21
+ "grad_norm": 0.18757623434066772,
22
+ "learning_rate": 0.0004931223415759498,
23
+ "loss": 0.0605,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 1.729106628242075,
28
+ "eval_loss": 0.051301125437021255,
29
+ "eval_runtime": 20.8394,
30
+ "eval_samples_per_second": 111.039,
31
+ "eval_steps_per_second": 0.096,
32
+ "step": 1200
33
+ },
34
+ {
35
+ "epoch": 2.161383285302594,
36
+ "grad_norm": 0.23383216559886932,
37
+ "learning_rate": 0.0004895176987960493,
38
+ "loss": 0.06,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 2.881844380403458,
43
+ "grad_norm": 0.8578475713729858,
44
+ "learning_rate": 0.00048591305601614884,
45
+ "loss": 0.045,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 3.4582132564841497,
50
+ "eval_loss": 0.048763249069452286,
51
+ "eval_runtime": 21.4687,
52
+ "eval_samples_per_second": 107.785,
53
+ "eval_steps_per_second": 0.093,
54
+ "step": 2400
55
+ },
56
+ {
57
+ "epoch": 3.602305475504323,
58
+ "grad_norm": 0.5358479619026184,
59
+ "learning_rate": 0.0004823084132362483,
60
+ "loss": 0.034,
61
+ "step": 2500
62
+ },
63
+ {
64
+ "epoch": 4.322766570605188,
65
+ "grad_norm": 0.3984196186065674,
66
+ "learning_rate": 0.0004787037704563478,
67
+ "loss": 0.0278,
68
+ "step": 3000
69
+ },
70
+ {
71
+ "epoch": 5.043227665706052,
72
+ "grad_norm": 0.5611603856086731,
73
+ "learning_rate": 0.00047509912767644725,
74
+ "loss": 0.021,
75
+ "step": 3500
76
+ },
77
+ {
78
+ "epoch": 5.187319884726225,
79
+ "eval_loss": 0.05793336406350136,
80
+ "eval_runtime": 21.4122,
81
+ "eval_samples_per_second": 108.069,
82
+ "eval_steps_per_second": 0.093,
83
+ "step": 3600
84
+ },
85
+ {
86
+ "epoch": 5.763688760806916,
87
+ "grad_norm": 0.33251306414604187,
88
+ "learning_rate": 0.0004714944848965468,
89
+ "loss": 0.019,
90
+ "step": 4000
91
+ },
92
+ {
93
+ "epoch": 6.484149855907781,
94
+ "grad_norm": 0.5683927536010742,
95
+ "learning_rate": 0.00046788984211664625,
96
+ "loss": 0.0168,
97
+ "step": 4500
98
+ },
99
+ {
100
+ "epoch": 6.916426512968299,
101
+ "eval_loss": 0.047444652765989304,
102
+ "eval_runtime": 21.5287,
103
+ "eval_samples_per_second": 107.484,
104
+ "eval_steps_per_second": 0.093,
105
+ "step": 4800
106
+ },
107
+ {
108
+ "epoch": 7.204610951008646,
109
+ "grad_norm": 1.6696492433547974,
110
+ "learning_rate": 0.0004642851993367457,
111
+ "loss": 0.0153,
112
+ "step": 5000
113
+ },
114
+ {
115
+ "epoch": 7.92507204610951,
116
+ "grad_norm": 0.6783491373062134,
117
+ "learning_rate": 0.0004606805565568452,
118
+ "loss": 0.0116,
119
+ "step": 5500
120
+ },
121
+ {
122
+ "epoch": 8.645533141210375,
123
+ "grad_norm": 0.4771524667739868,
124
+ "learning_rate": 0.0004570759137769447,
125
+ "loss": 0.0118,
126
+ "step": 6000
127
+ },
128
+ {
129
+ "epoch": 8.645533141210375,
130
+ "eval_loss": 0.06448203325271606,
131
+ "eval_runtime": 20.7245,
132
+ "eval_samples_per_second": 111.655,
133
+ "eval_steps_per_second": 0.097,
134
+ "step": 6000
135
+ },
136
+ {
137
+ "epoch": 9.36599423631124,
138
+ "grad_norm": 0.45867717266082764,
139
+ "learning_rate": 0.0004534712709970442,
140
+ "loss": 0.0095,
141
+ "step": 6500
142
+ },
143
+ {
144
+ "epoch": 10.086455331412104,
145
+ "grad_norm": 1.0143071413040161,
146
+ "learning_rate": 0.0004498666282171437,
147
+ "loss": 0.0081,
148
+ "step": 7000
149
+ },
150
+ {
151
+ "epoch": 10.37463976945245,
152
+ "eval_loss": 0.059642400592565536,
153
+ "eval_runtime": 20.5013,
154
+ "eval_samples_per_second": 112.871,
155
+ "eval_steps_per_second": 0.098,
156
+ "step": 7200
157
+ },
158
+ {
159
+ "epoch": 10.806916426512968,
160
+ "grad_norm": 0.34545987844467163,
161
+ "learning_rate": 0.0004462619854372432,
162
+ "loss": 0.0077,
163
+ "step": 7500
164
+ },
165
+ {
166
+ "epoch": 11.527377521613833,
167
+ "grad_norm": 0.6745367050170898,
168
+ "learning_rate": 0.00044265734265734266,
169
+ "loss": 0.0073,
170
+ "step": 8000
171
+ },
172
+ {
173
+ "epoch": 12.103746397694524,
174
+ "eval_loss": 0.057360123842954636,
175
+ "eval_runtime": 21.5407,
176
+ "eval_samples_per_second": 107.425,
177
+ "eval_steps_per_second": 0.093,
178
+ "step": 8400
179
+ },
180
+ {
181
+ "epoch": 12.247838616714697,
182
+ "grad_norm": 0.3190229535102844,
183
+ "learning_rate": 0.0004390526998774422,
184
+ "loss": 0.0065,
185
+ "step": 8500
186
+ },
187
+ {
188
+ "epoch": 12.968299711815561,
189
+ "grad_norm": 0.20763935148715973,
190
+ "learning_rate": 0.00043544805709754166,
191
+ "loss": 0.0064,
192
+ "step": 9000
193
+ },
194
+ {
195
+ "epoch": 13.688760806916427,
196
+ "grad_norm": 0.11372426152229309,
197
+ "learning_rate": 0.00043184341431764113,
198
+ "loss": 0.0059,
199
+ "step": 9500
200
+ },
201
+ {
202
+ "epoch": 13.832853025936599,
203
+ "eval_loss": 0.08313994109630585,
204
+ "eval_runtime": 21.5595,
205
+ "eval_samples_per_second": 107.331,
206
+ "eval_steps_per_second": 0.093,
207
+ "step": 9600
208
+ },
209
+ {
210
+ "epoch": 14.409221902017292,
211
+ "grad_norm": 0.6901423335075378,
212
+ "learning_rate": 0.0004282387715377406,
213
+ "loss": 0.0055,
214
+ "step": 10000
215
+ },
216
+ {
217
+ "epoch": 15.129682997118156,
218
+ "grad_norm": 0.5882952213287354,
219
+ "learning_rate": 0.0004246341287578401,
220
+ "loss": 0.005,
221
+ "step": 10500
222
+ },
223
+ {
224
+ "epoch": 15.561959654178674,
225
+ "eval_loss": 0.06821350008249283,
226
+ "eval_runtime": 20.3166,
227
+ "eval_samples_per_second": 113.897,
228
+ "eval_steps_per_second": 0.098,
229
+ "step": 10800
230
+ },
231
+ {
232
+ "epoch": 15.85014409221902,
233
+ "grad_norm": 0.4642440676689148,
234
+ "learning_rate": 0.0004210294859779396,
235
+ "loss": 0.0049,
236
+ "step": 11000
237
+ },
238
+ {
239
+ "epoch": 16.570605187319885,
240
+ "grad_norm": 0.9032358527183533,
241
+ "learning_rate": 0.00041742484319803907,
242
+ "loss": 0.0048,
243
+ "step": 11500
244
+ },
245
+ {
246
+ "epoch": 17.29106628242075,
247
+ "grad_norm": 0.5521640777587891,
248
+ "learning_rate": 0.00041382020041813854,
249
+ "loss": 0.0046,
250
+ "step": 12000
251
+ },
252
+ {
253
+ "epoch": 17.29106628242075,
254
+ "eval_loss": 0.08423992991447449,
255
+ "eval_runtime": 21.1812,
256
+ "eval_samples_per_second": 109.248,
257
+ "eval_steps_per_second": 0.094,
258
+ "step": 12000
259
+ },
260
+ {
261
+ "epoch": 18.011527377521613,
262
+ "grad_norm": 0.7376463413238525,
263
+ "learning_rate": 0.000410215557638238,
264
+ "loss": 0.0044,
265
+ "step": 12500
266
+ },
267
+ {
268
+ "epoch": 18.73198847262248,
269
+ "grad_norm": 1.1471983194351196,
270
+ "learning_rate": 0.0004066109148583376,
271
+ "loss": 0.0045,
272
+ "step": 13000
273
+ },
274
+ {
275
+ "epoch": 19.020172910662826,
276
+ "eval_loss": 0.07880275696516037,
277
+ "eval_runtime": 21.5701,
278
+ "eval_samples_per_second": 107.278,
279
+ "eval_steps_per_second": 0.093,
280
+ "step": 13200
281
+ },
282
+ {
283
+ "epoch": 19.45244956772334,
284
+ "grad_norm": 0.053835347294807434,
285
+ "learning_rate": 0.00040300627207843706,
286
+ "loss": 0.0041,
287
+ "step": 13500
288
+ },
289
+ {
290
+ "epoch": 20.172910662824208,
291
+ "grad_norm": 0.7777488231658936,
292
+ "learning_rate": 0.00039940162929853653,
293
+ "loss": 0.0042,
294
+ "step": 14000
295
+ },
296
+ {
297
+ "epoch": 20.7492795389049,
298
+ "eval_loss": 0.062229253351688385,
299
+ "eval_runtime": 20.4938,
300
+ "eval_samples_per_second": 112.912,
301
+ "eval_steps_per_second": 0.098,
302
+ "step": 14400
303
+ },
304
+ {
305
+ "epoch": 20.89337175792507,
306
+ "grad_norm": 0.14320553839206696,
307
+ "learning_rate": 0.000395796986518636,
308
+ "loss": 0.004,
309
+ "step": 14500
310
+ },
311
+ {
312
+ "epoch": 21.613832853025936,
313
+ "grad_norm": 0.3327866494655609,
314
+ "learning_rate": 0.00039219234373873553,
315
+ "loss": 0.004,
316
+ "step": 15000
317
+ },
318
+ {
319
+ "epoch": 22.334293948126803,
320
+ "grad_norm": 0.29509493708610535,
321
+ "learning_rate": 0.000388587700958835,
322
+ "loss": 0.0037,
323
+ "step": 15500
324
+ },
325
+ {
326
+ "epoch": 22.478386167146976,
327
+ "eval_loss": 0.07450389117002487,
328
+ "eval_runtime": 21.8716,
329
+ "eval_samples_per_second": 105.799,
330
+ "eval_steps_per_second": 0.091,
331
+ "step": 15600
332
+ },
333
+ {
334
+ "epoch": 23.054755043227665,
335
+ "grad_norm": 0.5017435550689697,
336
+ "learning_rate": 0.00038498305817893447,
337
+ "loss": 0.0038,
338
+ "step": 16000
339
+ },
340
+ {
341
+ "epoch": 23.77521613832853,
342
+ "grad_norm": 0.05931377038359642,
343
+ "learning_rate": 0.00038137841539903394,
344
+ "loss": 0.0038,
345
+ "step": 16500
346
+ },
347
+ {
348
+ "epoch": 24.207492795389047,
349
+ "eval_loss": 0.09549176692962646,
350
+ "eval_runtime": 21.5513,
351
+ "eval_samples_per_second": 107.372,
352
+ "eval_steps_per_second": 0.093,
353
+ "step": 16800
354
+ },
355
+ {
356
+ "epoch": 24.495677233429394,
357
+ "grad_norm": 0.13349242508411407,
358
+ "learning_rate": 0.0003777737726191334,
359
+ "loss": 0.0034,
360
+ "step": 17000
361
+ },
362
+ {
363
+ "epoch": 25.21613832853026,
364
+ "grad_norm": 0.19320227205753326,
365
+ "learning_rate": 0.00037416912983923294,
366
+ "loss": 0.0034,
367
+ "step": 17500
368
+ },
369
+ {
370
+ "epoch": 25.936599423631122,
371
+ "grad_norm": 0.24608492851257324,
372
+ "learning_rate": 0.0003705644870593324,
373
+ "loss": 0.0034,
374
+ "step": 18000
375
+ },
376
+ {
377
+ "epoch": 25.936599423631122,
378
+ "eval_loss": 0.10036125034093857,
379
+ "eval_runtime": 22.0387,
380
+ "eval_samples_per_second": 104.997,
381
+ "eval_steps_per_second": 0.091,
382
+ "step": 18000
383
+ },
384
+ {
385
+ "epoch": 26.65706051873199,
386
+ "grad_norm": 0.11887585371732712,
387
+ "learning_rate": 0.0003669598442794319,
388
+ "loss": 0.0033,
389
+ "step": 18500
390
+ },
391
+ {
392
+ "epoch": 27.377521613832855,
393
+ "grad_norm": 0.5103694796562195,
394
+ "learning_rate": 0.0003633552014995314,
395
+ "loss": 0.0031,
396
+ "step": 19000
397
+ },
398
+ {
399
+ "epoch": 27.665706051873197,
400
+ "eval_loss": 0.0853080227971077,
401
+ "eval_runtime": 21.6671,
402
+ "eval_samples_per_second": 106.798,
403
+ "eval_steps_per_second": 0.092,
404
+ "step": 19200
405
+ },
406
+ {
407
+ "epoch": 28.097982708933717,
408
+ "grad_norm": 0.9122279286384583,
409
+ "learning_rate": 0.00035975055871963093,
410
+ "loss": 0.0034,
411
+ "step": 19500
412
+ },
413
+ {
414
+ "epoch": 28.818443804034583,
415
+ "grad_norm": 0.028490234166383743,
416
+ "learning_rate": 0.0003561459159397304,
417
+ "loss": 0.0035,
418
+ "step": 20000
419
+ },
420
+ {
421
+ "epoch": 29.394812680115272,
422
+ "eval_loss": 0.05787323787808418,
423
+ "eval_runtime": 21.1854,
424
+ "eval_samples_per_second": 109.226,
425
+ "eval_steps_per_second": 0.094,
426
+ "step": 20400
427
+ },
428
+ {
429
+ "epoch": 29.538904899135446,
430
+ "grad_norm": 0.32352131605148315,
431
+ "learning_rate": 0.0003525412731598299,
432
+ "loss": 0.0036,
433
+ "step": 20500
434
+ },
435
+ {
436
+ "epoch": 30.259365994236312,
437
+ "grad_norm": 0.43146830797195435,
438
+ "learning_rate": 0.00034893663037992935,
439
+ "loss": 0.0032,
440
+ "step": 21000
441
+ },
442
+ {
443
+ "epoch": 30.979827089337174,
444
+ "grad_norm": 0.22915582358837128,
445
+ "learning_rate": 0.0003453319876000288,
446
+ "loss": 0.0026,
447
+ "step": 21500
448
+ }
449
+ ],
450
+ "logging_steps": 500,
451
+ "max_steps": 69400,
452
+ "num_input_tokens_seen": 0,
453
+ "num_train_epochs": 100,
454
+ "save_steps": 500,
455
+ "stateful_callbacks": {
456
+ "TrainerControl": {
457
+ "args": {
458
+ "should_epoch_stop": false,
459
+ "should_evaluate": false,
460
+ "should_log": false,
461
+ "should_save": true,
462
+ "should_training_stop": false
463
+ },
464
+ "attributes": {}
465
+ }
466
+ },
467
+ "total_flos": 5.477693523839612e+17,
468
+ "train_batch_size": 64,
469
+ "trial_name": null,
470
+ "trial_params": null
471
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eb3398d063b4e0989357a2085b4d8d4b41f25bfd36bdaa7fa2a40da3a9f930f
3
+ size 5841