morpheuslord commited on
Commit
4d32c73
·
verified ·
1 Parent(s): 07dfa8b

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -14,3 +14,45 @@ logs/events.out.tfevents.1777738485.bazzite.226596.0 filter=lfs diff=lfs merge=l
14
  logs/events.out.tfevents.1777790308.bazzite.14979.0 filter=lfs diff=lfs merge=lfs -text
15
  logs/events.out.tfevents.1777785111.bazzite.5847.0 filter=lfs diff=lfs merge=lfs -text
16
  logs/events.out.tfevents.1777790600.bazzite.19895.0 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  logs/events.out.tfevents.1777790308.bazzite.14979.0 filter=lfs diff=lfs merge=lfs -text
15
  logs/events.out.tfevents.1777785111.bazzite.5847.0 filter=lfs diff=lfs merge=lfs -text
16
  logs/events.out.tfevents.1777790600.bazzite.19895.0 filter=lfs diff=lfs merge=lfs -text
17
+ logs/events.out.tfevents.1777790432.bazzite.18166.0 filter=lfs diff=lfs merge=lfs -text
18
+ logs/events.out.tfevents.1777791700.bazzite.29722.0 filter=lfs diff=lfs merge=lfs -text
19
+ logs/events.out.tfevents.1777792299.bazzite.34388.0 filter=lfs diff=lfs merge=lfs -text
20
+ wandb/run-20260502_150043-2fg22e6p/run-2fg22e6p.wandb filter=lfs diff=lfs merge=lfs -text
21
+ wandb/run-20260502_192151-h1jq4pkw/run-h1jq4pkw.wandb filter=lfs diff=lfs merge=lfs -text
22
+ wandb/run-20260502_200514-kl2gg5g9/run-kl2gg5g9.wandb filter=lfs diff=lfs merge=lfs -text
23
+ wandb/run-20260502_165926-36ppiwlg/run-36ppiwlg.wandb filter=lfs diff=lfs merge=lfs -text
24
+ wandb/run-20260502_204834-03roqvb7/run-03roqvb7.wandb filter=lfs diff=lfs merge=lfs -text
25
+ wandb/run-20260503_104137-zjr4w5ln/run-zjr4w5ln.wandb filter=lfs diff=lfs merge=lfs -text
26
+ wandb/run-20260503_123131-4y9tqaim/run-4y9tqaim.wandb filter=lfs diff=lfs merge=lfs -text
27
+ wandb/run-20260503_124131-7q4dwe22/run-7q4dwe22.wandb filter=lfs diff=lfs merge=lfs -text
28
+ data/raw/hf/writing_prompts/data-00001-of-00002.arrow filter=lfs diff=lfs merge=lfs -text
29
+ data/raw/hf/writing_prompts/data-00000-of-00002.arrow filter=lfs diff=lfs merge=lfs -text
30
+ data/raw/hf/raid/data-00024-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
31
+ data/raw/starblasters8/data.csv filter=lfs diff=lfs merge=lfs -text
32
+ data/raw/fce_v2.1.bea19.tar.gz filter=lfs diff=lfs merge=lfs -text
33
+ data/raw/wi+locness_v2.1.bea19.tar.gz filter=lfs diff=lfs merge=lfs -text
34
+ data/processed/train.jsonl filter=lfs diff=lfs merge=lfs -text
35
+ data/raw/jfleg_repo/EACLshort037.pdf filter=lfs diff=lfs merge=lfs -text
36
+ data/raw/starblasters8/distribution.parquet filter=lfs diff=lfs merge=lfs -text
37
+ data/raw/starblasters8/prompts.parquet filter=lfs diff=lfs merge=lfs -text
38
+ data/raw/hf/paws/data-00000-of-00001.arrow filter=lfs diff=lfs merge=lfs -text
39
+ data/raw/hf/raid/data-00001-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
40
+ data/raw/hf/raid/data-00002-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
41
+ data/raw/hf/mage/data-00000-of-00001.arrow filter=lfs diff=lfs merge=lfs -text
42
+ data/raw/hf/gpt_wiki_intro/data-00000-of-00001.arrow filter=lfs diff=lfs merge=lfs -text
43
+ data/raw/hf/raid/data-00000-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
44
+ data/raw/hf/raid/data-00004-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
45
+ data/raw/hf/raid/data-00003-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
46
+ data/raw/hf/raid/data-00005-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
47
+ data/raw/hf/raid/data-00007-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
48
+ data/raw/hf/raid/data-00008-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
49
+ data/raw/hf/raid/data-00010-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
50
+ data/raw/shanegerami/AI_Human.csv filter=lfs diff=lfs merge=lfs -text
51
+ data/raw/hf/raid/data-00011-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
52
+ data/raw/hf/raid/data-00006-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
53
+ data/raw/hf/raid/data-00012-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
54
+ data/raw/starblasters8/data.parquet filter=lfs diff=lfs merge=lfs -text
55
+ data/raw/hf/raid/data-00009-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
56
+ data/raw/hf/raid/data-00015-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
57
+ data/raw/hf/raid/data-00014-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
58
+ data/raw/hf/raid/data-00017-of-00025.arrow filter=lfs diff=lfs merge=lfs -text
checkpoints/checkpoint-1515/trainer_state.json CHANGED
@@ -1,574 +1,3 @@
1
- {
2
- "best_global_step": 1300,
3
- "best_metric": 1.4053895473480225,
4
- "best_model_checkpoint": "checkpoints/checkpoint-1300",
5
- "epoch": 5.0,
6
- "eval_steps": 100,
7
- "global_step": 1515,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.08264462809917356,
14
- "grad_norm": 0.9776630997657776,
15
- "learning_rate": 9.473684210526315e-05,
16
- "loss": 14.8076,
17
- "step": 25
18
- },
19
- {
20
- "epoch": 0.1652892561983471,
21
- "grad_norm": 0.8765299916267395,
22
- "learning_rate": 0.00019342105263157894,
23
- "loss": 14.3984,
24
- "step": 50
25
- },
26
- {
27
- "epoch": 0.24793388429752067,
28
- "grad_norm": 1.0470659732818604,
29
- "learning_rate": 0.0002921052631578947,
30
- "loss": 13.8088,
31
- "step": 75
32
- },
33
- {
34
- "epoch": 0.3305785123966942,
35
- "grad_norm": 0.8377240896224976,
36
- "learning_rate": 0.0002998109381774427,
37
- "loss": 13.4818,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.3305785123966942,
42
- "eval_loss": 1.5258959531784058,
43
- "eval_runtime": 44.5888,
44
- "eval_samples_per_second": 18.816,
45
- "eval_steps_per_second": 2.355,
46
- "step": 100
47
- },
48
- {
49
- "epoch": 0.4132231404958678,
50
- "grad_norm": 0.9031451940536499,
51
- "learning_rate": 0.0002991771428891996,
52
- "loss": 13.0253,
53
- "step": 125
54
- },
55
- {
56
- "epoch": 0.49586776859504134,
57
- "grad_norm": 1.3265438079833984,
58
- "learning_rate": 0.00029809907181205865,
59
- "loss": 12.7368,
60
- "step": 150
61
- },
62
- {
63
- "epoch": 0.5785123966942148,
64
- "grad_norm": 1.1032358407974243,
65
- "learning_rate": 0.00029657993563144406,
66
- "loss": 13.0546,
67
- "step": 175
68
- },
69
- {
70
- "epoch": 0.6611570247933884,
71
- "grad_norm": 1.085852861404419,
72
- "learning_rate": 0.00029462425860229204,
73
- "loss": 12.6614,
74
- "step": 200
75
- },
76
- {
77
- "epoch": 0.6611570247933884,
78
- "eval_loss": 1.4710893630981445,
79
- "eval_runtime": 20.3316,
80
- "eval_samples_per_second": 41.266,
81
- "eval_steps_per_second": 5.164,
82
- "step": 200
83
- },
84
- {
85
- "epoch": 0.743801652892562,
86
- "grad_norm": 1.010011076927185,
87
- "learning_rate": 0.00029223786507502327,
88
- "loss": 12.687,
89
- "step": 225
90
- },
91
- {
92
- "epoch": 0.8264462809917356,
93
- "grad_norm": 1.0375562906265259,
94
- "learning_rate": 0.00028942786214960245,
95
- "loss": 12.7898,
96
- "step": 250
97
- },
98
- {
99
- "epoch": 0.9090909090909091,
100
- "grad_norm": 1.092232584953308,
101
- "learning_rate": 0.00028620261850934335,
102
- "loss": 12.5906,
103
- "step": 275
104
- },
105
- {
106
- "epoch": 0.9917355371900827,
107
- "grad_norm": 1.353973388671875,
108
- "learning_rate": 0.000282571739497497,
109
- "loss": 12.477,
110
- "step": 300
111
- },
112
- {
113
- "epoch": 0.9917355371900827,
114
- "eval_loss": 1.4449142217636108,
115
- "eval_runtime": 20.1604,
116
- "eval_samples_per_second": 41.616,
117
- "eval_steps_per_second": 5.208,
118
- "step": 300
119
- },
120
- {
121
- "epoch": 1.0727272727272728,
122
- "grad_norm": 1.1319797039031982,
123
- "learning_rate": 0.0002785460385108489,
124
- "loss": 12.342,
125
- "step": 325
126
- },
127
- {
128
- "epoch": 1.1553719008264463,
129
- "grad_norm": 1.1007564067840576,
130
- "learning_rate": 0.0002741375047955198,
131
- "loss": 12.239,
132
- "step": 350
133
- },
134
- {
135
- "epoch": 1.2380165289256198,
136
- "grad_norm": 1.1579365730285645,
137
- "learning_rate": 0.000269359267740881,
138
- "loss": 12.4161,
139
- "step": 375
140
- },
141
- {
142
- "epoch": 1.3206611570247935,
143
- "grad_norm": 1.2810680866241455,
144
- "learning_rate": 0.0002642255577779212,
145
- "loss": 12.3898,
146
- "step": 400
147
- },
148
- {
149
- "epoch": 1.3206611570247935,
150
- "eval_loss": 1.4367592334747314,
151
- "eval_runtime": 17.2935,
152
- "eval_samples_per_second": 48.515,
153
- "eval_steps_per_second": 6.072,
154
- "step": 400
155
- },
156
- {
157
- "epoch": 1.403305785123967,
158
- "grad_norm": 1.1732923984527588,
159
- "learning_rate": 0.0002587516639985186,
160
- "loss": 12.1514,
161
- "step": 425
162
- },
163
- {
164
- "epoch": 1.4859504132231405,
165
- "grad_norm": 1.2293694019317627,
166
- "learning_rate": 0.00025295388862183425,
167
- "loss": 12.2659,
168
- "step": 450
169
- },
170
- {
171
- "epoch": 1.5685950413223142,
172
- "grad_norm": 1.1800557374954224,
173
- "learning_rate": 0.00024684949844343366,
174
- "loss": 12.3354,
175
- "step": 475
176
- },
177
- {
178
- "epoch": 1.6512396694214875,
179
- "grad_norm": 1.7430154085159302,
180
- "learning_rate": 0.00024045667341173076,
181
- "loss": 12.2703,
182
- "step": 500
183
- },
184
- {
185
- "epoch": 1.6512396694214875,
186
- "eval_loss": 1.433081865310669,
187
- "eval_runtime": 12.995,
188
- "eval_samples_per_second": 64.563,
189
- "eval_steps_per_second": 8.08,
190
- "step": 500
191
- },
192
- {
193
- "epoch": 1.7338842975206612,
194
- "grad_norm": 1.0517712831497192,
195
- "learning_rate": 0.00023379445248490122,
196
- "loss": 12.3778,
197
- "step": 525
198
- },
199
- {
200
- "epoch": 1.8165289256198347,
201
- "grad_norm": 1.0788432359695435,
202
- "learning_rate": 0.00022688267692951415,
203
- "loss": 12.2553,
204
- "step": 550
205
- },
206
- {
207
- "epoch": 1.8991735537190082,
208
- "grad_norm": 1.15561842918396,
209
- "learning_rate": 0.00021974193122974782,
210
- "loss": 12.3038,
211
- "step": 575
212
- },
213
- {
214
- "epoch": 1.981818181818182,
215
- "grad_norm": 1.3563194274902344,
216
- "learning_rate": 0.00021239348178317242,
217
- "loss": 12.343,
218
- "step": 600
219
- },
220
- {
221
- "epoch": 1.981818181818182,
222
- "eval_loss": 1.4239836931228638,
223
- "eval_runtime": 13.614,
224
- "eval_samples_per_second": 61.628,
225
- "eval_steps_per_second": 7.713,
226
- "step": 600
227
- },
228
- {
229
- "epoch": 2.062809917355372,
230
- "grad_norm": 1.1103503704071045,
231
- "learning_rate": 0.00020485921356567523,
232
- "loss": 12.0412,
233
- "step": 625
234
- },
235
- {
236
- "epoch": 2.1454545454545455,
237
- "grad_norm": 1.2184436321258545,
238
- "learning_rate": 0.0001971615649541501,
239
- "loss": 12.2252,
240
- "step": 650
241
- },
242
- {
243
- "epoch": 2.2280991735537192,
244
- "grad_norm": 1.2329784631729126,
245
- "learning_rate": 0.00018932346090106165,
246
- "loss": 12.0018,
247
- "step": 675
248
- },
249
- {
250
- "epoch": 2.3107438016528925,
251
- "grad_norm": 1.2614842653274536,
252
- "learning_rate": 0.00018136824465990166,
253
- "loss": 12.1944,
254
- "step": 700
255
- },
256
- {
257
- "epoch": 2.3107438016528925,
258
- "eval_loss": 1.4167625904083252,
259
- "eval_runtime": 13.0103,
260
- "eval_samples_per_second": 64.488,
261
- "eval_steps_per_second": 8.071,
262
- "step": 700
263
- },
264
- {
265
- "epoch": 2.3933884297520662,
266
- "grad_norm": 1.261049509048462,
267
- "learning_rate": 0.0001733196082648715,
268
- "loss": 11.9581,
269
- "step": 725
270
- },
271
- {
272
- "epoch": 2.4760330578512395,
273
- "grad_norm": 1.2732257843017578,
274
- "learning_rate": 0.00016520152197183418,
275
- "loss": 12.1556,
276
- "step": 750
277
- },
278
- {
279
- "epoch": 2.5586776859504132,
280
- "grad_norm": 1.2463732957839966,
281
- "learning_rate": 0.000157038162870673,
282
- "loss": 12.0429,
283
- "step": 775
284
- },
285
- {
286
- "epoch": 2.641322314049587,
287
- "grad_norm": 1.1238192319869995,
288
- "learning_rate": 0.0001488538428816627,
289
- "loss": 12.3026,
290
- "step": 800
291
- },
292
- {
293
- "epoch": 2.641322314049587,
294
- "eval_loss": 1.4147059917449951,
295
- "eval_runtime": 13.0025,
296
- "eval_samples_per_second": 64.526,
297
- "eval_steps_per_second": 8.075,
298
- "step": 800
299
- },
300
- {
301
- "epoch": 2.7239669421487602,
302
- "grad_norm": 1.3407771587371826,
303
- "learning_rate": 0.00014067293635029133,
304
- "loss": 11.9619,
305
- "step": 825
306
- },
307
- {
308
- "epoch": 2.806611570247934,
309
- "grad_norm": 1.3070601224899292,
310
- "learning_rate": 0.00013251980745616928,
311
- "loss": 12.2279,
312
- "step": 850
313
- },
314
- {
315
- "epoch": 2.8892561983471072,
316
- "grad_norm": 1.1657178401947021,
317
- "learning_rate": 0.0001244187376522141,
318
- "loss": 12.3092,
319
- "step": 875
320
- },
321
- {
322
- "epoch": 2.971900826446281,
323
- "grad_norm": 1.3273015022277832,
324
- "learning_rate": 0.0001163938533502094,
325
- "loss": 12.09,
326
- "step": 900
327
- },
328
- {
329
- "epoch": 2.971900826446281,
330
- "eval_loss": 1.414381742477417,
331
- "eval_runtime": 13.0006,
332
- "eval_samples_per_second": 64.536,
333
- "eval_steps_per_second": 8.077,
334
- "step": 900
335
- },
336
- {
337
- "epoch": 3.0528925619834713,
338
- "grad_norm": 1.2026565074920654,
339
- "learning_rate": 0.0001084690540681045,
340
- "loss": 11.89,
341
- "step": 925
342
- },
343
- {
344
- "epoch": 3.1355371900826445,
345
- "grad_norm": 1.2379560470581055,
346
- "learning_rate": 0.00010066794125304281,
347
- "loss": 12.196,
348
- "step": 950
349
- },
350
- {
351
- "epoch": 3.2181818181818183,
352
- "grad_norm": 1.219448447227478,
353
- "learning_rate": 9.301374799209826e-05,
354
- "loss": 11.8079,
355
- "step": 975
356
- },
357
- {
358
- "epoch": 3.3008264462809915,
359
- "grad_norm": 1.2622979879379272,
360
- "learning_rate": 8.55292698200527e-05,
361
- "loss": 12.0511,
362
- "step": 1000
363
- },
364
- {
365
- "epoch": 3.3008264462809915,
366
- "eval_loss": 1.4059090614318848,
367
- "eval_runtime": 12.9985,
368
- "eval_samples_per_second": 64.546,
369
- "eval_steps_per_second": 8.078,
370
- "step": 1000
371
- },
372
- {
373
- "epoch": 3.3834710743801653,
374
- "grad_norm": 1.420868158340454,
375
- "learning_rate": 7.823679683028089e-05,
376
- "loss": 12.0749,
377
- "step": 1025
378
- },
379
- {
380
- "epoch": 3.466115702479339,
381
- "grad_norm": 1.2610206604003906,
382
- "learning_rate": 7.115804729092889e-05,
383
- "loss": 12.0321,
384
- "step": 1050
385
- },
386
- {
387
- "epoch": 3.5487603305785123,
388
- "grad_norm": 1.0775682926177979,
389
- "learning_rate": 6.431410296408913e-05,
390
- "loss": 12.1338,
391
- "step": 1075
392
- },
393
- {
394
- "epoch": 3.631404958677686,
395
- "grad_norm": 1.196616291999817,
396
- "learning_rate": 5.772534632060193e-05,
397
- "loss": 12.2208,
398
- "step": 1100
399
- },
400
- {
401
- "epoch": 3.631404958677686,
402
- "eval_loss": 1.4090858697891235,
403
- "eval_runtime": 13.0123,
404
- "eval_samples_per_second": 64.478,
405
- "eval_steps_per_second": 8.069,
406
- "step": 1100
407
- },
408
- {
409
- "epoch": 3.7140495867768593,
410
- "grad_norm": 1.3985774517059326,
411
- "learning_rate": 5.14113998374697e-05,
412
- "loss": 11.9138,
413
- "step": 1125
414
- },
415
- {
416
- "epoch": 3.796694214876033,
417
- "grad_norm": 1.2652424573898315,
418
- "learning_rate": 4.53910675586668e-05,
419
- "loss": 11.9639,
420
- "step": 1150
421
- },
422
- {
423
- "epoch": 3.8793388429752067,
424
- "grad_norm": 1.2594548463821411,
425
- "learning_rate": 3.968227909338666e-05,
426
- "loss": 12.048,
427
- "step": 1175
428
- },
429
- {
430
- "epoch": 3.9619834710743804,
431
- "grad_norm": 1.3240498304367065,
432
- "learning_rate": 3.43020362185097e-05,
433
- "loss": 11.7403,
434
- "step": 1200
435
- },
436
- {
437
- "epoch": 3.9619834710743804,
438
- "eval_loss": 1.4072344303131104,
439
- "eval_runtime": 13.0151,
440
- "eval_samples_per_second": 64.464,
441
- "eval_steps_per_second": 8.068,
442
- "step": 1200
443
- },
444
- {
445
- "epoch": 4.04297520661157,
446
- "grad_norm": 1.252326250076294,
447
- "learning_rate": 2.9266362244319302e-05,
448
- "loss": 11.8426,
449
- "step": 1225
450
- },
451
- {
452
- "epoch": 4.125619834710744,
453
- "grad_norm": 1.3867508172988892,
454
- "learning_rate": 2.4590254294263283e-05,
455
- "loss": 11.902,
456
- "step": 1250
457
- },
458
- {
459
- "epoch": 4.208264462809917,
460
- "grad_norm": 1.266250491142273,
461
- "learning_rate": 2.0287638640880855e-05,
462
- "loss": 12.095,
463
- "step": 1275
464
- },
465
- {
466
- "epoch": 4.290909090909091,
467
- "grad_norm": 1.0812941789627075,
468
- "learning_rate": 1.6371329230911417e-05,
469
- "loss": 12.0194,
470
- "step": 1300
471
- },
472
- {
473
- "epoch": 4.290909090909091,
474
- "eval_loss": 1.4053895473480225,
475
- "eval_runtime": 13.009,
476
- "eval_samples_per_second": 64.494,
477
- "eval_steps_per_second": 8.071,
478
- "step": 1300
479
- },
480
- {
481
- "epoch": 4.373553719008265,
482
- "grad_norm": 1.3015711307525635,
483
- "learning_rate": 1.285298952310605e-05,
484
- "loss": 11.947,
485
- "step": 1325
486
- },
487
- {
488
- "epoch": 4.4561983471074385,
489
- "grad_norm": 1.2638295888900757,
490
- "learning_rate": 9.743097752394192e-06,
491
- "loss": 12.1636,
492
- "step": 1350
493
- },
494
- {
495
- "epoch": 4.538842975206611,
496
- "grad_norm": 1.4439826011657715,
497
- "learning_rate": 7.050915723855716e-06,
498
- "loss": 11.9777,
499
- "step": 1375
500
- },
501
- {
502
- "epoch": 4.621487603305785,
503
- "grad_norm": 1.3093808889389038,
504
- "learning_rate": 4.784461229435327e-06,
505
- "loss": 12.0375,
506
- "step": 1400
507
- },
508
- {
509
- "epoch": 4.621487603305785,
510
- "eval_loss": 1.4081956148147583,
511
- "eval_runtime": 18.4374,
512
- "eval_samples_per_second": 45.505,
513
- "eval_steps_per_second": 5.695,
514
- "step": 1400
515
- },
516
- {
517
- "epoch": 4.704132231404959,
518
- "grad_norm": 1.3626532554626465,
519
- "learning_rate": 2.9504841695467797e-06,
520
- "loss": 11.9254,
521
- "step": 1425
522
- },
523
- {
524
- "epoch": 4.7867768595041325,
525
- "grad_norm": 1.433358907699585,
526
- "learning_rate": 1.5544464506813624e-06,
527
- "loss": 11.9902,
528
- "step": 1450
529
- },
530
- {
531
- "epoch": 4.869421487603306,
532
- "grad_norm": 1.2513891458511353,
533
- "learning_rate": 6.005057188890661e-07,
534
- "loss": 11.9917,
535
- "step": 1475
536
- },
537
- {
538
- "epoch": 4.952066115702479,
539
- "grad_norm": 1.3513580560684204,
540
- "learning_rate": 9.150297757676839e-08,
541
- "loss": 11.8346,
542
- "step": 1500
543
- },
544
- {
545
- "epoch": 4.952066115702479,
546
- "eval_loss": 1.4073647260665894,
547
- "eval_runtime": 23.2129,
548
- "eval_samples_per_second": 36.144,
549
- "eval_steps_per_second": 4.523,
550
- "step": 1500
551
- }
552
- ],
553
- "logging_steps": 25,
554
- "max_steps": 1515,
555
- "num_input_tokens_seen": 0,
556
- "num_train_epochs": 5,
557
- "save_steps": 100,
558
- "stateful_callbacks": {
559
- "TrainerControl": {
560
- "args": {
561
- "should_epoch_stop": false,
562
- "should_evaluate": false,
563
- "should_log": false,
564
- "should_save": true,
565
- "should_training_stop": true
566
- },
567
- "attributes": {}
568
- }
569
- },
570
- "total_flos": 2296778116300800.0,
571
- "train_batch_size": 4,
572
- "trial_name": null,
573
- "trial_params": null
574
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c73e13c88e2f12d95ba99d4233a2af5206350c02a2aad237563baca928f2bc
3
+ size 14461