besimray commited on
Commit
a0b8840
·
verified ·
1 Parent(s): 1632a9e

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e41d69019d421204220964865c9c89fc7bacb60a8411908a8b29c0a12114b94a
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b7abe2f22382f3999a654747949c5f04cd7b50f2076f7d08dc4cde28c3ff8c3
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1928572a9ac56c6d98ea8953c7a7a8fdca570b8a20a3959f47990cc5be36ddc3
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a525396334785dec46ba6a050a54b56d171785887e5e54d90bce2652d22331
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e565442590f72e02c81245f841f0d570f0816c276d51ef5abfd2bed9cc00d28
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31543a07fbd48036322e55fc875b941e5f56b48a74d8818ee4618f3aa994756b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:062016b917f4cd81cef6ab15bfe81df4e94586c0afba8905b655deaa6fff468f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b80d146b8b37f82a1962ba385b8329fd6c2c35ba5116c53e131bb661ab681b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7905269861221313,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-40",
4
- "epoch": 0.5144694533762058,
5
  "eval_steps": 20,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -311,6 +311,154 @@
311
  "eval_samples_per_second": 21.975,
312
  "eval_steps_per_second": 2.278,
313
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  }
315
  ],
316
  "logging_steps": 1,
@@ -339,7 +487,7 @@
339
  "attributes": {}
340
  }
341
  },
342
- "total_flos": 1.012328531755008e+16,
343
  "train_batch_size": 10,
344
  "trial_name": null,
345
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7785296440124512,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
+ "epoch": 0.7717041800643086,
5
  "eval_steps": 20,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
311
  "eval_samples_per_second": 21.975,
312
  "eval_steps_per_second": 2.278,
313
  "step": 40
314
+ },
315
+ {
316
+ "epoch": 0.5273311897106109,
317
+ "grad_norm": 0.20955221354961395,
318
+ "learning_rate": 0.0001767644740358011,
319
+ "loss": 0.7579,
320
+ "step": 41
321
+ },
322
+ {
323
+ "epoch": 0.5401929260450161,
324
+ "grad_norm": 0.22845645248889923,
325
+ "learning_rate": 0.00017530714660036112,
326
+ "loss": 0.8299,
327
+ "step": 42
328
+ },
329
+ {
330
+ "epoch": 0.5530546623794212,
331
+ "grad_norm": 0.21866574883460999,
332
+ "learning_rate": 0.00017381189974873407,
333
+ "loss": 0.7611,
334
+ "step": 43
335
+ },
336
+ {
337
+ "epoch": 0.5659163987138264,
338
+ "grad_norm": 0.23347878456115723,
339
+ "learning_rate": 0.00017227948638273916,
340
+ "loss": 0.7499,
341
+ "step": 44
342
+ },
343
+ {
344
+ "epoch": 0.5787781350482315,
345
+ "grad_norm": 0.22938531637191772,
346
+ "learning_rate": 0.00017071067811865476,
347
+ "loss": 0.7519,
348
+ "step": 45
349
+ },
350
+ {
351
+ "epoch": 0.5916398713826366,
352
+ "grad_norm": 0.23043856024742126,
353
+ "learning_rate": 0.00016910626489868649,
354
+ "loss": 0.7447,
355
+ "step": 46
356
+ },
357
+ {
358
+ "epoch": 0.6045016077170418,
359
+ "grad_norm": 0.2377193123102188,
360
+ "learning_rate": 0.00016746705459320745,
361
+ "loss": 0.8113,
362
+ "step": 47
363
+ },
364
+ {
365
+ "epoch": 0.617363344051447,
366
+ "grad_norm": 0.22112426161766052,
367
+ "learning_rate": 0.00016579387259397127,
368
+ "loss": 0.7647,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.6302250803858521,
373
+ "grad_norm": 0.22107215225696564,
374
+ "learning_rate": 0.0001640875613985024,
375
+ "loss": 0.7654,
376
+ "step": 49
377
+ },
378
+ {
379
+ "epoch": 0.6430868167202572,
380
+ "grad_norm": 0.23675453662872314,
381
+ "learning_rate": 0.00016234898018587337,
382
+ "loss": 0.7388,
383
+ "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.6559485530546624,
387
+ "grad_norm": 0.22421492636203766,
388
+ "learning_rate": 0.000160579004384082,
389
+ "loss": 0.7062,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.6688102893890675,
394
+ "grad_norm": 0.20600281655788422,
395
+ "learning_rate": 0.00015877852522924732,
396
+ "loss": 0.7074,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.6816720257234726,
401
+ "grad_norm": 0.24087969958782196,
402
+ "learning_rate": 0.0001569484493168452,
403
+ "loss": 0.8616,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.6945337620578779,
408
+ "grad_norm": 0.20934121310710907,
409
+ "learning_rate": 0.00015508969814521025,
410
+ "loss": 0.7366,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.707395498392283,
415
+ "grad_norm": 0.19671772420406342,
416
+ "learning_rate": 0.00015320320765153367,
417
+ "loss": 0.6683,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.7202572347266881,
422
+ "grad_norm": 0.2396930456161499,
423
+ "learning_rate": 0.00015128992774059063,
424
+ "loss": 0.721,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.7331189710610932,
429
+ "grad_norm": 0.23783567547798157,
430
+ "learning_rate": 0.0001493508218064347,
431
+ "loss": 0.6809,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.7459807073954984,
436
+ "grad_norm": 0.2352839708328247,
437
+ "learning_rate": 0.00014738686624729986,
438
+ "loss": 0.8257,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.7588424437299035,
443
+ "grad_norm": 0.24446754157543182,
444
+ "learning_rate": 0.00014539904997395468,
445
+ "loss": 0.8681,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.7717041800643086,
450
+ "grad_norm": 0.21945470571517944,
451
+ "learning_rate": 0.00014338837391175582,
452
+ "loss": 0.7524,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.7717041800643086,
457
+ "eval_loss": 0.7785296440124512,
458
+ "eval_runtime": 7.0737,
459
+ "eval_samples_per_second": 23.185,
460
+ "eval_steps_per_second": 2.403,
461
+ "step": 60
462
  }
463
  ],
464
  "logging_steps": 1,
 
487
  "attributes": {}
488
  }
489
  },
490
+ "total_flos": 1.446291621543936e+16,
491
  "train_batch_size": 10,
492
  "trial_name": null,
493
  "trial_params": null