besimray commited on
Commit
0f3521a
·
verified ·
1 Parent(s): 90c0cac

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f7d1cab1f1c1f94445cd22369257aaa21529875106c6c56c58b5307c1bc477
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1cf37faf9885b6cb74c0c4ef4e2c2d7e9e4aab0773875932068d778e74f6f5
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa7b7ae7c5bb58cdf42f58142b085e62f4e7a4f192e231d24d38bd4992f8be42
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e3c513aa284f4a659f6163efed2ca9c3075b3ed338078bb8fc52b80c446eec
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cf47f03b057010c0300b7a3c8bdc5b1074d0d042fbd2a7295866ded90ee1395
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb63af91c38df442748945f1270836c41afb394cb114946f4943111c086f7fbe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50b80d146b8b37f82a1962ba385b8329fd6c2c35ba5116c53e131bb661ab681b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
- "epoch": 1.263157894736842,
5
  "eval_steps": 20,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -459,6 +459,154 @@
459
  "eval_samples_per_second": 48.71,
460
  "eval_steps_per_second": 4.871,
461
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  }
463
  ],
464
  "logging_steps": 1,
@@ -473,7 +621,7 @@
473
  "early_stopping_threshold": 0.0
474
  },
475
  "attributes": {
476
- "early_stopping_patience_counter": 0
477
  }
478
  },
479
  "TrainerControl": {
@@ -487,7 +635,7 @@
487
  "attributes": {}
488
  }
489
  },
490
- "total_flos": 6036925560913920.0,
491
  "train_batch_size": 10,
492
  "trial_name": null,
493
  "trial_params": null
 
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
+ "epoch": 1.6842105263157894,
5
  "eval_steps": 20,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
459
  "eval_samples_per_second": 48.71,
460
  "eval_steps_per_second": 4.871,
461
  "step": 60
462
+ },
463
+ {
464
+ "epoch": 1.2842105263157895,
465
+ "grad_norm": 0.4118487238883972,
466
+ "learning_rate": 0.00014135585049665207,
467
+ "loss": 0.9891,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 1.305263157894737,
472
+ "grad_norm": 0.32802432775497437,
473
+ "learning_rate": 0.00013930250316539238,
474
+ "loss": 0.9878,
475
+ "step": 62
476
+ },
477
+ {
478
+ "epoch": 1.3263157894736843,
479
+ "grad_norm": 0.41467538475990295,
480
+ "learning_rate": 0.00013722936584019453,
481
+ "loss": 1.0542,
482
+ "step": 63
483
+ },
484
+ {
485
+ "epoch": 1.3473684210526315,
486
+ "grad_norm": 0.39795804023742676,
487
+ "learning_rate": 0.0001351374824081343,
488
+ "loss": 1.1358,
489
+ "step": 64
490
+ },
491
+ {
492
+ "epoch": 1.368421052631579,
493
+ "grad_norm": 0.3385366201400757,
494
+ "learning_rate": 0.00013302790619551674,
495
+ "loss": 1.1107,
496
+ "step": 65
497
+ },
498
+ {
499
+ "epoch": 1.3894736842105262,
500
+ "grad_norm": 0.4300186336040497,
501
+ "learning_rate": 0.00013090169943749476,
502
+ "loss": 1.0554,
503
+ "step": 66
504
+ },
505
+ {
506
+ "epoch": 1.4105263157894736,
507
+ "grad_norm": 0.4523608982563019,
508
+ "learning_rate": 0.00012875993274320173,
509
+ "loss": 1.1442,
510
+ "step": 67
511
+ },
512
+ {
513
+ "epoch": 1.431578947368421,
514
+ "grad_norm": 0.48153308033943176,
515
+ "learning_rate": 0.00012660368455666752,
516
+ "loss": 1.1677,
517
+ "step": 68
518
+ },
519
+ {
520
+ "epoch": 1.4526315789473685,
521
+ "grad_norm": 0.46898069977760315,
522
+ "learning_rate": 0.0001244340406137894,
523
+ "loss": 1.1212,
524
+ "step": 69
525
+ },
526
+ {
527
+ "epoch": 1.4736842105263157,
528
+ "grad_norm": 0.3733386695384979,
529
+ "learning_rate": 0.00012225209339563145,
530
+ "loss": 0.9843,
531
+ "step": 70
532
+ },
533
+ {
534
+ "epoch": 1.4947368421052631,
535
+ "grad_norm": 0.4410829544067383,
536
+ "learning_rate": 0.00012005894157832729,
537
+ "loss": 1.1679,
538
+ "step": 71
539
+ },
540
+ {
541
+ "epoch": 1.5157894736842106,
542
+ "grad_norm": 0.46537336707115173,
543
+ "learning_rate": 0.00011785568947986367,
544
+ "loss": 1.0453,
545
+ "step": 72
546
+ },
547
+ {
548
+ "epoch": 1.5368421052631578,
549
+ "grad_norm": 0.39270663261413574,
550
+ "learning_rate": 0.0001156434465040231,
551
+ "loss": 1.1019,
552
+ "step": 73
553
+ },
554
+ {
555
+ "epoch": 1.5578947368421052,
556
+ "grad_norm": 0.3547813296318054,
557
+ "learning_rate": 0.00011342332658176555,
558
+ "loss": 0.9807,
559
+ "step": 74
560
+ },
561
+ {
562
+ "epoch": 1.5789473684210527,
563
+ "grad_norm": 0.33064335584640503,
564
+ "learning_rate": 0.00011119644761033078,
565
+ "loss": 0.9903,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 1.6,
570
+ "grad_norm": 0.41019386053085327,
571
+ "learning_rate": 0.00010896393089034336,
572
+ "loss": 0.9956,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.6210526315789475,
577
+ "grad_norm": 0.43731600046157837,
578
+ "learning_rate": 0.00010672690056120399,
579
+ "loss": 0.9657,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 1.6421052631578947,
584
+ "grad_norm": 0.38457056879997253,
585
+ "learning_rate": 0.00010448648303505151,
586
+ "loss": 1.1255,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 1.663157894736842,
591
+ "grad_norm": 0.4372155964374542,
592
+ "learning_rate": 0.00010224380642958052,
593
+ "loss": 1.105,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 1.6842105263157894,
598
+ "grad_norm": 0.4701666533946991,
599
+ "learning_rate": 0.0001,
600
+ "loss": 1.0658,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 1.6842105263157894,
605
+ "eval_loss": 1.152337908744812,
606
+ "eval_runtime": 2.0462,
607
+ "eval_samples_per_second": 48.871,
608
+ "eval_steps_per_second": 4.887,
609
+ "step": 80
610
  }
611
  ],
612
  "logging_steps": 1,
 
621
  "early_stopping_threshold": 0.0
622
  },
623
  "attributes": {
624
+ "early_stopping_patience_counter": 1
625
  }
626
  },
627
  "TrainerControl": {
 
635
  "attributes": {}
636
  }
637
  },
638
+ "total_flos": 8119797185249280.0,
639
  "train_batch_size": 10,
640
  "trial_name": null,
641
  "trial_params": null