error577 committed on
Commit
868db76
·
verified ·
1 Parent(s): ead86a9

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e288abafc2c3cea0015871a382f7a51dded5e5cd5458c5bb4df2fc216162fde2
3
  size 500770656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eab27a442053707a3f6362261bb0b2ba1c21906fd347ae9e38c2846aad860c09
3
  size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b7c52a6a40977974af750230ec1751fd9c5b331d0ff37beccb418d1a5813a71
3
  size 134320806
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616d57538b5f356aa4760a011e22fc28e67c199fc0f01391ba864a885de75471
3
  size 134320806
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35a6d2cf966ab0e16fb93f8d45df1efd541d1aa6a8117be3ca9cb938b2babc67
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f22fe94279dc5b4fc0860cf06ff132979d7d9cce6691fec29243b2df2a3eab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b00bb9fde45e0630a7244ca815d2d61a6dbf25a1a1b403dd491700003e109bf9
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d1867500d89be5276678cb80137dde3c5d6682d87aba4b3c93400957d5bb560
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.107336401939392,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.09696633882809254,
5
  "eval_steps": 100,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -561,6 +561,84 @@
561
  "eval_samples_per_second": 3.126,
562
  "eval_steps_per_second": 3.126,
563
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  }
565
  ],
566
  "logging_steps": 10,
@@ -575,7 +653,7 @@
575
  "early_stopping_threshold": 0.0
576
  },
577
  "attributes": {
578
- "early_stopping_patience_counter": 2
579
  }
580
  },
581
  "TrainerControl": {
@@ -584,12 +662,12 @@
584
  "should_evaluate": false,
585
  "should_log": false,
586
  "should_save": true,
587
- "should_training_stop": false
588
  },
589
  "attributes": {}
590
  }
591
  },
592
- "total_flos": 8.19245323911168e+16,
593
  "train_batch_size": 1,
594
  "trial_name": null,
595
  "trial_params": null
 
1
  {
2
  "best_metric": 1.107336401939392,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.11081867294639147,
5
  "eval_steps": 100,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
561
  "eval_samples_per_second": 3.126,
562
  "eval_steps_per_second": 3.126,
563
  "step": 700
564
+ },
565
+ {
566
+ "epoch": 0.09835157223992243,
567
+ "grad_norm": 7.35622501373291,
568
+ "learning_rate": 0.0001995125348912552,
569
+ "loss": 3.5653,
570
+ "step": 710
571
+ },
572
+ {
573
+ "epoch": 0.09973680565175232,
574
+ "grad_norm": 21.072219848632812,
575
+ "learning_rate": 0.00019949809939134866,
576
+ "loss": 3.8766,
577
+ "step": 720
578
+ },
579
+ {
580
+ "epoch": 0.10112203906358222,
581
+ "grad_norm": 35.778114318847656,
582
+ "learning_rate": 0.00019948346016462892,
583
+ "loss": 5.7792,
584
+ "step": 730
585
+ },
586
+ {
587
+ "epoch": 0.10250727247541211,
588
+ "grad_norm": 12.812899589538574,
589
+ "learning_rate": 0.00019946860265918076,
590
+ "loss": 5.0388,
591
+ "step": 740
592
+ },
593
+ {
594
+ "epoch": 0.103892505887242,
595
+ "grad_norm": 76.3056640625,
596
+ "learning_rate": 0.00019945355597883463,
597
+ "loss": 5.4607,
598
+ "step": 750
599
+ },
600
+ {
601
+ "epoch": 0.10527773929907189,
602
+ "grad_norm": 7.007827281951904,
603
+ "learning_rate": 0.00019943827646784484,
604
+ "loss": 3.4849,
605
+ "step": 760
606
+ },
607
+ {
608
+ "epoch": 0.10666297271090179,
609
+ "grad_norm": 12.457751274108887,
610
+ "learning_rate": 0.00019942279323004186,
611
+ "loss": 3.9192,
612
+ "step": 770
613
+ },
614
+ {
615
+ "epoch": 0.10804820612273168,
616
+ "grad_norm": 8.994271278381348,
617
+ "learning_rate": 0.00019940710626542568,
618
+ "loss": 4.6842,
619
+ "step": 780
620
+ },
621
+ {
622
+ "epoch": 0.10943343953456157,
623
+ "grad_norm": 13.488161087036133,
624
+ "learning_rate": 0.00019939120102208108,
625
+ "loss": 4.5099,
626
+ "step": 790
627
+ },
628
+ {
629
+ "epoch": 0.11081867294639147,
630
+ "grad_norm": 100.06396484375,
631
+ "learning_rate": 0.0001993751066038385,
632
+ "loss": 5.7456,
633
+ "step": 800
634
+ },
635
+ {
636
+ "epoch": 0.11081867294639147,
637
+ "eval_loss": 1.1453617811203003,
638
+ "eval_runtime": 46.7089,
639
+ "eval_samples_per_second": 3.126,
640
+ "eval_steps_per_second": 3.126,
641
+ "step": 800
642
  }
643
  ],
644
  "logging_steps": 10,
 
653
  "early_stopping_threshold": 0.0
654
  },
655
  "attributes": {
656
+ "early_stopping_patience_counter": 3
657
  }
658
  },
659
  "TrainerControl": {
 
662
  "should_evaluate": false,
663
  "should_log": false,
664
  "should_save": true,
665
+ "should_training_stop": true
666
  },
667
  "attributes": {}
668
  }
669
  },
670
+ "total_flos": 9.394411707039744e+16,
671
  "train_batch_size": 1,
672
  "trial_name": null,
673
  "trial_params": null