baby-dev committed on
Commit
01a4ee0
·
verified ·
1 Parent(s): 38822e4

Training in progress, step 3750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cbdc2b9825166869a6fac2e63a5a97fa4b37b5b84f026246356242c3f1b4f3c
3
  size 48552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c45c6e7cae74c7c01cc815be82d11c04abfc37e552bbb2e1edeb577ca83b42
3
  size 48552
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3ccbbe76b5289cfdf27cae32ff8667d16da79bd900a109b903abe92fb00891a
3
  size 107046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb068897f774dbfee50ed4f9898a74eb932cd172b2dae12cdd41b44fa863b09
3
  size 107046
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aebc4da58173c003f7aad6d162278501a1be585d313d2d5f53369f2e219408a6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f976ec367a6fae76ed6062feb3c28d822959b5991b851f1f820f4aede451cc53
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0afe5275623b07807c5bf749b62f86b7a1ab63345ab37a5eef17970d4cb1036
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f949a4e89c1a57ac6e6dbdd046680bfbcaae2a1fc8d3bcb9e95ba396ae344e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.898159980773926,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3450",
4
- "epoch": 29.93762993762994,
5
  "eval_steps": 150,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -711,6 +711,35 @@
711
  "eval_samples_per_second": 225.366,
712
  "eval_steps_per_second": 57.446,
713
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  }
715
  ],
716
  "logging_steps": 50,
@@ -725,7 +754,7 @@
725
  "early_stopping_threshold": 0.0
726
  },
727
  "attributes": {
728
- "early_stopping_patience_counter": 1
729
  }
730
  },
731
  "TrainerControl": {
@@ -734,12 +763,12 @@
734
  "should_evaluate": false,
735
  "should_log": false,
736
  "should_save": true,
737
- "should_training_stop": false
738
  },
739
  "attributes": {}
740
  }
741
  },
742
- "total_flos": 4686500364288.0,
743
  "train_batch_size": 4,
744
  "trial_name": null,
745
  "trial_params": null
 
1
  {
2
  "best_metric": 11.898159980773926,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3450",
4
+ "epoch": 31.185031185031185,
5
  "eval_steps": 150,
6
+ "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
711
  "eval_samples_per_second": 225.366,
712
  "eval_steps_per_second": 57.446,
713
  "step": 3600
714
+ },
715
+ {
716
+ "epoch": 30.353430353430355,
717
+ "grad_norm": 0.07721268385648727,
718
+ "learning_rate": 3.956689608863522e-05,
719
+ "loss": 12.0689,
720
+ "step": 3650
721
+ },
722
+ {
723
+ "epoch": 30.76923076923077,
724
+ "grad_norm": 0.05511786788702011,
725
+ "learning_rate": 3.87275474231996e-05,
726
+ "loss": 11.8995,
727
+ "step": 3700
728
+ },
729
+ {
730
+ "epoch": 31.185031185031185,
731
+ "grad_norm": 0.0616980604827404,
732
+ "learning_rate": 3.788819875776398e-05,
733
+ "loss": 12.0734,
734
+ "step": 3750
735
+ },
736
+ {
737
+ "epoch": 31.185031185031185,
738
+ "eval_loss": 11.89826488494873,
739
+ "eval_runtime": 0.4524,
740
+ "eval_samples_per_second": 225.481,
741
+ "eval_steps_per_second": 57.475,
742
+ "step": 3750
743
  }
744
  ],
745
  "logging_steps": 50,
 
754
  "early_stopping_threshold": 0.0
755
  },
756
  "attributes": {
757
+ "early_stopping_patience_counter": 2
758
  }
759
  },
760
  "TrainerControl": {
 
763
  "should_evaluate": false,
764
  "should_log": false,
765
  "should_save": true,
766
+ "should_training_stop": true
767
  },
768
  "attributes": {}
769
  }
770
  },
771
+ "total_flos": 4881642258432.0,
772
  "train_batch_size": 4,
773
  "trial_name": null,
774
  "trial_params": null