FormlessAI commited on
Commit
c8c4568
·
verified ·
1 Parent(s): db05d0b

Training in progress, step 360, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8bcebe942c9a8b99ef56e2c5446f9da9d60e5a25fc8214a7efdd1e72a32d1e8
3
  size 80792880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:145c54eac1ed297a2be387fb3da1ca50f3997b3a4ad127c5ef0a43dbc2b62306
3
  size 80792880
last-checkpoint/global_step360/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37321759e325e8f28673e9f84f6c30337b948e8504dcc735f2ea68ac33fc875a
3
+ size 50635184
last-checkpoint/global_step360/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab46a061ba4255ab5eb5bf9538f1e050241474360869db22a40d322d051341ca
3
+ size 50635248
last-checkpoint/global_step360/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cafea06a22eaea8301f18737cdaaad4292543e1aac3ac5467258b2e0f7fd110
3
+ size 50635248
last-checkpoint/global_step360/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a501920a2f435e14830b91b8794ade47441ece0f291b92eea363012e9eaf5c54
3
+ size 50635248
last-checkpoint/global_step360/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df26171df51686229492d5e3350890bc09c6eabfed73df3178d05a938d5edbbc
3
+ size 81040172
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step340
 
1
+ global_step360
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9ee4edfcbe4b77ace2b87141b8bd957560ff5cf1e9769ddb40bc10245e61753
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6f1f4c8f24e0c563b224df07f972a9c03fd55885f468ac05d4936cb17a58f2
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1a8214cc5d40141141c8f71962d04bf800f999c32180dcd7ec3ae74c2cfd33a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53fbbfc6f98216ba3e33af1efdac4f926f502586f2aaf9e56b638c8ee8c6ad2
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdcbe52a4efc157b4e1fec0014b1ef1c4a099e5954abc079e099fd3c7d5f7e77
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df243d851095642cd3b676fddc7012d10efdba379f7ecb37a141453a1d3a54dc
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ebda69adf36c903e083c2a9ebbd95cf2d544fccc910933692d5edd4959d5b9e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a873ca510bfc78f1c5c1aee0fe58b28053e6be23007bbef3d27b40cf337bee9f
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15b9006a0f681367a429404e804cfe8e2e383020b17f445e8fea72a4879f60b6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fcd358d907eb48a96def3a4b1ad085129b5a2436573e5c22da7e483872863ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 330,
3
- "best_metric": 0.5116480588912964,
4
- "best_model_checkpoint": "miner_id_24/checkpoint-320",
5
- "epoch": 0.06859333232460786,
6
  "eval_steps": 10,
7
- "global_step": 340,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -764,6 +764,50 @@
764
  "eval_samples_per_second": 7.829,
765
  "eval_steps_per_second": 0.49,
766
  "step": 340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  }
768
  ],
769
  "logging_steps": 5,
@@ -778,7 +822,7 @@
778
  "early_stopping_threshold": 0.0
779
  },
780
  "attributes": {
781
- "early_stopping_patience_counter": 1
782
  }
783
  },
784
  "TrainerControl": {
@@ -792,7 +836,7 @@
792
  "attributes": {}
793
  }
794
  },
795
- "total_flos": 1.901385200214999e+18,
796
  "train_batch_size": 1,
797
  "trial_name": null,
798
  "trial_params": null
 
1
  {
2
+ "best_global_step": 360,
3
+ "best_metric": 0.5115170478820801,
4
+ "best_model_checkpoint": "miner_id_24/checkpoint-360",
5
+ "epoch": 0.07262823422605538,
6
  "eval_steps": 10,
7
+ "global_step": 360,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
764
  "eval_samples_per_second": 7.829,
765
  "eval_steps_per_second": 0.49,
766
  "step": 340
767
+ },
768
+ {
769
+ "epoch": 0.06960205779996974,
770
+ "grad_norm": 0.14681710302829742,
771
+ "learning_rate": 4.179195491219353e-05,
772
+ "loss": 0.6052,
773
+ "step": 345
774
+ },
775
+ {
776
+ "epoch": 0.07061078327533162,
777
+ "grad_norm": 0.21438910067081451,
778
+ "learning_rate": 4.1552103789056286e-05,
779
+ "loss": 0.4697,
780
+ "step": 350
781
+ },
782
+ {
783
+ "epoch": 0.07061078327533162,
784
+ "eval_loss": 0.5133654475212097,
785
+ "eval_runtime": 1026.2871,
786
+ "eval_samples_per_second": 8.586,
787
+ "eval_steps_per_second": 0.537,
788
+ "step": 350
789
+ },
790
+ {
791
+ "epoch": 0.07161950875069349,
792
+ "grad_norm": 0.0766027569770813,
793
+ "learning_rate": 4.130950877214683e-05,
794
+ "loss": 0.3593,
795
+ "step": 355
796
+ },
797
+ {
798
+ "epoch": 0.07262823422605538,
799
+ "grad_norm": 0.09048033505678177,
800
+ "learning_rate": 4.106421007719631e-05,
801
+ "loss": 0.4417,
802
+ "step": 360
803
+ },
804
+ {
805
+ "epoch": 0.07262823422605538,
806
+ "eval_loss": 0.5115170478820801,
807
+ "eval_runtime": 817.5154,
808
+ "eval_samples_per_second": 10.779,
809
+ "eval_steps_per_second": 0.674,
810
+ "step": 360
811
  }
812
  ],
813
  "logging_steps": 5,
 
822
  "early_stopping_threshold": 0.0
823
  },
824
  "attributes": {
825
+ "early_stopping_patience_counter": 0
826
  }
827
  },
828
  "TrainerControl": {
 
836
  "attributes": {}
837
  }
838
  },
839
+ "total_flos": 2.0132313884629402e+18,
840
  "train_batch_size": 1,
841
  "trial_name": null,
842
  "trial_params": null