GhostScientist committed on
Commit
993e5f3
·
verified ·
1 Parent(s): fbd21a9

Training in progress, step 735, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5556c42afd4cbcccc6b8c7daeeed304b98a9f69cff50dce9ba7b205f95dd41cd
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bef26c2a2853a24f1b1c9805d18570a11582a968162d8ec559b2eb99caf6a905
3
  size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5729627a47d753fb4652198309b47c4ee8d996325418926805a63ba81123f555
3
  size 1292182139
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11d1a002f539fc5bc604de71a5a7d6f1fa4338b74515c7488f80fcd90cbd2908
3
  size 1292182139
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69507df2b8a5c1c7c0f2966d469e9dc7a76d78b5c8dd43ad70533a04eda7e6f9
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91bfd02584ff4a4b7257c19902379baf05dddfa2f12bf1176cb250ae41bd5889
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f290ffa291ab5f169b101e10cc8f320158a20b2ff211b18ae56cf14b41a64b36
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010a2386ada1bde03d03b30051618785b4ef8543babe46fbc7b87cf0823779a5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 100,
3
  "best_metric": 0.3715035319328308,
4
  "best_model_checkpoint": "qwen2.5-coder-7b-agentic-cot/checkpoint-100",
5
- "epoch": 14.290155440414507,
6
  "eval_steps": 50,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -862,6 +862,36 @@
862
  "eval_samples_per_second": 1.873,
863
  "eval_steps_per_second": 0.261,
864
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865
  }
866
  ],
867
  "logging_steps": 10,
@@ -876,12 +906,12 @@
876
  "should_evaluate": false,
877
  "should_log": false,
878
  "should_save": true,
879
- "should_training_stop": false
880
  },
881
  "attributes": {}
882
  }
883
  },
884
- "total_flos": 9.803058972717158e+17,
885
  "train_batch_size": 2,
886
  "trial_name": null,
887
  "trial_params": null
 
2
  "best_global_step": 100,
3
  "best_metric": 0.3715035319328308,
4
  "best_model_checkpoint": "qwen2.5-coder-7b-agentic-cot/checkpoint-100",
5
+ "epoch": 15.0,
6
  "eval_steps": 50,
7
+ "global_step": 735,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
862
  "eval_samples_per_second": 1.873,
863
  "eval_steps_per_second": 0.261,
864
  "step": 700
865
+ },
866
+ {
867
+ "entropy": 0.005753540876321494,
868
+ "epoch": 14.49740932642487,
869
+ "grad_norm": 0.01792309246957302,
870
+ "learning_rate": 6.839272934511143e-07,
871
+ "loss": 0.0027,
872
+ "mean_token_accuracy": 0.9992820754647255,
873
+ "num_tokens": 22815368.0,
874
+ "step": 710
875
+ },
876
+ {
877
+ "entropy": 0.005742728849872947,
878
+ "epoch": 14.704663212435234,
879
+ "grad_norm": 0.017534621059894562,
880
+ "learning_rate": 2.591857829770672e-07,
881
+ "loss": 0.0027,
882
+ "mean_token_accuracy": 0.9992718860507012,
883
+ "num_tokens": 23142299.0,
884
+ "step": 720
885
+ },
886
+ {
887
+ "entropy": 0.005674040759913623,
888
+ "epoch": 14.911917098445596,
889
+ "grad_norm": 0.016848629340529442,
890
+ "learning_rate": 3.6461540339682855e-08,
891
+ "loss": 0.0026,
892
+ "mean_token_accuracy": 0.9993263006210327,
893
+ "num_tokens": 23467258.0,
894
+ "step": 730
895
  }
896
  ],
897
  "logging_steps": 10,
 
906
  "should_evaluate": false,
907
  "should_log": false,
908
  "should_save": true,
909
+ "should_training_stop": true
910
  },
911
  "attributes": {}
912
  }
913
  },
914
+ "total_flos": 1.029005584744704e+18,
915
  "train_batch_size": 2,
916
  "trial_name": null,
917
  "trial_params": null