radna commited on
Commit
ecdfec9
·
verified ·
1 Parent(s): d29ec2c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-10/adapter_config.json +2 -2
  2. checkpoint-10/global_step10/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
  3. checkpoint-10/global_step10/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
  4. checkpoint-10/global_step10/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
  5. checkpoint-10/global_step10/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
  6. checkpoint-10/global_step10/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
  7. checkpoint-10/global_step10/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
  8. checkpoint-10/global_step10/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
  9. checkpoint-10/global_step10/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
  10. checkpoint-10/rng_state_16.pth +3 -0
  11. checkpoint-10/rng_state_17.pth +3 -0
  12. checkpoint-10/rng_state_18.pth +3 -0
  13. checkpoint-10/rng_state_19.pth +3 -0
  14. checkpoint-10/rng_state_20.pth +3 -0
  15. checkpoint-10/rng_state_21.pth +3 -0
  16. checkpoint-10/rng_state_22.pth +3 -0
  17. checkpoint-10/rng_state_23.pth +3 -0
  18. checkpoint-10/trainer_state.json +16 -15
  19. checkpoint-10/training_args.bin +1 -1
  20. checkpoint-12/adapter_config.json +2 -2
  21. checkpoint-12/global_step12/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
  22. checkpoint-12/global_step12/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
  23. checkpoint-12/global_step12/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
  24. checkpoint-12/global_step12/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
  25. checkpoint-12/global_step12/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
  26. checkpoint-12/global_step12/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
  27. checkpoint-12/global_step12/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
  28. checkpoint-12/global_step12/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
  29. checkpoint-12/rng_state_16.pth +3 -0
  30. checkpoint-12/rng_state_17.pth +3 -0
  31. checkpoint-12/rng_state_18.pth +3 -0
  32. checkpoint-12/rng_state_19.pth +3 -0
  33. checkpoint-12/rng_state_20.pth +3 -0
  34. checkpoint-12/rng_state_21.pth +3 -0
  35. checkpoint-12/rng_state_22.pth +3 -0
  36. checkpoint-12/rng_state_23.pth +3 -0
  37. checkpoint-12/trainer_state.json +19 -18
  38. checkpoint-12/training_args.bin +1 -1
  39. checkpoint-14/adapter_config.json +2 -2
  40. checkpoint-14/global_step14/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
  41. checkpoint-14/global_step14/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
  42. checkpoint-14/global_step14/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
  43. checkpoint-14/global_step14/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
  44. checkpoint-14/global_step14/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
  45. checkpoint-14/global_step14/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
  46. checkpoint-14/global_step14/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
  47. checkpoint-14/global_step14/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
  48. checkpoint-14/rng_state_16.pth +3 -0
  49. checkpoint-14/rng_state_17.pth +3 -0
  50. checkpoint-14/rng_state_18.pth +3 -0
checkpoint-10/adapter_config.json CHANGED
@@ -24,10 +24,10 @@
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
- "gate_proj",
28
  "k_proj",
 
29
  "o_proj",
30
- "up_proj",
31
  "q_proj",
32
  "down_proj"
33
  ],
 
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
+ "up_proj",
28
  "k_proj",
29
+ "gate_proj",
30
  "o_proj",
 
31
  "q_proj",
32
  "down_proj"
33
  ],
checkpoint-10/global_step10/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e0c4d0d8ca773e1777b1c1e16af020993d22551ebcb2adf7ea67ef8f375907
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f47b10f14b06374d68fb3756cc8b6cd77dffb1391fc04529515fd4d49deb3bf7
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f1e86a3a53100c463ed130b6a93ce58e45e73d7d9fbfa2a7af6b26c8c5fa6c2
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d1d7f35387b368824418346713afcf0a735451c94078768eafb567f9875bfc
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b916d2e992e1c8b8c1742bb867ae3c769323ba0e94c43c22fcd66fb0c079b96
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ac51130567df50a5fa8c77c89e0d2ca250bfee52edd2b2f5719efb2ff36025
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c62c5fb4b96163a311d4179e475297fe6ec1cf43cab95fd8d3c518d26357ac6
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92df35f58ecc684b528fa303509abfbed79e2846e9d2b9fb16276adb90774df8
3
+ size 51616015
checkpoint-10/rng_state_16.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b407998b525b66bd58428611f35c2bcee3839aa1591cad9eb396102a75d9a3
3
+ size 16404
checkpoint-10/rng_state_17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f1ee09d6bd6278d3475a8d7f654e2e7ab24e0f4347cea12b35fde685694d11
3
+ size 16404
checkpoint-10/rng_state_18.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832de3be768fb57af62af75a77169dac73e185fb164da68db43d911b80b993e2
3
+ size 16404
checkpoint-10/rng_state_19.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7244b45e8524a60c2b661a2b80e2222715b34db86bc6e26e52ebf2f73ba9ab3f
3
+ size 16340
checkpoint-10/rng_state_20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da62553da56900d0a290bd3696bb3510f07ae2c803a5bbe8e38a1d1a9af68f9b
3
+ size 16340
checkpoint-10/rng_state_21.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c7be6022296f7fe848e5c9739de0e948e47c94c2fd582920669a74a3f417f61
3
+ size 16340
checkpoint-10/rng_state_22.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c2624e1cb099cd6788d65dd85a009386b8943d056ecafae70be7e564b94b66
3
+ size 16468
checkpoint-10/rng_state_23.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae46f07c7c82831665b01e967d9f53c4263d35b0043e55d0abecab6d18fb581
3
+ size 16468
checkpoint-10/trainer_state.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "best_metric": 0.012996690347790718,
3
  "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6",
4
  "epoch": 2.4210526315789473,
@@ -6,7 +7,7 @@
6
  "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
@@ -16,14 +17,14 @@
16
  "kl": 0.0,
17
  "learning_rate": 1.6666666666666667e-05,
18
  "loss": -0.11016345024108887,
19
- "memory(GiB)": 182.91,
20
  "response_clip_ratio": 0.11328125,
21
  "reward": -0.002658387296833098,
22
  "reward_std": 0.06134121119976044,
23
  "rewards/CosineReward": -0.0026579967816360295,
24
  "rewards/RepetitionPenalty": -3.8975886695880035e-07,
25
  "step": 1,
26
- "train_speed(iter/s)": 0.000242
27
  },
28
  {
29
  "clip_ratio": 0.0,
@@ -32,9 +33,9 @@
32
  "kl": 0.0,
33
  "learning_rate": 3.3333333333333335e-05,
34
  "loss": -0.11016345024108887,
35
- "memory(GiB)": 182.91,
36
  "step": 2,
37
- "train_speed(iter/s)": 0.000467
38
  },
39
  {
40
  "clip_ratio": 1.3441811461234465e-05,
@@ -44,7 +45,7 @@
44
  "kl": 9.50181856751442e-07,
45
  "learning_rate": 5e-05,
46
  "loss": -0.06604708731174469,
47
- "memory(GiB)": 182.91,
48
  "response_clip_ratio": 0.13671875,
49
  "reward": 0.0006296975770965219,
50
  "reward_std": 0.07172460854053497,
@@ -60,7 +61,7 @@
60
  "kl": 1.1101365089416504e-05,
61
  "learning_rate": 6.666666666666667e-05,
62
  "loss": -0.06727766245603561,
63
- "memory(GiB)": 182.91,
64
  "step": 4,
65
  "train_speed(iter/s)": 0.000458
66
  },
@@ -72,7 +73,7 @@
72
  "kl": 0.00017762184143066406,
73
  "learning_rate": 8.333333333333334e-05,
74
  "loss": -0.09315311908721924,
75
- "memory(GiB)": 182.91,
76
  "response_clip_ratio": 0.119140625,
77
  "reward": -0.005135859013535082,
78
  "reward_std": 0.07994875870645046,
@@ -86,9 +87,9 @@
86
  "grad_norm": 0.18263348937034607,
87
  "learning_rate": 0.0001,
88
  "loss": -0.1041698157787323,
89
- "memory(GiB)": 182.91,
90
  "step": 6,
91
- "train_speed(iter/s)": 0.000459
92
  },
93
  {
94
  "epoch": 1.4210526315789473,
@@ -101,7 +102,7 @@
101
  "eval_reward_std": 0.08769983053207397,
102
  "eval_rewards/CosineReward": 0.012996694073081017,
103
  "eval_rewards/RepetitionPenalty": 0.0,
104
- "eval_runtime": 1030.1127,
105
  "eval_samples_per_second": 0.001,
106
  "eval_steps_per_second": 0.001,
107
  "step": 6
@@ -114,7 +115,7 @@
114
  "kl": 0.017406463623046875,
115
  "learning_rate": 9.991540791356342e-05,
116
  "loss": -0.051375165581703186,
117
- "memory(GiB)": 182.91,
118
  "response_clip_ratio": 0.1484375,
119
  "reward": 0.004909618757665157,
120
  "reward_std": 0.08167182095348835,
@@ -130,7 +131,7 @@
130
  "kl": 0.089599609375,
131
  "learning_rate": 9.966191788709716e-05,
132
  "loss": -0.05105742812156677,
133
- "memory(GiB)": 182.91,
134
  "step": 8,
135
  "train_speed(iter/s)": 0.000433
136
  },
@@ -142,7 +143,7 @@
142
  "kl": 0.0963134765625,
143
  "learning_rate": 9.924038765061042e-05,
144
  "loss": -0.05842069163918495,
145
- "memory(GiB)": 182.91,
146
  "response_clip_ratio": 0.255859375,
147
  "reward": 0.03643610421568155,
148
  "reward_std": 0.11898956261575222,
@@ -158,7 +159,7 @@
158
  "kl": 0.1185302734375,
159
  "learning_rate": 9.865224352899119e-05,
160
  "loss": -0.06491819024085999,
161
- "memory(GiB)": 182.91,
162
  "step": 10,
163
  "train_speed(iter/s)": 0.000436
164
  }
 
1
  {
2
+ "best_global_step": 6,
3
  "best_metric": 0.012996690347790718,
4
  "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6",
5
  "epoch": 2.4210526315789473,
 
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
+ "is_world_process_zero": false,
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 176.98,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
24
  "rewards/CosineReward": -0.0026579967816360295,
25
  "rewards/RepetitionPenalty": -3.8975886695880035e-07,
26
  "step": 1,
27
+ "train_speed(iter/s)": 0.000241
28
  },
29
  {
30
  "clip_ratio": 0.0,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 176.98,
37
  "step": 2,
38
+ "train_speed(iter/s)": 0.000466
39
  },
40
  {
41
  "clip_ratio": 1.3441811461234465e-05,
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 176.98,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 176.98,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 176.98,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 176.98,
91
  "step": 6,
92
+ "train_speed(iter/s)": 0.000458
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1126,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 176.98,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 176.98,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 176.98,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 176.98,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  }
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044
3
  size 9809
checkpoint-12/adapter_config.json CHANGED
@@ -24,10 +24,10 @@
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
- "gate_proj",
28
  "k_proj",
 
29
  "o_proj",
30
- "up_proj",
31
  "q_proj",
32
  "down_proj"
33
  ],
 
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
+ "up_proj",
28
  "k_proj",
29
+ "gate_proj",
30
  "o_proj",
 
31
  "q_proj",
32
  "down_proj"
33
  ],
checkpoint-12/global_step12/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd0227e86cfa3a81c73db4156871e17a6b1c24313e6d6ffbcc435aef76b65fb
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00fee1025300c4be32a71428d645d217dd877f4c4e2025f4d8dc86dfde602a11
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e312e427ff6bd3b12f7c96cbe20aa4ad739c91ce076c0df6f0bbfcc2a6f5b29
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907c8fe038878cb9bc36c90b9f84b55ad238879a85caa93b684f850861b26720
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f96b95334ab00b824837fe37b1c856d308d6de83bdb23904228ea1806e606e2
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e122318d8256d9c002d012f93d137ee3776de45e41053b5a5c708b6713c7db26
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c343402a0e9b88311d79d2662bbf9309541944289b864369f9c50b8f422266ba
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b0fa393d6387a17fba2b052c770f27c183d6c94b5e9986a0a06aba0d5f34a8
3
+ size 51616015
checkpoint-12/rng_state_16.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62801652da7bb7998504dee1271843df4ed71ed6d734848c4f9b907228445fb8
3
+ size 16404
checkpoint-12/rng_state_17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18cd9119c5d34b9ee7e516544a59dd814b9a3256d261250524ec75c9cadc31a3
3
+ size 16404
checkpoint-12/rng_state_18.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2c3f80c6accb5324f8f6fc70ceda29815877d430783249b05dfef8a4de0082
3
+ size 16404
checkpoint-12/rng_state_19.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ed3ef9748edd204f4c0f8c55f7183358e3201779080ed05d1776ec253d9d6e
3
+ size 16340
checkpoint-12/rng_state_20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2446d98b74607d5086a9337f99092c7637cc524dd36756c24a17112a02afc0
3
+ size 16340
checkpoint-12/rng_state_21.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cbf14b3f1cda4c5d327abe5858e749c8e3e7539654f891bfa98dae142e2a5f1
3
+ size 16340
checkpoint-12/rng_state_22.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74629e3ba1b24c62a30a12c5d18c1f67e3f6a52d02e45bfd5625c3db5cd3ee98
3
+ size 16468
checkpoint-12/rng_state_23.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1660fe1c86d36b187818def78122b1c82cd7b56b1b91b35897e6214503f7dc8
3
+ size 16468
checkpoint-12/trainer_state.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "best_metric": 0.03234308212995529,
3
  "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
4
  "epoch": 2.8421052631578947,
@@ -6,7 +7,7 @@
6
  "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
@@ -16,14 +17,14 @@
16
  "kl": 0.0,
17
  "learning_rate": 1.6666666666666667e-05,
18
  "loss": -0.11016345024108887,
19
- "memory(GiB)": 182.91,
20
  "response_clip_ratio": 0.11328125,
21
  "reward": -0.002658387296833098,
22
  "reward_std": 0.06134121119976044,
23
  "rewards/CosineReward": -0.0026579967816360295,
24
  "rewards/RepetitionPenalty": -3.8975886695880035e-07,
25
  "step": 1,
26
- "train_speed(iter/s)": 0.000242
27
  },
28
  {
29
  "clip_ratio": 0.0,
@@ -32,9 +33,9 @@
32
  "kl": 0.0,
33
  "learning_rate": 3.3333333333333335e-05,
34
  "loss": -0.11016345024108887,
35
- "memory(GiB)": 182.91,
36
  "step": 2,
37
- "train_speed(iter/s)": 0.000467
38
  },
39
  {
40
  "clip_ratio": 1.3441811461234465e-05,
@@ -44,7 +45,7 @@
44
  "kl": 9.50181856751442e-07,
45
  "learning_rate": 5e-05,
46
  "loss": -0.06604708731174469,
47
- "memory(GiB)": 182.91,
48
  "response_clip_ratio": 0.13671875,
49
  "reward": 0.0006296975770965219,
50
  "reward_std": 0.07172460854053497,
@@ -60,7 +61,7 @@
60
  "kl": 1.1101365089416504e-05,
61
  "learning_rate": 6.666666666666667e-05,
62
  "loss": -0.06727766245603561,
63
- "memory(GiB)": 182.91,
64
  "step": 4,
65
  "train_speed(iter/s)": 0.000458
66
  },
@@ -72,7 +73,7 @@
72
  "kl": 0.00017762184143066406,
73
  "learning_rate": 8.333333333333334e-05,
74
  "loss": -0.09315311908721924,
75
- "memory(GiB)": 182.91,
76
  "response_clip_ratio": 0.119140625,
77
  "reward": -0.005135859013535082,
78
  "reward_std": 0.07994875870645046,
@@ -86,9 +87,9 @@
86
  "grad_norm": 0.18263348937034607,
87
  "learning_rate": 0.0001,
88
  "loss": -0.1041698157787323,
89
- "memory(GiB)": 182.91,
90
  "step": 6,
91
- "train_speed(iter/s)": 0.000459
92
  },
93
  {
94
  "epoch": 1.4210526315789473,
@@ -101,7 +102,7 @@
101
  "eval_reward_std": 0.08769983053207397,
102
  "eval_rewards/CosineReward": 0.012996694073081017,
103
  "eval_rewards/RepetitionPenalty": 0.0,
104
- "eval_runtime": 1030.1127,
105
  "eval_samples_per_second": 0.001,
106
  "eval_steps_per_second": 0.001,
107
  "step": 6
@@ -114,7 +115,7 @@
114
  "kl": 0.017406463623046875,
115
  "learning_rate": 9.991540791356342e-05,
116
  "loss": -0.051375165581703186,
117
- "memory(GiB)": 182.91,
118
  "response_clip_ratio": 0.1484375,
119
  "reward": 0.004909618757665157,
120
  "reward_std": 0.08167182095348835,
@@ -130,7 +131,7 @@
130
  "kl": 0.089599609375,
131
  "learning_rate": 9.966191788709716e-05,
132
  "loss": -0.05105742812156677,
133
- "memory(GiB)": 182.91,
134
  "step": 8,
135
  "train_speed(iter/s)": 0.000433
136
  },
@@ -142,7 +143,7 @@
142
  "kl": 0.0963134765625,
143
  "learning_rate": 9.924038765061042e-05,
144
  "loss": -0.05842069163918495,
145
- "memory(GiB)": 182.91,
146
  "response_clip_ratio": 0.255859375,
147
  "reward": 0.03643610421568155,
148
  "reward_std": 0.11898956261575222,
@@ -158,7 +159,7 @@
158
  "kl": 0.1185302734375,
159
  "learning_rate": 9.865224352899119e-05,
160
  "loss": -0.06491819024085999,
161
- "memory(GiB)": 182.91,
162
  "step": 10,
163
  "train_speed(iter/s)": 0.000436
164
  },
@@ -170,7 +171,7 @@
170
  "kl": 0.1275634765625,
171
  "learning_rate": 9.789947561577445e-05,
172
  "loss": -0.04600231721997261,
173
- "memory(GiB)": 182.91,
174
  "response_clip_ratio": 0.361328125,
175
  "reward": 0.023204635945148766,
176
  "reward_std": 0.10593634657561779,
@@ -184,7 +185,7 @@
184
  "grad_norm": 0.05781339108943939,
185
  "learning_rate": 9.698463103929542e-05,
186
  "loss": -0.05069056898355484,
187
- "memory(GiB)": 182.91,
188
  "step": 12,
189
  "train_speed(iter/s)": 0.000439
190
  },
@@ -199,7 +200,7 @@
199
  "eval_reward_std": 0.10685288906097412,
200
  "eval_rewards/CosineReward": 0.03234308212995529,
201
  "eval_rewards/RepetitionPenalty": 0.0,
202
- "eval_runtime": 1025.9041,
203
  "eval_samples_per_second": 0.001,
204
  "eval_steps_per_second": 0.001,
205
  "step": 12
 
1
  {
2
+ "best_global_step": 12,
3
  "best_metric": 0.03234308212995529,
4
  "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
5
  "epoch": 2.8421052631578947,
 
7
  "global_step": 12,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
+ "is_world_process_zero": false,
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 176.98,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
24
  "rewards/CosineReward": -0.0026579967816360295,
25
  "rewards/RepetitionPenalty": -3.8975886695880035e-07,
26
  "step": 1,
27
+ "train_speed(iter/s)": 0.000241
28
  },
29
  {
30
  "clip_ratio": 0.0,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 176.98,
37
  "step": 2,
38
+ "train_speed(iter/s)": 0.000466
39
  },
40
  {
41
  "clip_ratio": 1.3441811461234465e-05,
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 176.98,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 176.98,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 176.98,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 176.98,
91
  "step": 6,
92
+ "train_speed(iter/s)": 0.000458
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1126,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 176.98,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 176.98,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 176.98,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 176.98,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
 
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
+ "memory(GiB)": 187.02,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
 
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
+ "memory(GiB)": 187.02,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
 
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
+ "eval_runtime": 1025.9048,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
checkpoint-12/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044
3
  size 9809
checkpoint-14/adapter_config.json CHANGED
@@ -24,10 +24,10 @@
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
- "gate_proj",
28
  "k_proj",
 
29
  "o_proj",
30
- "up_proj",
31
  "q_proj",
32
  "down_proj"
33
  ],
 
24
  "revision": null,
25
  "target_modules": [
26
  "v_proj",
27
+ "up_proj",
28
  "k_proj",
29
+ "gate_proj",
30
  "o_proj",
 
31
  "q_proj",
32
  "down_proj"
33
  ],
checkpoint-14/global_step14/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0582b13ce2bea3a18df7b26c719cd0004488f486d8ae9583d5daf80d4ed897b
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0ff89afd35d3229ab9baf52fbe54c6eb8d2a060490c99679294172d1b398be
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062c8b969c5bf45384e64ab7c7075d2e2c3f94c9033f685ab032c794db7df520
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9997949f600b69ead3be0acb5766b9a57f987e0637153446bebf5afe99ccf24b
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c840ea51b03be2c2807a3fff2f59fe9ff212f79551e415314348227714ca42d
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f165f270b45fc392a56019942ff0090f901eacb197b225db74fc2c0a943122a
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7392ded59260dfd58da3dd886ff43616633625a757815568c03deab0b9f2cb76
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a132cb9ca836ba4cca2f97c715e34ad55464f19ca87edc3da52d092cc975bf5
3
+ size 51616015
checkpoint-14/rng_state_16.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d840b0364c1a570779728649f1039d445383c4edac5486e3a66c94317e1916b
3
+ size 16404
checkpoint-14/rng_state_17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510dc95c4f2633cbc2b86c5ed1c9c1d7e471489f547625ceb3669eb644d31848
3
+ size 16404
checkpoint-14/rng_state_18.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93bdcb0f7609107d8eb7565bedf4a35aec622814b359b9877dff7cad7e714b95
3
+ size 16404