diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index d9543360277532e9321b29edfc3279b65f6493cb..0d8d96672052972cd01d7af2ed175e36d6f9d5e0 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 6, "best_metric": 0.012996690347790718, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6", "epoch": 2.4210526315789473, @@ -6,7 +7,7 @@ "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 } diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-12/adapter_config.json b/checkpoint-12/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-12/adapter_config.json +++ b/checkpoint-12/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-12/trainer_state.json b/checkpoint-12/trainer_state.json index 63af88b38df0307c22be3faf58351ed68fccbb37..e2fc6f34722a82aeb5916c74ff2a4a44a712399b 100644 --- a/checkpoint-12/trainer_state.json +++ b/checkpoint-12/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 12, "best_metric": 0.03234308212995529, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12", "epoch": 2.8421052631578947, @@ -6,7 +7,7 @@ "global_step": 12, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 diff --git a/checkpoint-12/training_args.bin b/checkpoint-12/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-12/training_args.bin +++ b/checkpoint-12/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-14/adapter_config.json b/checkpoint-14/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-14/adapter_config.json +++ b/checkpoint-14/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-14/trainer_state.json b/checkpoint-14/trainer_state.json index 0283ff9c783b7aa5ca912544b2dd35fcf23f9a99..bc72987d815f3ee4b46df1bd1a0e97cfd6670b2f 100644 --- a/checkpoint-14/trainer_state.json +++ b/checkpoint-14/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 12, "best_metric": 0.03234308212995529, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12", "epoch": 3.4210526315789473, @@ -6,7 +7,7 @@ "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 } diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-14/training_args.bin +++ b/checkpoint-14/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-16/adapter_config.json b/checkpoint-16/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-16/adapter_config.json +++ b/checkpoint-16/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-16/trainer_state.json b/checkpoint-16/trainer_state.json index 9227c90abf65fb0517b0d1c7eb78e18f0426365e..936f05557643f63325a6c93d12f959ac26c3cd7f 100644 --- a/checkpoint-16/trainer_state.json +++ b/checkpoint-16/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 12, "best_metric": 0.03234308212995529, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12", "epoch": 3.8421052631578947, @@ -6,7 +7,7 @@ "global_step": 16, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 } diff --git a/checkpoint-16/training_args.bin b/checkpoint-16/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-16/training_args.bin +++ b/checkpoint-16/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-18/adapter_config.json b/checkpoint-18/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-18/adapter_config.json +++ b/checkpoint-18/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-18/trainer_state.json b/checkpoint-18/trainer_state.json index 5e552ea3183048034e8097ff86a4f6d1c899830d..4581e6296e397edb0e50965c1236e116214abdf4 100644 --- a/checkpoint-18/trainer_state.json +++ b/checkpoint-18/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 18, "best_metric": 0.03729328140616417, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18", "epoch": 4.421052631578947, @@ -6,7 +7,7 @@ "global_step": 18, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 diff --git a/checkpoint-18/training_args.bin b/checkpoint-18/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-18/training_args.bin +++ b/checkpoint-18/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-2/adapter_config.json b/checkpoint-2/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-2/adapter_config.json +++ b/checkpoint-2/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-2/trainer_state.json b/checkpoint-2/trainer_state.json index abb60dd05407f920c1cb0c23f346741d8e8e4205..cc36da2c33e4a28606796c2458a219824cd1bd43 100644 --- a/checkpoint-2/trainer_state.json +++ b/checkpoint-2/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.42105263157894735, @@ -6,7 +7,7 @@ "global_step": 2, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 } ], "logging_steps": 1, diff --git a/checkpoint-2/training_args.bin b/checkpoint-2/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-2/training_args.bin +++ b/checkpoint-2/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index 939e242f4ac78f4a828aaf48a8eafdabdd7268dd..a6123962584e4b002aab697407126e40d2fc1302 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 18, "best_metric": 0.03729328140616417, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18", "epoch": 4.842105263157895, @@ -6,7 +7,7 @@ "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 } diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-22/adapter_config.json b/checkpoint-22/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-22/adapter_config.json +++ b/checkpoint-22/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-22/trainer_state.json b/checkpoint-22/trainer_state.json index f25a50fd2117664a679744f3e1068c255dd295e1..00588ae573da776f25af27a488a14e7cc18fa07b 100644 --- a/checkpoint-22/trainer_state.json +++ b/checkpoint-22/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 18, "best_metric": 0.03729328140616417, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18", "epoch": 5.421052631578947, @@ -6,7 +7,7 @@ "global_step": 22, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -338,14 +339,14 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, "rewards/CosineReward": 0.049969930201768875, "rewards/RepetitionPenalty": -1.1864573679076784e-07, "step": 21, - "train_speed(iter/s)": 0.000408 + "train_speed(iter/s)": 0.000407 }, { "clip_ratio": 5.869188044016482e-05, @@ -354,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 } diff --git a/checkpoint-22/training_args.bin b/checkpoint-22/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-22/training_args.bin +++ b/checkpoint-22/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-24/adapter_config.json b/checkpoint-24/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-24/adapter_config.json +++ b/checkpoint-24/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-24/trainer_state.json b/checkpoint-24/trainer_state.json index 28c98e01019184c604a3825cec4b497fc6482848..71fe7f2e41930e700b7ad6623db68a42930e3853 100644 --- a/checkpoint-24/trainer_state.json +++ b/checkpoint-24/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 24, "best_metric": 0.04339282959699631, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24", "epoch": 5.842105263157895, @@ -6,7 +7,7 @@ "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -338,14 +339,14 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, "rewards/CosineReward": 0.049969930201768875, "rewards/RepetitionPenalty": -1.1864573679076784e-07, "step": 21, - "train_speed(iter/s)": 0.000408 + "train_speed(iter/s)": 0.000407 }, { "clip_ratio": 5.869188044016482e-05, @@ -354,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -366,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -380,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -395,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0632, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 diff --git a/checkpoint-24/training_args.bin b/checkpoint-24/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-24/training_args.bin +++ b/checkpoint-24/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-26/adapter_config.json b/checkpoint-26/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-26/adapter_config.json +++ b/checkpoint-26/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-26/trainer_state.json b/checkpoint-26/trainer_state.json index 5b072b987db923c7897214c7f630cb7ed87c56ab..001905ec4de0241ac02cb975479cb8b9bb85f854 100644 --- a/checkpoint-26/trainer_state.json +++ b/checkpoint-26/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 24, "best_metric": 0.04339282959699631, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24", "epoch": 6.421052631578947, @@ -6,7 +7,7 @@ "global_step": 26, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -338,14 +339,14 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, "rewards/CosineReward": 0.049969930201768875, "rewards/RepetitionPenalty": -1.1864573679076784e-07, "step": 21, - "train_speed(iter/s)": 0.000408 + "train_speed(iter/s)": 0.000407 }, { "clip_ratio": 5.869188044016482e-05, @@ -354,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -366,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -380,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -395,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0632, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -408,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -424,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 26, "train_speed(iter/s)": 0.000421 } diff --git a/checkpoint-26/training_args.bin b/checkpoint-26/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-26/training_args.bin +++ b/checkpoint-26/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-28/adapter_config.json b/checkpoint-28/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-28/adapter_config.json +++ b/checkpoint-28/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json index 148e40ed0b36a607a26cd48bcc7a44e47f551f9b..5ffa656555aedc46214a3328f7b650204c2802b0 100644 --- a/checkpoint-28/trainer_state.json +++ b/checkpoint-28/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 24, "best_metric": 0.04339282959699631, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24", "epoch": 6.842105263157895, @@ -6,7 +7,7 @@ "global_step": 28, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -338,14 +339,14 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, "rewards/CosineReward": 0.049969930201768875, "rewards/RepetitionPenalty": -1.1864573679076784e-07, "step": 21, - "train_speed(iter/s)": 0.000408 + "train_speed(iter/s)": 0.000407 }, { "clip_ratio": 5.869188044016482e-05, @@ -354,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -366,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -380,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -395,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0632, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -408,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -424,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -436,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -452,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 28, "train_speed(iter/s)": 0.000422 } diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-28/training_args.bin +++ b/checkpoint-28/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-30/adapter_config.json +++ b/checkpoint-30/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb43d5dca8b92ffceb8ebba4404fc2367166522e --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e55637d10a606b1c4548d11222cf2203be3732556568beb057466cdc664ad4d +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c61c965ed1686d63ae8ed0a1b0ffd7ccdc2dbe90 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f918a8d78d02a6bb0c3b78aa535ba635edd803e57b1d29a398f8044ee1f23d +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa1f54b15730e35bc2f0078b343ee07dfbff372c --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e9a17698868042b6c5d413425d6800e585ce6747e333ca3c1f237999bee440 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cc8d4bcac4202781b16077c0ad6b450168c0901 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5466499266f4b34f807e3c8219f5d1dc961145b81040242cdc2604123afb29f +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3860f114cded2614ea8dd90d8b72910cf7f05130 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf7bbc813d8c4a9ddd84f99f272e1a3597a0aedd614f42be8b26292f36bd0d4 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65e9bfa1247341b50cecf3154415bee7f7e50a60 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b43f39939991ea04c843fb1af82098f35e53f66d1ed284bfe74b2b156e3c90 +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df4634055c355c3db2e9b2f7a2e6a765d9039192 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6064fa63ee43597a575223e5d27d1a9f78c46ca5d2249d2c286a8e11546906 +size 51616517 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7266fe615ba20e88e80ce21a5fd0a6a454a4559f --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e35fb26447a6dab0120c7bb123daf6c56d682fa095ba3520478075d361f49 +size 51616005 diff --git a/checkpoint-30/rng_state_10.pth b/checkpoint-30/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..965770ed4061278fbc6a380333ca53e206ccdc73 --- /dev/null +++ b/checkpoint-30/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2c4ae8a7a72d5fa3fffa5b1790f7c05f343652ff279b267cb02c1ea1450019 +size 16404 diff --git a/checkpoint-30/rng_state_11.pth b/checkpoint-30/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..28ccf0e9a87112f833ab7ecaef15634f0856600a --- /dev/null +++ b/checkpoint-30/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a2477af9f9146885ae739aae7fce8caed34fdcc7b6ba670110eaf5af9bbd4f +size 16468 diff --git a/checkpoint-30/rng_state_12.pth b/checkpoint-30/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0344f119fb73ae6afbf725c4c8f9a74ca20d114 --- /dev/null +++ b/checkpoint-30/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6e02e4e933e0565bcac02de812b35f312d7d3c1fa524e7ec565c0b58168b2c5 +size 16340 diff --git a/checkpoint-30/rng_state_13.pth b/checkpoint-30/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..33f1be82c52dec4c6734ef1193c1938638744673 --- /dev/null +++ b/checkpoint-30/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f60cc7fc25a6bf581d13962550c94c3f732a7fcebe112c13d88d7be34ff3ee +size 16468 diff --git a/checkpoint-30/rng_state_14.pth b/checkpoint-30/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f48d73b76dd2719989bcd738428f9973c4c5c6e --- /dev/null +++ b/checkpoint-30/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89c5c47e96e9a670905d6a024b717057f7e744cb15d841066c2543d38d493ca +size 16404 diff --git a/checkpoint-30/rng_state_15.pth b/checkpoint-30/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..5347fe369c76d3077f34309794035719eefab079 --- /dev/null +++ b/checkpoint-30/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a09586fa73abe0c5bd3addd127d400057b84dc07190df99c680834ca075d87 +size 16404 diff --git a/checkpoint-30/rng_state_8.pth b/checkpoint-30/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bab56b67ebfc85864072d85928a4a5d361714ab --- /dev/null +++ b/checkpoint-30/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806ea740b8aa5b01e84b7e8a2de7b547cd7ef04618091a007b4e2b7e2edcc6cb +size 16389 diff --git a/checkpoint-30/rng_state_9.pth b/checkpoint-30/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..c42c26bc420b81196fcc14d309a7fa338432bb25 --- /dev/null +++ b/checkpoint-30/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c96fddfdb910bf5037b513e22fd76243525edf1f04813a9013acd71733a822f +size 16389 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json index 535f7d018c6b69c58687ec47fad131e8d13e5836..7503eb1c9ef3a5129c7db19e8ca1496d6e560bab 100644 --- a/checkpoint-30/trainer_state.json +++ b/checkpoint-30/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 30, "best_metric": 0.05227778106927872, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-30", "epoch": 7.421052631578947, @@ -6,7 +7,7 @@ "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -142,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -158,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -170,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -184,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -199,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9041, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -212,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -228,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -240,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -256,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -268,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -282,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -297,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.231, + "eval_runtime": 1041.2321, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -310,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -326,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -338,14 +339,14 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, "rewards/CosineReward": 0.049969930201768875, "rewards/RepetitionPenalty": -1.1864573679076784e-07, "step": 21, - "train_speed(iter/s)": 0.000408 + "train_speed(iter/s)": 0.000407 }, { "clip_ratio": 5.869188044016482e-05, @@ -354,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -366,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -380,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -395,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0632, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -408,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -424,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -436,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -452,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 28, "train_speed(iter/s)": 0.000422 }, @@ -464,7 +465,7 @@ "kl": 0.18701171875, "learning_rate": 6.153079353712201e-05, "loss": -0.031890563666820526, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.541015625, "reward": 0.04964290652424097, "reward_std": 0.1329497341066599, @@ -478,7 +479,7 @@ "grad_norm": 0.014435957185924053, "learning_rate": 5.868240888334653e-05, "loss": -0.032097991555929184, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 30, "train_speed(iter/s)": 0.000423 }, @@ -493,7 +494,7 @@ "eval_reward_std": 0.1401301473379135, "eval_rewards/CosineReward": 0.05227777361869812, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1085.6092, + "eval_runtime": 1085.6089, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 30 diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-30/training_args.bin +++ b/checkpoint-30/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-4/adapter_config.json b/checkpoint-4/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-4/adapter_config.json +++ b/checkpoint-4/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-4/trainer_state.json b/checkpoint-4/trainer_state.json index f595f5961ff8df694c28b2d941589615801e0294..dc9c3073d2c9e1b5260f61f624b6fe4dc378bcd3 100644 --- a/checkpoint-4/trainer_state.json +++ b/checkpoint-4/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8421052631578947, @@ -6,7 +7,7 @@ "global_step": 4, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 } diff --git a/checkpoint-4/training_args.bin b/checkpoint-4/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-4/training_args.bin +++ b/checkpoint-4/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-6/adapter_config.json b/checkpoint-6/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-6/adapter_config.json +++ b/checkpoint-6/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-6/trainer_state.json b/checkpoint-6/trainer_state.json index 3667b41328b83c70faf4e00796621b5ccfe72baf..b3b3cc55dc3b19e5b194f4b07b4f5db7d73b0de8 100644 --- a/checkpoint-6/trainer_state.json +++ b/checkpoint-6/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 6, "best_metric": 0.012996690347790718, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6", "epoch": 1.4210526315789473, @@ -6,7 +7,7 @@ "global_step": 6, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 diff --git a/checkpoint-6/training_args.bin b/checkpoint-6/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-6/training_args.bin +++ b/checkpoint-6/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-8/adapter_config.json b/checkpoint-8/adapter_config.json index 3f8d06e83be137d4af154849ed1686625c42e280..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-8/adapter_config.json +++ b/checkpoint-8/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "v_proj", - "gate_proj", - "k_proj", + "down_proj", "o_proj", - "up_proj", "q_proj", - "down_proj" + "v_proj", + "up_proj", + "gate_proj", + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-8/trainer_state.json b/checkpoint-8/trainer_state.json index 1878466a3ed5546ada8088e46201029d0f48bef5..60713415d250188c737244a7e24c1300414b789f 100644 --- a/checkpoint-8/trainer_state.json +++ b/checkpoint-8/trainer_state.json @@ -1,4 +1,5 @@ { + "best_global_step": 6, "best_metric": 0.012996690347790718, "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6", "epoch": 1.8421052631578947, @@ -6,7 +7,7 @@ "global_step": 8, "is_hyper_param_search": false, "is_local_process_zero": true, - "is_world_process_zero": true, + "is_world_process_zero": false, "log_history": [ { "clip_ratio": 0.0, @@ -16,14 +17,14 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, "rewards/CosineReward": -0.0026579967816360295, "rewards/RepetitionPenalty": -3.8975886695880035e-07, "step": 1, - "train_speed(iter/s)": 0.000242 + "train_speed(iter/s)": 0.000241 }, { "clip_ratio": 0.0, @@ -32,9 +33,9 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 2, - "train_speed(iter/s)": 0.000467 + "train_speed(iter/s)": 0.000466 }, { "clip_ratio": 1.3441811461234465e-05, @@ -44,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -60,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -72,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -86,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 6, - "train_speed(iter/s)": 0.000459 + "train_speed(iter/s)": 0.000458 }, { "epoch": 1.4210526315789473, @@ -101,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1127, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -114,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -130,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 182.91, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 } diff --git a/checkpoint-8/training_args.bin b/checkpoint-8/training_args.bin index a5c75ad76398d7f403ccc1a74c463d2dab6465ca..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-8/training_args.bin +++ b/checkpoint-8/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1207fcb9d91c7deb13a80104f3ca89016b4cff3ef13ebd136ee6320d5a9888bb +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809