diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index 0d8d96672052972cd01d7af2ed175e36d6f9d5e0..5a79924b2d245a8abca9496ead30bfe57b2b71eb 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 } diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-12/adapter_config.json b/checkpoint-12/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-12/adapter_config.json +++ b/checkpoint-12/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-12/trainer_state.json b/checkpoint-12/trainer_state.json index e2fc6f34722a82aeb5916c74ff2a4a44a712399b..e881a9a8919b536e9e280ccff4f8b90228fbfebb 100644 --- a/checkpoint-12/trainer_state.json +++ b/checkpoint-12/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 diff --git a/checkpoint-12/training_args.bin b/checkpoint-12/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-12/training_args.bin +++ b/checkpoint-12/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-14/adapter_config.json b/checkpoint-14/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-14/adapter_config.json +++ b/checkpoint-14/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-14/trainer_state.json b/checkpoint-14/trainer_state.json index bc72987d815f3ee4b46df1bd1a0e97cfd6670b2f..88fbf9becfb5d669da763a3d0ad753433363ad56 100644 --- a/checkpoint-14/trainer_state.json +++ b/checkpoint-14/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 } diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-14/training_args.bin +++ b/checkpoint-14/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-16/adapter_config.json b/checkpoint-16/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-16/adapter_config.json +++ b/checkpoint-16/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-16/trainer_state.json b/checkpoint-16/trainer_state.json index 936f05557643f63325a6c93d12f959ac26c3cd7f..05d1f498967f7d2261705218aabc15ae6b24fdbb 100644 --- a/checkpoint-16/trainer_state.json +++ b/checkpoint-16/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 } diff --git a/checkpoint-16/training_args.bin b/checkpoint-16/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-16/training_args.bin +++ b/checkpoint-16/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-18/adapter_config.json b/checkpoint-18/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-18/adapter_config.json +++ b/checkpoint-18/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-18/trainer_state.json b/checkpoint-18/trainer_state.json index 4581e6296e397edb0e50965c1236e116214abdf4..68a886310dcb3a5fdd37c6f6e0a752b3deac1847 100644 --- a/checkpoint-18/trainer_state.json +++ b/checkpoint-18/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 diff --git a/checkpoint-18/training_args.bin b/checkpoint-18/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-18/training_args.bin +++ b/checkpoint-18/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-2/adapter_config.json b/checkpoint-2/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-2/adapter_config.json +++ b/checkpoint-2/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-2/trainer_state.json b/checkpoint-2/trainer_state.json index cc36da2c33e4a28606796c2458a219824cd1bd43..bb52596552917fa1c4d3ad7571620d71c01afb13 100644 --- a/checkpoint-2/trainer_state.json +++ b/checkpoint-2/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 } diff --git a/checkpoint-2/training_args.bin b/checkpoint-2/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-2/training_args.bin +++ b/checkpoint-2/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index a6123962584e4b002aab697407126e40d2fc1302..917db439a8e53b0f8f659b78dc218b1c69f5106b 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 } diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-22/adapter_config.json b/checkpoint-22/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-22/adapter_config.json +++ b/checkpoint-22/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-22/trainer_state.json b/checkpoint-22/trainer_state.json index 00588ae573da776f25af27a488a14e7cc18fa07b..456d04aa7966a1cd12ff9f6c0086d744a460e3c7 100644 --- a/checkpoint-22/trainer_state.json +++ b/checkpoint-22/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 22, "train_speed(iter/s)": 0.000426 } diff --git a/checkpoint-22/training_args.bin b/checkpoint-22/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-22/training_args.bin +++ b/checkpoint-22/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-24/adapter_config.json b/checkpoint-24/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-24/adapter_config.json +++ b/checkpoint-24/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-24/trainer_state.json b/checkpoint-24/trainer_state.json index 71fe7f2e41930e700b7ad6623db68a42930e3853..bb1be19c6a2805c0e9655eb7f7fec3076a704e14 100644 --- a/checkpoint-24/trainer_state.json +++ b/checkpoint-24/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0653, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 diff --git a/checkpoint-24/training_args.bin b/checkpoint-24/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-24/training_args.bin +++ b/checkpoint-24/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-26/adapter_config.json b/checkpoint-26/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-26/adapter_config.json +++ b/checkpoint-26/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-26/trainer_state.json b/checkpoint-26/trainer_state.json index 001905ec4de0241ac02cb975479cb8b9bb85f854..602a6c1309f1ee6b6bfc89540ccc6ada6b1d2e89 100644 --- a/checkpoint-26/trainer_state.json +++ b/checkpoint-26/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0653, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 26, "train_speed(iter/s)": 0.000421 } diff --git a/checkpoint-26/training_args.bin b/checkpoint-26/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-26/training_args.bin +++ b/checkpoint-26/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-28/adapter_config.json b/checkpoint-28/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-28/adapter_config.json +++ b/checkpoint-28/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json index 5ffa656555aedc46214a3328f7b650204c2802b0..1ecbac39b96cd3bf5b99ae1c405479b2290145ec 100644 --- a/checkpoint-28/trainer_state.json +++ b/checkpoint-28/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0653, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -437,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -453,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 28, "train_speed(iter/s)": 0.000422 } diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-28/training_args.bin +++ b/checkpoint-28/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-30/adapter_config.json +++ b/checkpoint-30/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f2efb5944f9eac77f28284cd09ae2fdde0febb4 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1a3cd6933506800a430038393b6941bd000301ef9f2dc6faeb6e1459140ba3 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e96a099533ea12d6d36b428644e49290e1df725a --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d490b31e4ae11c61f13bceb96677e15b12a5316213c042d2a6ac1c5568b18e27 +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c04dbbb45f55453bc91016209465114bedeb7421 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96828d4477387a7cfbbff0217d2e06634144ead2d4531d5d012f92cbd3915ca +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a9bdae99397f0f9b00c497119523c40be48480c --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703767cfa56852356c051e72a843812484422326afdc2226c9cb7315ddac9a0f +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..739ea4eccb2005f8bcf086843d690a84004f7a9e --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30b72531a3686bb479a405afecb9a5e36ded3a1f0e77715b982d34b35bf4ea5 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a4e0e1b2388e2d9765e1538290608bc8a76d16e --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1f53600a2e0547e940bc9d6722d6db5881c6b0b82db04e4633a3a671d790e5 +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..131a2b33c2bbdbf69bbef96b5e413aa78643686e --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62085edea348819aeb01edf89abb88dbc38895b13a999c46c497f8876545ab5d +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..188cdb05e2aeef8496db08b5edc6c02dddc437da --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049092cac5d4dbecc59edf2e1beae0aa6ecb9be85a1769e39277b8132173827c +size 51616015 diff --git a/checkpoint-30/rng_state_24.pth b/checkpoint-30/rng_state_24.pth new file mode 100644 index 0000000000000000000000000000000000000000..18753f0d8bfa090cb88d8a566afec268e8affa6d --- /dev/null +++ b/checkpoint-30/rng_state_24.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550197960179c76e27d753c57b888a6aceb5f7914c64eb2030d99039f4c10ee3 +size 16340 diff --git a/checkpoint-30/rng_state_25.pth b/checkpoint-30/rng_state_25.pth new file mode 100644 index 0000000000000000000000000000000000000000..28b0c5aac07166f1b0dc2dfbeba5025f4af1372e --- /dev/null +++ b/checkpoint-30/rng_state_25.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86494bd509e73b62b00dd992dd55557dc6918b884ceabf488a48de384d5a3124 +size 16340 diff --git a/checkpoint-30/rng_state_26.pth b/checkpoint-30/rng_state_26.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab72b5285b7960e0f40d23d1893424610eb067ca --- /dev/null +++ b/checkpoint-30/rng_state_26.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e2ab1725dde0d17f163d31707c459492db313385e56d7d908960b9da1e3104 +size 16404 diff --git a/checkpoint-30/rng_state_27.pth b/checkpoint-30/rng_state_27.pth new file mode 100644 index 0000000000000000000000000000000000000000..32dd9c751437d0abd26bdf1c73517655414af95c --- /dev/null +++ b/checkpoint-30/rng_state_27.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628965fd355f90ce9405c61784b5296f30f4d56f2d8079593166eb50fef5820c +size 16404 diff --git a/checkpoint-30/rng_state_28.pth b/checkpoint-30/rng_state_28.pth new file mode 100644 index 0000000000000000000000000000000000000000..7103b1884f11545483c9668efc16f7f3ff8e86ff --- /dev/null +++ b/checkpoint-30/rng_state_28.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653a95667e1e4ab42ade2b12f244e882d81edce91454b60df85abd86914e1c3a +size 16404 diff --git a/checkpoint-30/rng_state_29.pth b/checkpoint-30/rng_state_29.pth new file mode 100644 index 0000000000000000000000000000000000000000..f53af13b574ae5276be2cbfb9e95e8a0cbb0b8f3 --- /dev/null +++ b/checkpoint-30/rng_state_29.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148fc9650216d30f813309a1ff1a4839c72be88ef17b2125497372c57fe47150 +size 16340 diff --git a/checkpoint-30/rng_state_30.pth b/checkpoint-30/rng_state_30.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2a6926c034507f7ad97b96d43527e4ca9d98ac2 --- /dev/null +++ b/checkpoint-30/rng_state_30.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a03b2904d2254d43024a5c2e00b159e258a66d6078b0523311e69df02402e8 +size 16468 diff --git a/checkpoint-30/rng_state_31.pth b/checkpoint-30/rng_state_31.pth new file mode 100644 index 0000000000000000000000000000000000000000..20271609fb6510a7d8c80fbbbcbedc106f751f76 --- /dev/null +++ b/checkpoint-30/rng_state_31.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada7dafd49fa06d30df1afdde7f7b919cb8d007421630f594e9f61fffd246751 +size 16340 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json index 7503eb1c9ef3a5129c7db19e8ca1496d6e560bab..aef4983299ab3c5b638f80fa1f75fec5af356381 100644 --- a/checkpoint-30/trainer_state.json +++ b/checkpoint-30/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9053, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -298,7 +298,7 @@ "eval_reward_std": 0.10691346973180771, "eval_rewards/CosineReward": 0.03729327768087387, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1041.2321, + "eval_runtime": 1041.2277, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 18 @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0653, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -437,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -453,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 28, "train_speed(iter/s)": 0.000422 }, @@ -465,7 +465,7 @@ "kl": 0.18701171875, "learning_rate": 6.153079353712201e-05, "loss": -0.031890563666820526, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.541015625, "reward": 0.04964290652424097, "reward_std": 0.1329497341066599, @@ -479,7 +479,7 @@ "grad_norm": 0.014435957185924053, "learning_rate": 5.868240888334653e-05, "loss": -0.032097991555929184, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 30, "train_speed(iter/s)": 0.000423 }, @@ -494,7 +494,7 @@ "eval_reward_std": 0.1401301473379135, "eval_rewards/CosineReward": 0.05227777361869812, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1085.6089, + "eval_runtime": 1085.6065, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 30 diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-30/training_args.bin +++ b/checkpoint-30/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-4/adapter_config.json b/checkpoint-4/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-4/adapter_config.json +++ b/checkpoint-4/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-4/trainer_state.json b/checkpoint-4/trainer_state.json index dc9c3073d2c9e1b5260f61f624b6fe4dc378bcd3..b9e6e0a1939e20ff64b2c526958f9071d2092ce6 100644 --- a/checkpoint-4/trainer_state.json +++ b/checkpoint-4/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 } diff --git a/checkpoint-4/training_args.bin b/checkpoint-4/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-4/training_args.bin +++ b/checkpoint-4/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-6/adapter_config.json b/checkpoint-6/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-6/adapter_config.json +++ b/checkpoint-6/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-6/trainer_state.json b/checkpoint-6/trainer_state.json index b3b3cc55dc3b19e5b194f4b07b4f5db7d73b0de8..788be4857b9899abbfb479f3a84793787ed74e90 100644 --- a/checkpoint-6/trainer_state.json +++ b/checkpoint-6/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 diff --git a/checkpoint-6/training_args.bin b/checkpoint-6/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-6/training_args.bin +++ b/checkpoint-6/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809 diff --git a/checkpoint-8/adapter_config.json b/checkpoint-8/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..ff0f4cc95c39fe3def3378592de89aa4d14e7c68 100644 --- a/checkpoint-8/adapter_config.json +++ b/checkpoint-8/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", "o_proj", - "q_proj", + "gate_proj", "v_proj", + "k_proj", "up_proj", - "gate_proj", - "k_proj" + "down_proj", + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-8/trainer_state.json b/checkpoint-8/trainer_state.json index 60713415d250188c737244a7e24c1300414b789f..ff8b0e6b148baae81697b538a5071347d6601c0c 100644 --- a/checkpoint-8/trainer_state.json +++ b/checkpoint-8/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,9 +87,9 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 6, - "train_speed(iter/s)": 0.000458 + "train_speed(iter/s)": 0.000459 }, { "epoch": 1.4210526315789473, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1122, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 186.69, "step": 8, "train_speed(iter/s)": 0.000433 } diff --git a/checkpoint-8/training_args.bin b/checkpoint-8/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..f36a7dd9bdc6cb2a2d931801db92f6b3a1142a0b 100644 --- a/checkpoint-8/training_args.bin +++ b/checkpoint-8/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257 size 9809