diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index 0d8d96672052972cd01d7af2ed175e36d6f9d5e0..f3bc7a6c7a8e7a542613193645c83c0135cfaace 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 } diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-12/adapter_config.json b/checkpoint-12/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-12/adapter_config.json +++ b/checkpoint-12/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-12/trainer_state.json b/checkpoint-12/trainer_state.json index e2fc6f34722a82aeb5916c74ff2a4a44a712399b..8d028d3771c94b5ea13dd49451668c8949834618 100644 --- a/checkpoint-12/trainer_state.json +++ b/checkpoint-12/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 diff --git a/checkpoint-12/training_args.bin b/checkpoint-12/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-12/training_args.bin +++ b/checkpoint-12/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-14/adapter_config.json b/checkpoint-14/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-14/adapter_config.json +++ b/checkpoint-14/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-14/trainer_state.json b/checkpoint-14/trainer_state.json index bc72987d815f3ee4b46df1bd1a0e97cfd6670b2f..4f77c8cd47b58975e4fc2300556d90007758118f 100644 --- a/checkpoint-14/trainer_state.json +++ b/checkpoint-14/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 } diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-14/training_args.bin +++ b/checkpoint-14/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-16/adapter_config.json b/checkpoint-16/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-16/adapter_config.json +++ b/checkpoint-16/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-16/trainer_state.json b/checkpoint-16/trainer_state.json index 936f05557643f63325a6c93d12f959ac26c3cd7f..5e83c92bde4761577fc169c57fbd06c509552cad 100644 --- a/checkpoint-16/trainer_state.json +++ b/checkpoint-16/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 } diff --git a/checkpoint-16/training_args.bin b/checkpoint-16/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-16/training_args.bin +++ b/checkpoint-16/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-18/adapter_config.json b/checkpoint-18/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-18/adapter_config.json +++ b/checkpoint-18/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-18/trainer_state.json b/checkpoint-18/trainer_state.json index 4581e6296e397edb0e50965c1236e116214abdf4..a1c1a4655ab577f58a2feda407b25c2a37f8a847 100644 --- a/checkpoint-18/trainer_state.json +++ b/checkpoint-18/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, diff --git a/checkpoint-18/training_args.bin b/checkpoint-18/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-18/training_args.bin +++ b/checkpoint-18/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-2/adapter_config.json b/checkpoint-2/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-2/adapter_config.json +++ b/checkpoint-2/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-2/trainer_state.json b/checkpoint-2/trainer_state.json index cc36da2c33e4a28606796c2458a219824cd1bd43..f1eb74f328226054adf4f3fb18e6428f343d668d 100644 --- a/checkpoint-2/trainer_state.json +++ b/checkpoint-2/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 } diff --git a/checkpoint-2/training_args.bin b/checkpoint-2/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-2/training_args.bin +++ b/checkpoint-2/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index a6123962584e4b002aab697407126e40d2fc1302..799cd565c5d6a9d014372dae066e8e90a4c1adf3 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 20, "train_speed(iter/s)": 0.000424 } diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-22/adapter_config.json b/checkpoint-22/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-22/adapter_config.json +++ b/checkpoint-22/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-22/trainer_state.json b/checkpoint-22/trainer_state.json index 00588ae573da776f25af27a488a14e7cc18fa07b..38dfc6c5cc5295ebde051bb509900e4bfe319f90 100644 --- a/checkpoint-22/trainer_state.json +++ b/checkpoint-22/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 22, "train_speed(iter/s)": 0.000426 } diff --git a/checkpoint-22/training_args.bin b/checkpoint-22/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-22/training_args.bin +++ b/checkpoint-22/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-24/adapter_config.json b/checkpoint-24/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-24/adapter_config.json +++ b/checkpoint-24/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-24/trainer_state.json b/checkpoint-24/trainer_state.json index 71fe7f2e41930e700b7ad6623db68a42930e3853..6882365a9f6cb165794d215b4d3e0b03b2c144aa 100644 --- a/checkpoint-24/trainer_state.json +++ b/checkpoint-24/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0642, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 diff --git a/checkpoint-24/training_args.bin b/checkpoint-24/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-24/training_args.bin +++ b/checkpoint-24/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-26/adapter_config.json b/checkpoint-26/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-26/adapter_config.json +++ b/checkpoint-26/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-26/trainer_state.json b/checkpoint-26/trainer_state.json index 001905ec4de0241ac02cb975479cb8b9bb85f854..879395d69f8fcc796e0f0b4c11b379c5c04ad9cb 100644 --- a/checkpoint-26/trainer_state.json +++ b/checkpoint-26/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0642, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 26, "train_speed(iter/s)": 0.000421 } diff --git a/checkpoint-26/training_args.bin b/checkpoint-26/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-26/training_args.bin +++ b/checkpoint-26/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-28/adapter_config.json b/checkpoint-28/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-28/adapter_config.json +++ b/checkpoint-28/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json index 5ffa656555aedc46214a3328f7b650204c2802b0..54f298b29417457381943a331bc7ae1242c31d16 100644 --- a/checkpoint-28/trainer_state.json +++ b/checkpoint-28/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9045, + "eval_runtime": 1025.9048, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0616, + "eval_runtime": 1045.0642, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -437,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -453,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 180.29, + "memory(GiB)": 187.02, "step": 28, "train_speed(iter/s)": 0.000422 } diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-28/training_args.bin +++ b/checkpoint-28/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d4dbd19e3cdbcefaa269791fa922afe075b806c8 --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,202 @@ +--- +base_model: /mnt/nvme5n1p1/distill-14b-rl-70 +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a6753254d3cb15865ffb290b9553d85b57dac9a --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme5n1p1/distill-14b-rl-70", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "q_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..463182e300a5b38391d4a2400b62dfa5a4e917ae --- /dev/null +++ b/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329c17739679fed02726da82bfb68abbf7e99089c2e8a5c4d4e1f53724467219 +size 275342392 diff --git a/checkpoint-30/additional_config.json b/checkpoint-30/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/checkpoint-30/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..77555895b02889cf06e335eba47a925bbf262165 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236b7edd60ea120f67cd3fa17b6a0f26eafcb2b674178d748741a7bec780d3bb +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d39a65161dcebfd16d2960467606c3990920bd94 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:577881ad19e84826a790e8334307e0604cacd87ff7f67e416aab193d1d3c7681 +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a8a579cc944fa3cf2a14cd979feca521d21cdf0 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e00a64034712680dbe3172fed4c3f9dac79b60a2559d4bae0ba510bbe197c59 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95c1390a99e4a002812a9195a0492455e7705f80 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b79763a682ec1a5717205a4c47851db99fb6ebeed1b00addf598d403c72fb2 +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbe1eea6bed6dc9d2fcadb15a90d4703b3c88bad --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d637f1b940ee9ea28d165342c08527f8224e0f4d52fb8368a944a2120bd2a2d5 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33543196a8874b82a3a133012e9e4221c7c8efc7 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d253294687babb38ecf1f3b1d6efb23f50026f15b6d92e2e6d55f500bb931dad +size 51616015 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75e05f118b0c3f1fa8daf1e8008de1c0a5047d9d --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954697ae86ca3d7a1662c904d6d2e375b8348c71fc6fa59237b03f66fcf6f698 +size 51616527 diff --git a/checkpoint-30/global_step30/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/checkpoint-30/global_step30/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0de7cb6a7d88aaa390ef6ce09c86b75fdbb3849 --- /dev/null +++ b/checkpoint-30/global_step30/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292c5cd764814ad0730375034dafd5b5316b951ea4b0d1a0c6d5f086e55cca1c +size 51616015 diff --git a/checkpoint-30/global_step30/mp_rank_00_model_states.pt b/checkpoint-30/global_step30/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2223f15ff81da4ce74d4df0ce35153ff0a21543 --- /dev/null +++ b/checkpoint-30/global_step30/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19a396f183721ca31d21fa84e5530ebda200179fc3f57ea0b314e8f9828345e0 +size 275768601 diff --git a/checkpoint-30/latest b/checkpoint-30/latest new file mode 100644 index 0000000000000000000000000000000000000000..f11ba0855f2215bf9c6cbacc2e9fd7b7c5f5a480 --- /dev/null +++ b/checkpoint-30/latest @@ -0,0 +1 @@ +global_step30 \ No newline at end of file diff --git a/checkpoint-30/rng_state_16.pth b/checkpoint-30/rng_state_16.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bafa53ca0cb2388bf0f88229ab3e0ec762c38b0 --- /dev/null +++ b/checkpoint-30/rng_state_16.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cfe883ccee2993ebb068bb729f6106faeaa041208e83ceb6be530ff77d7075 +size 16404 diff --git a/checkpoint-30/rng_state_17.pth b/checkpoint-30/rng_state_17.pth new file mode 100644 index 0000000000000000000000000000000000000000..166ed18aa1a766510e5b36b95ddfae6e817e0e60 --- /dev/null +++ b/checkpoint-30/rng_state_17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894b0396bee0214f53a66aaa33ee5095a9ef8c191fb926b5b8575a431b670362 +size 16404 diff --git a/checkpoint-30/rng_state_18.pth b/checkpoint-30/rng_state_18.pth new file mode 100644 index 0000000000000000000000000000000000000000..2232be68f93fa43d02394284a16091a60212f8b4 --- /dev/null +++ b/checkpoint-30/rng_state_18.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3126bd9635a55e5de0856de58320a3e0aca1161489085ae74ab449b1b8dd95b +size 16404 diff --git a/checkpoint-30/rng_state_19.pth b/checkpoint-30/rng_state_19.pth new file mode 100644 index 0000000000000000000000000000000000000000..70ebd8657ebc7047b4db98871dae9f8405018678 --- /dev/null +++ b/checkpoint-30/rng_state_19.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4487d8bde4a49904b5d9c264fd067eed8a30a4eec8f4079d4eda094a8d6c1efb +size 16340 diff --git a/checkpoint-30/rng_state_20.pth b/checkpoint-30/rng_state_20.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7c1903210ce0b15c967ee57c9ae3aa80882d17d --- /dev/null +++ b/checkpoint-30/rng_state_20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab226df43bbf44b457973165fc37983dac217d307c8fb8944a779155869d220e +size 16340 diff --git a/checkpoint-30/rng_state_21.pth b/checkpoint-30/rng_state_21.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae5a11ee907a5d6b3acb8d4036bba5c8144eec55 --- /dev/null +++ b/checkpoint-30/rng_state_21.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80f66ee1267b9b228efe189c7ee95c1dc1eaec059b39b3fede94e72228b7816 +size 16340 diff --git a/checkpoint-30/rng_state_22.pth b/checkpoint-30/rng_state_22.pth new file mode 100644 index 0000000000000000000000000000000000000000..9cc1ad1d0f03fa84988543fe076a3d440d802932 --- /dev/null +++ b/checkpoint-30/rng_state_22.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5feba79e62d5e5bdeae8e659dece61fdbbe06798e6efd426529e08d3671eb86 +size 16468 diff --git a/checkpoint-30/rng_state_23.pth b/checkpoint-30/rng_state_23.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ecce045134c6bf3c6cedfede4220ce8d4858451 --- /dev/null +++ b/checkpoint-30/rng_state_23.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2910df48583c8f05d8048cff844faddab134065b6f227c74d406c00dbf34e1b4 +size 16468 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..05f0a1f680257166d5b507724ba45575d83f6079 --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2eb80be7fdf47fb2469b7f5895ba6d947c0bbb73c82b64ab5962d854bb5b396 +size 1401 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..664690c82b357f0d4097c0a552d4c28eca3f4ebc --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,524 @@ +{ + "best_global_step": 30, + "best_metric": 0.05227778106927872, + "best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-30", + "epoch": 7.421052631578947, + "eval_steps": 6, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": false, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 10352.974609375, + "epoch": 0.21052631578947367, + "grad_norm": 0.13259537518024445, + "kl": 0.0, + "learning_rate": 1.6666666666666667e-05, + "loss": -0.11016345024108887, + "memory(GiB)": 176.98, + "response_clip_ratio": 0.11328125, + "reward": -0.002658387296833098, + "reward_std": 0.06134121119976044, + "rewards/CosineReward": -0.0026579967816360295, + "rewards/RepetitionPenalty": -3.8975886695880035e-07, + "step": 1, + "train_speed(iter/s)": 0.000241 + }, + { + "clip_ratio": 0.0, + "epoch": 0.42105263157894735, + "grad_norm": 0.1320001482963562, + "kl": 0.0, + "learning_rate": 3.3333333333333335e-05, + "loss": -0.11016345024108887, + "memory(GiB)": 176.98, + "step": 2, + "train_speed(iter/s)": 0.000466 + }, + { + "clip_ratio": 1.3441811461234465e-05, + "completion_length": 10439.369140625, + "epoch": 0.631578947368421, + "grad_norm": 0.08990391343832016, + "kl": 9.50181856751442e-07, + "learning_rate": 5e-05, + "loss": -0.06604708731174469, + "memory(GiB)": 176.98, + "response_clip_ratio": 0.13671875, + "reward": 0.0006296975770965219, + "reward_std": 0.07172460854053497, + "rewards/CosineReward": 0.0006298604130279273, + "rewards/RepetitionPenalty": -1.6200439745261974e-07, + "step": 3, + "train_speed(iter/s)": 0.00035 + }, + { + "clip_ratio": 1.70210253145342e-05, + "epoch": 0.8421052631578947, + "grad_norm": 0.0967094898223877, + "kl": 1.1101365089416504e-05, + "learning_rate": 6.666666666666667e-05, + "loss": -0.06727766245603561, + "memory(GiB)": 176.98, + "step": 4, + "train_speed(iter/s)": 0.000458 + }, + { + "clip_ratio": 1.675608473306056e-05, + "completion_length": 10092.408203125, + "epoch": 1.2105263157894737, + "grad_norm": 0.142837256193161, + "kl": 0.00017762184143066406, + "learning_rate": 8.333333333333334e-05, + "loss": -0.09315311908721924, + "memory(GiB)": 176.98, + "response_clip_ratio": 0.119140625, + "reward": -0.005135859013535082, + "reward_std": 0.07994875870645046, + "rewards/CosineReward": -0.005134060338605195, + "rewards/RepetitionPenalty": -1.7973881654143042e-06, + "step": 5, + "train_speed(iter/s)": 0.000387 + }, + { + "epoch": 1.4210526315789473, + "grad_norm": 0.18263348937034607, + "learning_rate": 0.0001, + "loss": -0.1041698157787323, + "memory(GiB)": 176.98, + "step": 6, + "train_speed(iter/s)": 0.000458 + }, + { + "epoch": 1.4210526315789473, + "eval_clip_ratio": 4.069424539920874e-05, + "eval_completion_length": 12289.0, + "eval_kl": 0.04833984375, + "eval_loss": -0.5377416610717773, + "eval_response_clip_ratio": 1.0, + "eval_reward": 0.012996690347790718, + "eval_reward_std": 0.08769983053207397, + "eval_rewards/CosineReward": 0.012996694073081017, + "eval_rewards/RepetitionPenalty": 0.0, + "eval_runtime": 1030.1126, + "eval_samples_per_second": 0.001, + "eval_steps_per_second": 0.001, + "step": 6 + }, + { + "clip_ratio": 0.0005237623976199757, + "completion_length": 10448.94921875, + "epoch": 1.631578947368421, + "grad_norm": 0.1291271299123764, + "kl": 0.017406463623046875, + "learning_rate": 9.991540791356342e-05, + "loss": -0.051375165581703186, + "memory(GiB)": 176.98, + "response_clip_ratio": 0.1484375, + "reward": 0.004909618757665157, + "reward_std": 0.08167182095348835, + "rewards/CosineReward": 0.004909833543933928, + "rewards/RepetitionPenalty": -2.1478646772266075e-07, + "step": 7, + "train_speed(iter/s)": 0.000382 + }, + { + "clip_ratio": 0.1706484742462635, + "epoch": 1.8421052631578947, + "grad_norm": 0.26641014218330383, + "kl": 0.089599609375, + "learning_rate": 9.966191788709716e-05, + "loss": -0.05105742812156677, + "memory(GiB)": 176.98, + "step": 8, + "train_speed(iter/s)": 0.000433 + }, + { + "clip_ratio": 9.482144946559856e-06, + "completion_length": 10432.384765625, + "epoch": 2.2105263157894735, + "grad_norm": 0.10375155508518219, + "kl": 0.0963134765625, + "learning_rate": 9.924038765061042e-05, + "loss": -0.05842069163918495, + "memory(GiB)": 176.98, + "response_clip_ratio": 0.255859375, + "reward": 0.03643610421568155, + "reward_std": 0.11898956261575222, + "rewards/CosineReward": 0.03643618477508426, + "rewards/RepetitionPenalty": -7.898860587829404e-08, + "step": 9, + "train_speed(iter/s)": 0.000396 + }, + { + "clip_ratio": 0.0036088433116674423, + "epoch": 2.4210526315789473, + "grad_norm": 0.09477333724498749, + "kl": 0.1185302734375, + "learning_rate": 9.865224352899119e-05, + "loss": -0.06491819024085999, + "memory(GiB)": 176.98, + "step": 10, + "train_speed(iter/s)": 0.000436 + }, + { + "clip_ratio": 1.2955343891007942e-05, + "completion_length": 10559.296875, + "epoch": 2.6315789473684212, + "grad_norm": 0.06739140301942825, + "kl": 0.1275634765625, + "learning_rate": 9.789947561577445e-05, + "loss": -0.04600231721997261, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.361328125, + "reward": 0.023204635945148766, + "reward_std": 0.10593634657561779, + "rewards/CosineReward": 0.02320496749598533, + "rewards/RepetitionPenalty": -3.3051759373847744e-07, + "step": 11, + "train_speed(iter/s)": 0.000405 + }, + { + "epoch": 2.8421052631578947, + "grad_norm": 0.05781339108943939, + "learning_rate": 9.698463103929542e-05, + "loss": -0.05069056898355484, + "memory(GiB)": 187.02, + "step": 12, + "train_speed(iter/s)": 0.000439 + }, + { + "epoch": 2.8421052631578947, + "eval_clip_ratio": 4.392032860778272e-05, + "eval_completion_length": 12289.0, + "eval_kl": 0.2275390625, + "eval_loss": 0.17524278163909912, + "eval_response_clip_ratio": 1.0, + "eval_reward": 0.03234308212995529, + "eval_reward_std": 0.10685288906097412, + "eval_rewards/CosineReward": 0.03234308212995529, + "eval_rewards/RepetitionPenalty": 0.0, + "eval_runtime": 1025.9048, + "eval_samples_per_second": 0.001, + "eval_steps_per_second": 0.001, + "step": 12 + }, + { + "clip_ratio": 0.0007908324183745208, + "completion_length": 10652.939453125, + "epoch": 3.2105263157894735, + "grad_norm": 0.01199417095631361, + "kl": 0.151123046875, + "learning_rate": 9.591080534401371e-05, + "loss": -0.02191038429737091, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.419921875, + "reward": 0.035983758978545666, + "reward_std": 0.11553369648754597, + "rewards/CosineReward": 0.03598417737521231, + "rewards/RepetitionPenalty": -4.176556771540163e-07, + "step": 13, + "train_speed(iter/s)": 0.000399 + }, + { + "clip_ratio": 0.0004821276670554653, + "epoch": 3.4210526315789473, + "grad_norm": 0.01075426209717989, + "kl": 0.169189453125, + "learning_rate": 9.468163201617062e-05, + "loss": -0.022672578692436218, + "memory(GiB)": 187.02, + "step": 14, + "train_speed(iter/s)": 0.000427 + }, + { + "clip_ratio": 1.9617403040683712e-05, + "completion_length": 10482.146484375, + "epoch": 3.6315789473684212, + "grad_norm": 0.01779361069202423, + "kl": 0.166748046875, + "learning_rate": 9.330127018922194e-05, + "loss": -0.059799157083034515, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.4765625, + "reward": 0.03584331553429365, + "reward_std": 0.11829411797225475, + "rewards/CosineReward": 0.03584346390562132, + "rewards/RepetitionPenalty": -1.4977952389472193e-07, + "step": 15, + "train_speed(iter/s)": 0.000406 + }, + { + "clip_ratio": 0.00011349086707923561, + "epoch": 3.8421052631578947, + "grad_norm": 0.013216385617852211, + "kl": 0.16748046875, + "learning_rate": 9.177439057064683e-05, + "loss": -0.06071458384394646, + "memory(GiB)": 187.02, + "step": 16, + "train_speed(iter/s)": 0.000431 + }, + { + "clip_ratio": 2.4864069928298704e-05, + "completion_length": 10822.3515625, + "epoch": 4.2105263157894735, + "grad_norm": 0.008352754637598991, + "kl": 0.1787109375, + "learning_rate": 9.01061596377522e-05, + "loss": -0.04504441097378731, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.5625, + "reward": 0.027318883687257767, + "reward_std": 0.10441224090754986, + "rewards/CosineReward": 0.027319116634316742, + "rewards/RepetitionPenalty": -2.338138500590503e-07, + "step": 17, + "train_speed(iter/s)": 0.00041 + }, + { + "epoch": 4.421052631578947, + "grad_norm": 0.005998397711664438, + "learning_rate": 8.83022221559489e-05, + "loss": -0.045487549155950546, + "memory(GiB)": 187.02, + "step": 18, + "train_speed(iter/s)": 0.000432 + }, + { + "epoch": 4.421052631578947, + "eval_clip_ratio": 2.286707422172185e-05, + "eval_completion_length": 12289.0, + "eval_kl": 0.18359375, + "eval_loss": -0.38219889998435974, + "eval_response_clip_ratio": 1.0, + "eval_reward": 0.03729328140616417, + "eval_reward_std": 0.10691346973180771, + "eval_rewards/CosineReward": 0.03729327768087387, + "eval_rewards/RepetitionPenalty": 0.0, + "eval_runtime": 1041.2321, + "eval_samples_per_second": 0.001, + "eval_steps_per_second": 0.001, + "step": 18 + }, + { + "clip_ratio": 6.176384295031312e-05, + "completion_length": 10454.50390625, + "epoch": 4.631578947368421, + "grad_norm": 0.007075619418174028, + "kl": 0.1820068359375, + "learning_rate": 8.636868207865244e-05, + "loss": -0.03466903418302536, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.466796875, + "reward": 0.04069916973821819, + "reward_std": 0.11991005763411522, + "rewards/CosineReward": 0.04070046404376626, + "rewards/RepetitionPenalty": -1.294118249006715e-06, + "step": 19, + "train_speed(iter/s)": 0.000404 + }, + { + "clip_ratio": 6.06911453360226e-05, + "epoch": 4.842105263157895, + "grad_norm": 0.005896567367017269, + "kl": 0.19287109375, + "learning_rate": 8.43120818934367e-05, + "loss": -0.03502114117145538, + "memory(GiB)": 187.02, + "step": 20, + "train_speed(iter/s)": 0.000424 + }, + { + "clip_ratio": 3.8725801914551994e-05, + "completion_length": 10645.056640625, + "epoch": 5.2105263157894735, + "grad_norm": 0.004154536407440901, + "kl": 0.17626953125, + "learning_rate": 8.213938048432697e-05, + "loss": -0.008662773296236992, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.5625, + "reward": 0.04996980866417289, + "reward_std": 0.13849420100450516, + "rewards/CosineReward": 0.049969930201768875, + "rewards/RepetitionPenalty": -1.1864573679076784e-07, + "step": 21, + "train_speed(iter/s)": 0.000407 + }, + { + "clip_ratio": 5.869188044016482e-05, + "epoch": 5.421052631578947, + "grad_norm": 0.004300669766962528, + "kl": 0.178955078125, + "learning_rate": 7.985792958513931e-05, + "loss": -0.008743642829358578, + "memory(GiB)": 187.02, + "step": 22, + "train_speed(iter/s)": 0.000426 + }, + { + "clip_ratio": 4.6346245653694496e-05, + "completion_length": 10538.072265625, + "epoch": 5.631578947368421, + "grad_norm": 0.01327697653323412, + "kl": 0.1796875, + "learning_rate": 7.74754489035403e-05, + "loss": -0.03423420712351799, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.583984375, + "reward": 0.034468831261619925, + "reward_std": 0.11841745302081108, + "rewards/CosineReward": 0.03447544714435935, + "rewards/RepetitionPenalty": -6.612649428916484e-06, + "step": 23, + "train_speed(iter/s)": 0.00041 + }, + { + "epoch": 5.842105263157895, + "grad_norm": 0.014131724834442139, + "learning_rate": 7.500000000000001e-05, + "loss": -0.03426633030176163, + "memory(GiB)": 187.02, + "step": 24, + "train_speed(iter/s)": 0.000427 + }, + { + "epoch": 5.842105263157895, + "eval_clip_ratio": 4.0687620639801025e-05, + "eval_completion_length": 12289.0, + "eval_kl": 0.1982421875, + "eval_loss": 0.3612469434738159, + "eval_response_clip_ratio": 1.0, + "eval_reward": 0.04339282959699631, + "eval_reward_std": 0.10456253588199615, + "eval_rewards/CosineReward": 0.04339282959699631, + "eval_rewards/RepetitionPenalty": 0.0, + "eval_runtime": 1045.0642, + "eval_samples_per_second": 0.001, + "eval_steps_per_second": 0.001, + "step": 24 + }, + { + "clip_ratio": 5.05705434079573e-05, + "completion_length": 10789.259765625, + "epoch": 6.2105263157894735, + "grad_norm": 0.0099335303530097, + "kl": 0.1800537109375, + "learning_rate": 7.243995901002312e-05, + "loss": -0.02097315341234207, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.6171875, + "reward": 0.03010205877944827, + "reward_std": 0.10742511600255966, + "rewards/CosineReward": 0.030102317687124014, + "rewards/RepetitionPenalty": -2.580197531187878e-07, + "step": 25, + "train_speed(iter/s)": 0.000406 + }, + { + "clip_ratio": 4.821802576771006e-05, + "epoch": 6.421052631578947, + "grad_norm": 0.00989576056599617, + "kl": 0.18408203125, + "learning_rate": 6.980398830195785e-05, + "loss": -0.02103913575410843, + "memory(GiB)": 187.02, + "step": 26, + "train_speed(iter/s)": 0.000421 + }, + { + "clip_ratio": 5.442534347821493e-05, + "completion_length": 10197.099609375, + "epoch": 6.631578947368421, + "grad_norm": 0.00436774967238307, + "kl": 0.174560546875, + "learning_rate": 6.710100716628344e-05, + "loss": -0.03593946248292923, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.513671875, + "reward": 0.04752760287374258, + "reward_std": 0.14935147762298584, + "rewards/CosineReward": 0.04752839542925358, + "rewards/RepetitionPenalty": -7.915698745364352e-07, + "step": 27, + "train_speed(iter/s)": 0.000408 + }, + { + "clip_ratio": 5.9543880524870474e-05, + "epoch": 6.842105263157895, + "grad_norm": 0.005277659278362989, + "kl": 0.182373046875, + "learning_rate": 6.434016163555452e-05, + "loss": -0.03595500811934471, + "memory(GiB)": 187.02, + "step": 28, + "train_speed(iter/s)": 0.000422 + }, + { + "clip_ratio": 5.280190998746548e-05, + "completion_length": 10427.296875, + "epoch": 7.2105263157894735, + "grad_norm": 0.016844402998685837, + "kl": 0.18701171875, + "learning_rate": 6.153079353712201e-05, + "loss": -0.031890563666820526, + "memory(GiB)": 187.02, + "response_clip_ratio": 0.541015625, + "reward": 0.04964290652424097, + "reward_std": 0.1329497341066599, + "rewards/CosineReward": 0.04964365530759096, + "rewards/RepetitionPenalty": -7.497067713302386e-07, + "step": 29, + "train_speed(iter/s)": 0.00041 + }, + { + "epoch": 7.421052631578947, + "grad_norm": 0.014435957185924053, + "learning_rate": 5.868240888334653e-05, + "loss": -0.032097991555929184, + "memory(GiB)": 187.02, + "step": 30, + "train_speed(iter/s)": 0.000423 + }, + { + "epoch": 7.421052631578947, + "eval_clip_ratio": 4.5418415538733825e-05, + "eval_completion_length": 12289.0, + "eval_kl": 0.197265625, + "eval_loss": -0.09817080944776535, + "eval_response_clip_ratio": 1.0, + "eval_reward": 0.05227778106927872, + "eval_reward_std": 0.1401301473379135, + "eval_rewards/CosineReward": 0.05227777361869812, + "eval_rewards/RepetitionPenalty": 0.0, + "eval_runtime": 1085.6089, + "eval_samples_per_second": 0.001, + "eval_steps_per_second": 0.001, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 60, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 2, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db6dc02fc187938d09e0a6626a08092ad8f42c54 --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +size 9809 diff --git a/checkpoint-4/adapter_config.json b/checkpoint-4/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-4/adapter_config.json +++ b/checkpoint-4/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-4/trainer_state.json b/checkpoint-4/trainer_state.json index dc9c3073d2c9e1b5260f61f624b6fe4dc378bcd3..b95d617400d7530f7b064a18f7bc7cdc945a2aef 100644 --- a/checkpoint-4/trainer_state.json +++ b/checkpoint-4/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 } diff --git a/checkpoint-4/training_args.bin b/checkpoint-4/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-4/training_args.bin +++ b/checkpoint-4/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-6/adapter_config.json b/checkpoint-6/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-6/adapter_config.json +++ b/checkpoint-6/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-6/trainer_state.json b/checkpoint-6/trainer_state.json index b3b3cc55dc3b19e5b194f4b07b4f5db7d73b0de8..d4540222f113714a1953b0e161b891639c52298f 100644 --- a/checkpoint-6/trainer_state.json +++ b/checkpoint-6/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 diff --git a/checkpoint-6/training_args.bin b/checkpoint-6/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-6/training_args.bin +++ b/checkpoint-6/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809 diff --git a/checkpoint-8/adapter_config.json b/checkpoint-8/adapter_config.json index 5601e58d3aec7aef62bafb11ddb8a04c414c4f52..9a6753254d3cb15865ffb290b9553d85b57dac9a 100644 --- a/checkpoint-8/adapter_config.json +++ b/checkpoint-8/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "down_proj", - "o_proj", - "q_proj", "v_proj", "up_proj", + "k_proj", "gate_proj", - "k_proj" + "o_proj", + "q_proj", + "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-8/trainer_state.json b/checkpoint-8/trainer_state.json index 60713415d250188c737244a7e24c1300414b789f..3a6b398642c37165b24d317105212cefd3a27a22 100644 --- a/checkpoint-8/trainer_state.json +++ b/checkpoint-8/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1223, + "eval_runtime": 1030.1126, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 180.29, + "memory(GiB)": 176.98, "step": 8, "train_speed(iter/s)": 0.000433 } diff --git a/checkpoint-8/training_args.bin b/checkpoint-8/training_args.bin index 642b9465d1983574a63f0a9f23bf7996bf8a7eda..db6dc02fc187938d09e0a6626a08092ad8f42c54 100644 --- a/checkpoint-8/training_args.bin +++ b/checkpoint-8/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 +oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 size 9809