diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3b114a21d88d9296b902d65c226ac3d55e35618
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f822a3c589425866b8e43570095c11e50db20fcba48e4a5a0e7a235b7c40527
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..688012f3efc26c6b8a2a63c4fb037844cb1e02bc
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f552989eece9f876e5767a916e82da6f6d8037a2cfe1276ea0c5b2d59a6b4977
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..adcca45778e9f0904c95014289135c95dfad3896
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:209855544cc22542ddf222d1ca8f421bf107ceaca61e678191a99b65d5fff3cf
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..500f7fcec1da5e76786d48dfa433a5f33eb3396e
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e5953d8c176e4bbf04085616f43e7ee8872553e393d7e6281779c2b7cf6fbca
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2d06891f4493676f362138104d8e37be9daee4a9
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb67622d2db0eda89f89c15d5107d6f79b83697c5714e2866ca481986e029ecf
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_config.json
@@ -0,0 +1,49 @@
+{
+  "task_type": "CAUSAL_LM",
+  "peft_type": "LORA",
+  "auto_mapping": null,
+  "peft_version": "0.18.1",
+  "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1",
+  "revision": null,
+  "inference_mode": false,
+  "r": 16,
+  "target_modules": [
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "exclude_modules": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "fan_in_fan_out": false,
+  "bias": "none",
+  "use_rslora": false,
+  "modules_to_save": null,
+  "init_lora_weights": true,
+  "layers_to_transform": null,
+  "layers_pattern": null,
+  "rank_pattern": {},
+  "alpha_pattern": {},
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "trainable_token_indices": null,
+  "loftq_config": {},
+  "eva_config": null,
+  "corda_config": null,
+  "use_dora": false,
+  "alora_invocation_tokens": null,
+  "use_qalora": false,
+  "qalora_group_size": 16,
+  "layer_replication": null,
+  "runtime_config": {
+    "ephemeral_gpu_offload": false
+  },
+  "lora_bias": false,
+  "target_parameters": null,
+  "arrow_config": null,
+  "ensure_weight_tying": false
+}
\ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e525bd474c3a8fbf6bd7d545ac6f6acb1458e562
--- /dev/null
+++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/lora_adapter/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b58e383eabfdf47630c4e95c903b9929efe156acebc80b668b5bf296040cad9
+size 119801496
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 16, + "target_modules": [ + "down_proj", + "up_proj", + "q_proj", + "o_proj", + "gate_proj", + "k_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 32, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09af40342a6190f6d6f847d40a42cd7e6275bffb --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00bec09157f2ee7dbce109867a22419be66fd7a570c9ae7df6349338632173e7 +size 119801496 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c58765dd76e06a2566ed7bb016bd9cdc8d1918f5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 16, + "target_modules": [ + "down_proj", + "up_proj", + "q_proj", + "o_proj", + "gate_proj", + "k_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 32, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5336692be479ea4cb8f6174567c9c3269e3e816 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecbe1f57a24c8880112a4f30c52682930041296406739a8ef3875b673dce9320 +size 119801496 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afc5242f49615c0a7fc329bab6afc30ec05d8b7f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8679bb391f69a1a137426889ecd2a592d673ce4461fab94ca19af74e9bf64c39 +size 239536248 diff 
--git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23b608893eb8423360aa7733c6ce90657e9730be --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665c58e6c5367d50ceef9e419ece12469ead9c6b5944d84195ee11d5e7ee99cc +size 239536248 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edb7e3e56bed36516e3fd33c74b4eccc1ea8e583 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b567bbd1225cee5dcfbc1e8de6f541011d67f6d3f729d2dbd98b5661c5f08da6 +size 239536248 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ce6eebdc1d14afbbf1cf37e8bfa64a812c44b80 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e941eec492fefda234a9da89b908c7fda18ba9ae3b5108e29cb723abf7e191a +size 239536248 diff 
--git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe77d73a69b33e49598469bc1e722583a075237a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7118e7a2f7010c18b62ce0f6bd8ed7e9657373e5a3728cb825feedd79e1b4b2 +size 239536248 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4c4cc6f8b3ec7b0583ee14a959ebc8c8bc3518a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620bd7650389babaf6e5076e1c2675f6c95d9138d9c317a2b91215bc978ea65b +size 239536248 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff1ddc61f18b4d95f41e4e14e3328b325980b52e --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1378877f1380822865cd69e3aa520b4653e57f2744e60a57be5ae86a627a6c +size 239536248 diff 
--git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..047a2ac0b98c8d68910482d47e6a9c1f0704edea --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 32, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 64, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76ac3336e5bf0a46e39da13f9c54aff71643a095 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9507ee9b3ca871711aea06b38f44e40fbac7392133a6a2222ede283f4c1376f0 +size 239536248 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..586ddfa7070e92202b6ee82f4016548b4240b7ee --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d560e44898365c51a026af8774bed301c94201088a500971f98b8c9701e91bd +size 479005032 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8dcd4878a1baccc8b11f9544726d3b097e97ad2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5950026a5e2bdcbd20f764182ddf53aafbb9a9a38d50daf6b92fe5f41210828f +size 479005032 diff 
--git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9900d9eb74a22e9a7bbf77c9ea946137e30b164 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d3dd11e2f379d49325d4cfb88c9ce50c42ee3c2857c070e7d1b363fd59666d +size 479005032 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac81cb7e675de1fddbd3bbfcaf0acfe1d5b31e04 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c77eeb1471d65ac214f4ec52ed3343ca093145712ffe4fbf946694dc7fac838 +size 479005032 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef62afd1615a1022972b147fc922e56ff52d551e --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7f5a8736495fca81a8121790f780718aade8683e965c1911c8dfe0a2ada6b4 +size 479005032 diff 
--git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8858c45f6407ca133bf41a2fba2e1ff20ba70da2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f19a4b8876b799ba6d699d3ea4ad887c57995f5c08c89590b6f966eeaae07e4 +size 479005032 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + 
"alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b71e04eb2be028e72116194ee253176c1a524a67 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d0459898f25508e73b421d187806d3ec076d4b321d7c31e54c7074bc3d6754 +size 479005032 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d373608a5cc2f8e99bfa095f98ced4027cc96858 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 64, + "target_modules": [ + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "exclude_modules": null, + "lora_alpha": 128, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5969e4b5913bfcabb83addaaf82af96224cb6a6d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066386917aeb9d85b6963d30b3100a8d62dcb4b49fd9ab2d4cc3bd12ff5bb2b6 +size 479005032 diff --git 
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c52eb435453d75c1b10978881cc4ff24692d0b81 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e2e6ad8893cb2eea88c870d5e2c36f89a7adc6ce069cb458a155f8723cafa4 +size 59933600
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..143156e9074f4fa0767e22665ab45af738e906b8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e345a458e28d170cf0d0a0808ba1195b480e97786604083a7fee8a4565971f +size 59933600
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1de56483b057c1091a86ec165c1beaa187707880 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066e01fee013fa15b55b325efe69fd4b552410a2dd7a35ef80e4b5fe129e84b6 +size 59933600
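The pointer sizes scale linearly with rank: 59,933,600 bytes per checkpoint at r = 8 here, versus 119,801,496 at r = 16 and 479,005,032 at r = 64 elsewhere in this diff. That is consistent with fp32 storage of the LoRA A/B matrices plus a small safetensors header. A back-of-envelope check, assuming Qwen2.5-3B's published dimensions (hidden size 2048, intermediate size 11008, 16 query heads and 2 KV heads of head dim 128, 36 layers); the helper name is illustrative:

```python
# Per-module (fan_in, fan_out) shapes, assuming Qwen2.5-3B dimensions.
SHAPES = {
    "q_proj": (2048, 2048),
    "k_proj": (2048, 256),
    "v_proj": (2048, 256),
    "o_proj": (2048, 2048),
    "gate_proj": (2048, 11008),
    "up_proj": (2048, 11008),
    "down_proj": (11008, 2048),
}

def lora_param_count(r: int, num_layers: int = 36) -> int:
    # Each adapted module adds an A matrix (r x fan_in) and a B matrix (fan_out x r).
    return num_layers * sum(r * (fan_in + fan_out) for fan_in, fan_out in SHAPES.values())

for r in (8, 16, 64):
    params = lora_param_count(r)
    print(f"r={r}: {params:,} params, {4 * params:,} bytes in fp32")
# r=8 gives 14,966,784 params -> 59,867,136 bytes; the checkpoint's 59,933,600
# bytes are that plus the safetensors header/metadata.
```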
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..408f1e8363d15dfeb7a28bde9f28eeba74f87460 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d8a1f7dc8f41a55ee0cac395672dfe85a0ee334325ea3860b80ca9a958ff8b +size 59933600
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58bc35801a2b094d0f355eff71c6220f6859e005 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e5321b85465a21e3af931de821afaac13f5da9fbf20225c2805ad7a6c33d67 +size 59933600
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffa6400ff2ba85d4388c62f8e79bcbba7f3954b4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cafcf727af4e7db4f59e3c53ec11324f96f8987b9bc3b6d5f9613c8456ee46d +size 59933600
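Note that base_model_name_or_path in every config points into a /dev/shm verl cache that only existed on the training host, so these adapters should be attached to the public base model instead. A minimal loading sketch (the adapter path is the global_step_448 directory added just above; merging is optional):

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the public base model; the cached path in adapter_config.json is ephemeral.
base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto")

# Attach the rank-8 adapter from the global_step_448 checkpoint in this diff.
model = PeftModel.from_pretrained(
    base,
    "qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/lora_adapter",
)
model = model.merge_and_unload()  # optional: fold the adapter into the base weights
```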
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4f3b94d1a26673de72a7a864adb7b17a1804e09 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6a665812c18578d968936daa0efe9916af521c1b27e29a744704b4ae58c95a +size 59933600
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_config.json b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddfcb3da856b789abe20c33a6077754b51877934 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_config.json @@ -0,0 +1,49 @@ +{ + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "peft_version": "0.18.1", + "base_model_name_or_path": "/dev/shm/verl-cache/b3c28c1b99a08b84eb28d5733b49c01c/aa8e72537993ba99e69dfaafa59ed015b17504d1", + "revision": null, + "inference_mode": false, + "r": 8, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": false, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "alora_invocation_tokens": null, + "use_qalora": false, + "qalora_group_size": 16, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false, + "target_parameters": null, + "arrow_config": null, + "ensure_weight_tying": false +} \ No newline at end of file
diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_model.safetensors b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8079d53b8c94f51211312376aa9351d534b17ba --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/lora_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97dd9e6da89c8b604d780a11915de8e583142023785da6bd5280ced22782024 +size 59933600