Upload checkpoint 280
Browse files- README.md +4 -4
- adapter_model.safetensors +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +152 -2
README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
base_model: InfiniAILab/OpenR1-Qwen-3B-SFT-Instruct
|
| 3 |
library_name: peft
|
| 4 |
---
|
| 5 |
-
# T2 3B Instruct (Step
|
| 6 |
|
| 7 |
> [!NOTE]
|
| 8 |
> Training in progress...
|
|
@@ -15,11 +15,11 @@ library_name: peft
|
|
| 15 |
</head>
|
| 16 |
<body>
|
| 17 |
<div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
|
| 18 |
-
<div style="height: 30px; width: 1.
|
| 19 |
-
<!-- 1.
|
| 20 |
</div>
|
| 21 |
</div>
|
| 22 |
-
<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
|
| 23 |
</body>
|
| 24 |
</html>
|
| 25 |
|
|
|
|
| 2 |
base_model: InfiniAILab/OpenR1-Qwen-3B-SFT-Instruct
|
| 3 |
library_name: peft
|
| 4 |
---
|
| 5 |
+
# T2 3B Instruct (Step 280 Checkpoint)
|
| 6 |
|
| 7 |
> [!NOTE]
|
| 8 |
> Training in progress...
|
|
|
|
| 15 |
</head>
|
| 16 |
<body>
|
| 17 |
<div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
|
| 18 |
+
<div style="height: 30px; width: 1.09%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
|
| 19 |
+
<!-- 1.1% -->
|
| 20 |
</div>
|
| 21 |
</div>
|
| 22 |
+
<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 280 out of 25777 steps</p>
|
| 23 |
</body>
|
| 24 |
</html>
|
| 25 |
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca5639c61434b5424c12ee16bfbe51d66f817bd4df5ff7049cf60b085dad1ecd
|
| 3 |
size 479005064
|
loss.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 245114786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac998e1563beef6662160cf16512dfa25fbf99df69d5a028b3632edb2e9a02c6
|
| 3 |
size 245114786
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d81ef059592c6db0605efffbc0d8f87f2bd2b94e94791b4f00555c956e60895a
|
| 3 |
size 14244
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89e86502432482f478783166257eb71f4b4a191074493563cc72671ac0a92593
|
| 3 |
size 1064
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4058,6 +4058,156 @@
|
|
| 4058 |
"rewards/strict_format_reward_func": 0.025000000139698386,
|
| 4059 |
"rewards/thinkcount_reward_func": 0.03750000149011612,
|
| 4060 |
"step": 270
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4061 |
}
|
| 4062 |
],
|
| 4063 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.010862186014935505,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 280,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4058 |
"rewards/strict_format_reward_func": 0.025000000139698386,
|
| 4059 |
"rewards/thinkcount_reward_func": 0.03750000149011612,
|
| 4060 |
"step": 270
|
| 4061 |
+
},
|
| 4062 |
+
{
|
| 4063 |
+
"completion_length": 8108.25,
|
| 4064 |
+
"epoch": 0.010513044321598292,
|
| 4065 |
+
"grad_norm": 5.077273845672607,
|
| 4066 |
+
"kl": 0.0350515958853066,
|
| 4067 |
+
"learning_rate": 5.236617532971297e-07,
|
| 4068 |
+
"loss": 0.0014,
|
| 4069 |
+
"reward": 0.7935546757653356,
|
| 4070 |
+
"reward_std": 0.832713857293129,
|
| 4071 |
+
"rewards/correctness_reward_func": 0.6904296875,
|
| 4072 |
+
"rewards/soft_format_reward_func": 0.03125,
|
| 4073 |
+
"rewards/strict_format_reward_func": 0.043750000186264515,
|
| 4074 |
+
"rewards/thinkcount_reward_func": 0.028125000651925802,
|
| 4075 |
+
"step": 271
|
| 4076 |
+
},
|
| 4077 |
+
{
|
| 4078 |
+
"completion_length": 8054.5,
|
| 4079 |
+
"epoch": 0.010551837843080206,
|
| 4080 |
+
"grad_norm": 3.678874969482422,
|
| 4081 |
+
"kl": 0.016557272523641586,
|
| 4082 |
+
"learning_rate": 5.256012412723042e-07,
|
| 4083 |
+
"loss": 0.0007,
|
| 4084 |
+
"reward": 0.27392577938735485,
|
| 4085 |
+
"reward_std": 0.29840535297989845,
|
| 4086 |
+
"rewards/correctness_reward_func": 0.20361328125,
|
| 4087 |
+
"rewards/soft_format_reward_func": 0.010937500046566129,
|
| 4088 |
+
"rewards/strict_format_reward_func": 0.018750000279396772,
|
| 4089 |
+
"rewards/thinkcount_reward_func": 0.04062500083819032,
|
| 4090 |
+
"step": 272
|
| 4091 |
+
},
|
| 4092 |
+
{
|
| 4093 |
+
"completion_length": 8114.75,
|
| 4094 |
+
"epoch": 0.010590631364562118,
|
| 4095 |
+
"grad_norm": 2.8407578468322754,
|
| 4096 |
+
"kl": 0.014166628941893578,
|
| 4097 |
+
"learning_rate": 5.275407292474787e-07,
|
| 4098 |
+
"loss": 0.0006,
|
| 4099 |
+
"reward": 0.7014647871255875,
|
| 4100 |
+
"reward_std": 0.9922140687704086,
|
| 4101 |
+
"rewards/correctness_reward_func": 0.61865234375,
|
| 4102 |
+
"rewards/soft_format_reward_func": 0.017187500139698386,
|
| 4103 |
+
"rewards/strict_format_reward_func": 0.03125000046566129,
|
| 4104 |
+
"rewards/thinkcount_reward_func": 0.034374999813735485,
|
| 4105 |
+
"step": 273
|
| 4106 |
+
},
|
| 4107 |
+
{
|
| 4108 |
+
"completion_length": 8095.0,
|
| 4109 |
+
"epoch": 0.01062942488604403,
|
| 4110 |
+
"grad_norm": 3.4110143184661865,
|
| 4111 |
+
"kl": 0.023500604555010796,
|
| 4112 |
+
"learning_rate": 5.294802172226533e-07,
|
| 4113 |
+
"loss": 0.0009,
|
| 4114 |
+
"reward": 0.6369484011083841,
|
| 4115 |
+
"reward_std": 0.5855994820594788,
|
| 4116 |
+
"rewards/correctness_reward_func": 0.5556640625,
|
| 4117 |
+
"rewards/soft_format_reward_func": 0.018749999813735485,
|
| 4118 |
+
"rewards/strict_format_reward_func": 0.02500000037252903,
|
| 4119 |
+
"rewards/thinkcount_reward_func": 0.0375343756750226,
|
| 4120 |
+
"step": 274
|
| 4121 |
+
},
|
| 4122 |
+
{
|
| 4123 |
+
"completion_length": 8111.75,
|
| 4124 |
+
"epoch": 0.010668218407525944,
|
| 4125 |
+
"grad_norm": 8.018135070800781,
|
| 4126 |
+
"kl": 0.028027109568938613,
|
| 4127 |
+
"learning_rate": 5.314197051978278e-07,
|
| 4128 |
+
"loss": 0.0011,
|
| 4129 |
+
"reward": 0.9537233952432871,
|
| 4130 |
+
"reward_std": 0.4720949064940214,
|
| 4131 |
+
"rewards/correctness_reward_func": 0.8564453125,
|
| 4132 |
+
"rewards/soft_format_reward_func": 0.025000000605359674,
|
| 4133 |
+
"rewards/strict_format_reward_func": 0.03750000102445483,
|
| 4134 |
+
"rewards/thinkcount_reward_func": 0.03477812418714166,
|
| 4135 |
+
"step": 275
|
| 4136 |
+
},
|
| 4137 |
+
{
|
| 4138 |
+
"completion_length": 8068.75,
|
| 4139 |
+
"epoch": 0.010707011929007856,
|
| 4140 |
+
"grad_norm": 6.280547142028809,
|
| 4141 |
+
"kl": 0.021743500605225563,
|
| 4142 |
+
"learning_rate": 5.333591931730024e-07,
|
| 4143 |
+
"loss": 0.0009,
|
| 4144 |
+
"reward": 0.7119140401482582,
|
| 4145 |
+
"reward_std": 0.8066249378025532,
|
| 4146 |
+
"rewards/correctness_reward_func": 0.6259765625,
|
| 4147 |
+
"rewards/soft_format_reward_func": 0.02031250041909516,
|
| 4148 |
+
"rewards/strict_format_reward_func": 0.03125000046566129,
|
| 4149 |
+
"rewards/thinkcount_reward_func": 0.034374999813735485,
|
| 4150 |
+
"step": 276
|
| 4151 |
+
},
|
| 4152 |
+
{
|
| 4153 |
+
"completion_length": 8114.25,
|
| 4154 |
+
"epoch": 0.010745805450489768,
|
| 4155 |
+
"grad_norm": 4.923548221588135,
|
| 4156 |
+
"kl": 0.03391407220624387,
|
| 4157 |
+
"learning_rate": 5.352986811481769e-07,
|
| 4158 |
+
"loss": 0.0014,
|
| 4159 |
+
"reward": 1.096439028158784,
|
| 4160 |
+
"reward_std": 0.7743532722815871,
|
| 4161 |
+
"rewards/correctness_reward_func": 0.9931640625,
|
| 4162 |
+
"rewards/soft_format_reward_func": 0.03125000046566129,
|
| 4163 |
+
"rewards/strict_format_reward_func": 0.04374999995343387,
|
| 4164 |
+
"rewards/thinkcount_reward_func": 0.028275000513531268,
|
| 4165 |
+
"step": 277
|
| 4166 |
+
},
|
| 4167 |
+
{
|
| 4168 |
+
"completion_length": 8110.75,
|
| 4169 |
+
"epoch": 0.010784598971971681,
|
| 4170 |
+
"grad_norm": 1.8306553363800049,
|
| 4171 |
+
"kl": 0.011716888286173344,
|
| 4172 |
+
"learning_rate": 5.372381691233515e-07,
|
| 4173 |
+
"loss": 0.0005,
|
| 4174 |
+
"reward": 0.4777343515306711,
|
| 4175 |
+
"reward_std": 0.5657017529010773,
|
| 4176 |
+
"rewards/correctness_reward_func": 0.412109375,
|
| 4177 |
+
"rewards/soft_format_reward_func": 0.007812500232830644,
|
| 4178 |
+
"rewards/strict_format_reward_func": 0.015625000465661287,
|
| 4179 |
+
"rewards/thinkcount_reward_func": 0.04218750074505806,
|
| 4180 |
+
"step": 278
|
| 4181 |
+
},
|
| 4182 |
+
{
|
| 4183 |
+
"completion_length": 8072.25,
|
| 4184 |
+
"epoch": 0.010823392493453593,
|
| 4185 |
+
"grad_norm": 5.824916839599609,
|
| 4186 |
+
"kl": 0.01762935658916831,
|
| 4187 |
+
"learning_rate": 5.39177657098526e-07,
|
| 4188 |
+
"loss": 0.0007,
|
| 4189 |
+
"reward": 0.8440061956644058,
|
| 4190 |
+
"reward_std": 1.054400384426117,
|
| 4191 |
+
"rewards/correctness_reward_func": 0.73828125,
|
| 4192 |
+
"rewards/soft_format_reward_func": 0.029687500558793545,
|
| 4193 |
+
"rewards/strict_format_reward_func": 0.04375000158324838,
|
| 4194 |
+
"rewards/thinkcount_reward_func": 0.03228750033304095,
|
| 4195 |
+
"step": 279
|
| 4196 |
+
},
|
| 4197 |
+
{
|
| 4198 |
+
"completion_length": 8096.25,
|
| 4199 |
+
"epoch": 0.010862186014935505,
|
| 4200 |
+
"grad_norm": 7.30731725692749,
|
| 4201 |
+
"kl": 0.034460997907444835,
|
| 4202 |
+
"learning_rate": 5.411171450737006e-07,
|
| 4203 |
+
"loss": 0.0014,
|
| 4204 |
+
"reward": 0.5892468765377998,
|
| 4205 |
+
"reward_std": 0.691438059322536,
|
| 4206 |
+
"rewards/correctness_reward_func": 0.474609375,
|
| 4207 |
+
"rewards/soft_format_reward_func": 0.035937500186264515,
|
| 4208 |
+
"rewards/strict_format_reward_func": 0.05937500111758709,
|
| 4209 |
+
"rewards/thinkcount_reward_func": 0.01932500023394823,
|
| 4210 |
+
"step": 280
|
| 4211 |
}
|
| 4212 |
],
|
| 4213 |
"logging_steps": 1,
|