qingy2024 committed on
Commit
4aa1b59
·
verified ·
1 Parent(s): 564d417

Upload checkpoint 280

Browse files
Files changed (7) hide show
  1. README.md +4 -4
  2. adapter_model.safetensors +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. rng_state.pth +1 -1
  6. scheduler.pt +1 -1
  7. trainer_state.json +152 -2
README.md CHANGED
@@ -2,7 +2,7 @@
2
  base_model: InfiniAILab/OpenR1-Qwen-3B-SFT-Instruct
3
  library_name: peft
4
  ---
5
- # T2 3B Instruct (Step 270 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
15
  </head>
16
  <body>
17
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
18
- <div style="height: 30px; width: 1.05%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
19
- <!-- 1.0% -->
20
  </div>
21
  </div>
22
- <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 270 out of 25777 steps</p>
23
  </body>
24
  </html>
25
 
 
2
  base_model: InfiniAILab/OpenR1-Qwen-3B-SFT-Instruct
3
  library_name: peft
4
  ---
5
+ # T2 3B Instruct (Step 280 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
 
15
  </head>
16
  <body>
17
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
18
+ <div style="height: 30px; width: 1.09%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
19
+ <!-- 1.1% -->
20
  </div>
21
  </div>
22
+ <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 280 out of 25777 steps</p>
23
  </body>
24
  </html>
25
 
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cead17a5320a5c10b0ee5baeba1d641c6c10a68e5d970453a2cbed210ee53b29
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5639c61434b5424c12ee16bfbe51d66f817bd4df5ff7049cf60b085dad1ecd
3
  size 479005064
loss.png CHANGED

Git LFS Details

  • SHA256: 38011a186526d309a979cbbc9103fc5162ddbe3dbdf7b3cb22ecf844bb808cff
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB

Git LFS Details

  • SHA256: 41f46ac94d35c21de189e9baec797d2b1c3b140dadad7dff474529f4e737904c
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:151b8d066052ab8bb868fadd063ee847dc88b3b683dbaefde906c2e5374189be
3
  size 245114786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac998e1563beef6662160cf16512dfa25fbf99df69d5a028b3632edb2e9a02c6
3
  size 245114786
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb0d8036cd5c96e1082e04c12d9688b102c1f8baf27b3ecd2fa3377a49f35d43
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d81ef059592c6db0605efffbc0d8f87f2bd2b94e94791b4f00555c956e60895a
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:360a0666da510367350d878a2bfe8217b58d367d1e0cf34a572f489a33333dfa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e86502432482f478783166257eb71f4b4a191074493563cc72671ac0a92593
3
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01047425080011638,
6
  "eval_steps": 500,
7
- "global_step": 270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4058,6 +4058,156 @@
4058
  "rewards/strict_format_reward_func": 0.025000000139698386,
4059
  "rewards/thinkcount_reward_func": 0.03750000149011612,
4060
  "step": 270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4061
  }
4062
  ],
4063
  "logging_steps": 1,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.010862186014935505,
6
  "eval_steps": 500,
7
+ "global_step": 280,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4058
  "rewards/strict_format_reward_func": 0.025000000139698386,
4059
  "rewards/thinkcount_reward_func": 0.03750000149011612,
4060
  "step": 270
4061
+ },
4062
+ {
4063
+ "completion_length": 8108.25,
4064
+ "epoch": 0.010513044321598292,
4065
+ "grad_norm": 5.077273845672607,
4066
+ "kl": 0.0350515958853066,
4067
+ "learning_rate": 5.236617532971297e-07,
4068
+ "loss": 0.0014,
4069
+ "reward": 0.7935546757653356,
4070
+ "reward_std": 0.832713857293129,
4071
+ "rewards/correctness_reward_func": 0.6904296875,
4072
+ "rewards/soft_format_reward_func": 0.03125,
4073
+ "rewards/strict_format_reward_func": 0.043750000186264515,
4074
+ "rewards/thinkcount_reward_func": 0.028125000651925802,
4075
+ "step": 271
4076
+ },
4077
+ {
4078
+ "completion_length": 8054.5,
4079
+ "epoch": 0.010551837843080206,
4080
+ "grad_norm": 3.678874969482422,
4081
+ "kl": 0.016557272523641586,
4082
+ "learning_rate": 5.256012412723042e-07,
4083
+ "loss": 0.0007,
4084
+ "reward": 0.27392577938735485,
4085
+ "reward_std": 0.29840535297989845,
4086
+ "rewards/correctness_reward_func": 0.20361328125,
4087
+ "rewards/soft_format_reward_func": 0.010937500046566129,
4088
+ "rewards/strict_format_reward_func": 0.018750000279396772,
4089
+ "rewards/thinkcount_reward_func": 0.04062500083819032,
4090
+ "step": 272
4091
+ },
4092
+ {
4093
+ "completion_length": 8114.75,
4094
+ "epoch": 0.010590631364562118,
4095
+ "grad_norm": 2.8407578468322754,
4096
+ "kl": 0.014166628941893578,
4097
+ "learning_rate": 5.275407292474787e-07,
4098
+ "loss": 0.0006,
4099
+ "reward": 0.7014647871255875,
4100
+ "reward_std": 0.9922140687704086,
4101
+ "rewards/correctness_reward_func": 0.61865234375,
4102
+ "rewards/soft_format_reward_func": 0.017187500139698386,
4103
+ "rewards/strict_format_reward_func": 0.03125000046566129,
4104
+ "rewards/thinkcount_reward_func": 0.034374999813735485,
4105
+ "step": 273
4106
+ },
4107
+ {
4108
+ "completion_length": 8095.0,
4109
+ "epoch": 0.01062942488604403,
4110
+ "grad_norm": 3.4110143184661865,
4111
+ "kl": 0.023500604555010796,
4112
+ "learning_rate": 5.294802172226533e-07,
4113
+ "loss": 0.0009,
4114
+ "reward": 0.6369484011083841,
4115
+ "reward_std": 0.5855994820594788,
4116
+ "rewards/correctness_reward_func": 0.5556640625,
4117
+ "rewards/soft_format_reward_func": 0.018749999813735485,
4118
+ "rewards/strict_format_reward_func": 0.02500000037252903,
4119
+ "rewards/thinkcount_reward_func": 0.0375343756750226,
4120
+ "step": 274
4121
+ },
4122
+ {
4123
+ "completion_length": 8111.75,
4124
+ "epoch": 0.010668218407525944,
4125
+ "grad_norm": 8.018135070800781,
4126
+ "kl": 0.028027109568938613,
4127
+ "learning_rate": 5.314197051978278e-07,
4128
+ "loss": 0.0011,
4129
+ "reward": 0.9537233952432871,
4130
+ "reward_std": 0.4720949064940214,
4131
+ "rewards/correctness_reward_func": 0.8564453125,
4132
+ "rewards/soft_format_reward_func": 0.025000000605359674,
4133
+ "rewards/strict_format_reward_func": 0.03750000102445483,
4134
+ "rewards/thinkcount_reward_func": 0.03477812418714166,
4135
+ "step": 275
4136
+ },
4137
+ {
4138
+ "completion_length": 8068.75,
4139
+ "epoch": 0.010707011929007856,
4140
+ "grad_norm": 6.280547142028809,
4141
+ "kl": 0.021743500605225563,
4142
+ "learning_rate": 5.333591931730024e-07,
4143
+ "loss": 0.0009,
4144
+ "reward": 0.7119140401482582,
4145
+ "reward_std": 0.8066249378025532,
4146
+ "rewards/correctness_reward_func": 0.6259765625,
4147
+ "rewards/soft_format_reward_func": 0.02031250041909516,
4148
+ "rewards/strict_format_reward_func": 0.03125000046566129,
4149
+ "rewards/thinkcount_reward_func": 0.034374999813735485,
4150
+ "step": 276
4151
+ },
4152
+ {
4153
+ "completion_length": 8114.25,
4154
+ "epoch": 0.010745805450489768,
4155
+ "grad_norm": 4.923548221588135,
4156
+ "kl": 0.03391407220624387,
4157
+ "learning_rate": 5.352986811481769e-07,
4158
+ "loss": 0.0014,
4159
+ "reward": 1.096439028158784,
4160
+ "reward_std": 0.7743532722815871,
4161
+ "rewards/correctness_reward_func": 0.9931640625,
4162
+ "rewards/soft_format_reward_func": 0.03125000046566129,
4163
+ "rewards/strict_format_reward_func": 0.04374999995343387,
4164
+ "rewards/thinkcount_reward_func": 0.028275000513531268,
4165
+ "step": 277
4166
+ },
4167
+ {
4168
+ "completion_length": 8110.75,
4169
+ "epoch": 0.010784598971971681,
4170
+ "grad_norm": 1.8306553363800049,
4171
+ "kl": 0.011716888286173344,
4172
+ "learning_rate": 5.372381691233515e-07,
4173
+ "loss": 0.0005,
4174
+ "reward": 0.4777343515306711,
4175
+ "reward_std": 0.5657017529010773,
4176
+ "rewards/correctness_reward_func": 0.412109375,
4177
+ "rewards/soft_format_reward_func": 0.007812500232830644,
4178
+ "rewards/strict_format_reward_func": 0.015625000465661287,
4179
+ "rewards/thinkcount_reward_func": 0.04218750074505806,
4180
+ "step": 278
4181
+ },
4182
+ {
4183
+ "completion_length": 8072.25,
4184
+ "epoch": 0.010823392493453593,
4185
+ "grad_norm": 5.824916839599609,
4186
+ "kl": 0.01762935658916831,
4187
+ "learning_rate": 5.39177657098526e-07,
4188
+ "loss": 0.0007,
4189
+ "reward": 0.8440061956644058,
4190
+ "reward_std": 1.054400384426117,
4191
+ "rewards/correctness_reward_func": 0.73828125,
4192
+ "rewards/soft_format_reward_func": 0.029687500558793545,
4193
+ "rewards/strict_format_reward_func": 0.04375000158324838,
4194
+ "rewards/thinkcount_reward_func": 0.03228750033304095,
4195
+ "step": 279
4196
+ },
4197
+ {
4198
+ "completion_length": 8096.25,
4199
+ "epoch": 0.010862186014935505,
4200
+ "grad_norm": 7.30731725692749,
4201
+ "kl": 0.034460997907444835,
4202
+ "learning_rate": 5.411171450737006e-07,
4203
+ "loss": 0.0014,
4204
+ "reward": 0.5892468765377998,
4205
+ "reward_std": 0.691438059322536,
4206
+ "rewards/correctness_reward_func": 0.474609375,
4207
+ "rewards/soft_format_reward_func": 0.035937500186264515,
4208
+ "rewards/strict_format_reward_func": 0.05937500111758709,
4209
+ "rewards/thinkcount_reward_func": 0.01932500023394823,
4210
+ "step": 280
4211
  }
4212
  ],
4213
  "logging_steps": 1,