smirki commited on
Commit
069984c
·
verified ·
1 Parent(s): 445226d

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1701d1397a8f6bba03a038aad0ad88dddbe56212ed6ac753bf48dccf50090e24
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0b9fe960d8b700432d64261f2e9fb8b5e2feb648ccba2b26954af39f14f187
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d5cbf4360926fd5a69ab224acee68af41a01edad72a7837d83695317fff4262
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6980510b381af43d0d08ceb7a59264815242f1f6223f0a5de785069b7c5b74d7
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7882cf9d1800e045d97afc34ed2d790cd5f0da147adeb6824c51ec77a35e0c5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e7217b76d9787359cf45d83ba0b63cb6335c60810ebfe7324880fdde71d442
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47e7c3293120b0e1021fffede4430570f0c03435609ec93915f9f3961852aa6d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d42f94a00d3d38a441f86dbb87d1da2ac6b6e6fceeb3fff0437ffb348f193a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0007964256417199608,
5
  "eval_steps": 500,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -127,6 +127,66 @@
127
  "reward_std": 1.2458222389221192,
128
  "rewards/custom_reward_logic_v2": -3.805912530422211,
129
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ],
132
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0011946384625799412,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
127
  "reward_std": 1.2458222389221192,
128
  "rewards/custom_reward_logic_v2": -3.805912530422211,
129
  "step": 100
130
+ },
131
+ {
132
+ "completion_length": 729.04375,
133
+ "epoch": 0.0008760682058919569,
134
+ "grad_norm": 0.1617293506860733,
135
+ "kl": 0.0018967354553751647,
136
+ "learning_rate": 4.978612153434527e-06,
137
+ "loss": 0.0001,
138
+ "reward": -3.071175017207861,
139
+ "reward_std": 1.3524149775505065,
140
+ "rewards/custom_reward_logic_v2": -3.071175017207861,
141
+ "step": 110
142
+ },
143
+ {
144
+ "completion_length": 641.74375,
145
+ "epoch": 0.000955710770063953,
146
+ "grad_norm": 0.26608461141586304,
147
+ "kl": 0.0029119997401721776,
148
+ "learning_rate": 4.962019382530521e-06,
149
+ "loss": 0.0001,
150
+ "reward": -2.690687493979931,
151
+ "reward_std": 1.0754198133945465,
152
+ "rewards/custom_reward_logic_v2": -2.690687493979931,
153
+ "step": 120
154
+ },
155
+ {
156
+ "completion_length": 883.9875,
157
+ "epoch": 0.001035353334235949,
158
+ "grad_norm": 0.7612231373786926,
159
+ "kl": 0.003597881377208978,
160
+ "learning_rate": 4.9407400177998335e-06,
161
+ "loss": 0.0001,
162
+ "reward": -3.8035999715328215,
163
+ "reward_std": 1.2502110481262207,
164
+ "rewards/custom_reward_logic_v2": -3.8035999715328215,
165
+ "step": 130
166
+ },
167
+ {
168
+ "completion_length": 524.48125,
169
+ "epoch": 0.0011149958984079452,
170
+ "grad_norm": 0.9543402791023254,
171
+ "kl": 0.08978197913384064,
172
+ "learning_rate": 4.914814565722671e-06,
173
+ "loss": 0.0036,
174
+ "reward": -2.0596874909475447,
175
+ "reward_std": 1.3678732179105282,
176
+ "rewards/custom_reward_logic_v2": -2.0596874909475447,
177
+ "step": 140
178
+ },
179
+ {
180
+ "completion_length": 46.43125,
181
+ "epoch": 0.0011946384625799412,
182
+ "grad_norm": 0.7850804328918457,
183
+ "kl": 0.3018287725746632,
184
+ "learning_rate": 4.884292376870567e-06,
185
+ "loss": 0.0121,
186
+ "reward": -0.04024999849498272,
187
+ "reward_std": 0.430637900531292,
188
+ "rewards/custom_reward_logic_v2": -0.04024999849498272,
189
+ "step": 150
190
  }
191
  ],
192
  "logging_steps": 10,