irishprancer commited on
Commit
bf494ad
·
verified ·
1 Parent(s): faf8b9b

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a96603bd8eee4cf918c9772a26520b8760099bd8f4cd2de014624bf4fa4c7f
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c571c3d9bc97035512218123eb16c4a5ac39adeb400b8f87dca66f9daff0f4c
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4fd2108d49d07e4d7bee6998e7ecb19e375ff49d05de755c3a44b4b5c6c6dc5
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bb2b5192e62889a659f5c32729030ddd962b6ab8d9ab4f6e8f06e1b2f7f86d0
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5feb56512e955691dc9bb9a1e37b9dd590e06a961d7d94560b679e2730b03194
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:037d13720220086c05f76f1146cd4356e8b9d075b5d306338df00d366045e1c1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cf3f988e8fed2daa2e801eb1f19b681872781cf57f0fb7b896e859a12cfe2bb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f4c00d522bdde510099aafe1617b13d114dce17a17b44e05876f016f4e4d7af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7962104678153992,
3
- "best_model_checkpoint": "./output/checkpoint-150",
4
- "epoch": 6.521739130434782,
5
  "eval_steps": 150,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -152,6 +152,151 @@
152
  "eval_samples_per_second": 25.287,
153
  "eval_steps_per_second": 25.287,
154
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  ],
157
  "logging_steps": 10,
@@ -171,7 +316,7 @@
171
  "attributes": {}
172
  }
173
  },
174
- "total_flos": 3894839614291968.0,
175
  "train_batch_size": 4,
176
  "trial_name": null,
177
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7310147881507874,
3
+ "best_model_checkpoint": "./output/checkpoint-300",
4
+ "epoch": 13.043478260869565,
5
  "eval_steps": 150,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
152
  "eval_samples_per_second": 25.287,
153
  "eval_steps_per_second": 25.287,
154
  "step": 150
155
+ },
156
+ {
157
+ "epoch": 6.956521739130435,
158
+ "grad_norm": 1.540989637374878,
159
+ "learning_rate": 2.9999892980750297e-05,
160
+ "loss": 0.6586,
161
+ "step": 160
162
+ },
163
+ {
164
+ "epoch": 7.391304347826087,
165
+ "grad_norm": 1.345400094985962,
166
+ "learning_rate": 2.9999854334972675e-05,
167
+ "loss": 0.739,
168
+ "step": 170
169
+ },
170
+ {
171
+ "epoch": 7.826086956521739,
172
+ "grad_norm": 1.725967526435852,
173
+ "learning_rate": 2.999980974373204e-05,
174
+ "loss": 0.7291,
175
+ "step": 180
176
+ },
177
+ {
178
+ "epoch": 8.26086956521739,
179
+ "grad_norm": 1.5401579141616821,
180
+ "learning_rate": 2.9999759207046075e-05,
181
+ "loss": 0.6245,
182
+ "step": 190
183
+ },
184
+ {
185
+ "epoch": 8.695652173913043,
186
+ "grad_norm": 1.7425003051757812,
187
+ "learning_rate": 2.9999702724934804e-05,
188
+ "loss": 0.6765,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 9.130434782608695,
193
+ "grad_norm": 1.0419254302978516,
194
+ "learning_rate": 2.999964029742062e-05,
195
+ "loss": 0.6524,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 9.565217391304348,
200
+ "grad_norm": 1.2119251489639282,
201
+ "learning_rate": 2.9999571924528263e-05,
202
+ "loss": 0.5593,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 10.0,
207
+ "grad_norm": 1.5276358127593994,
208
+ "learning_rate": 2.9999497606284837e-05,
209
+ "loss": 0.756,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 10.434782608695652,
214
+ "grad_norm": 1.413993000984192,
215
+ "learning_rate": 2.9999417342719796e-05,
216
+ "loss": 0.7116,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 10.869565217391305,
221
+ "grad_norm": 0.9750322699546814,
222
+ "learning_rate": 2.9999331133864956e-05,
223
+ "loss": 0.5897,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 11.304347826086957,
228
+ "grad_norm": 1.1936322450637817,
229
+ "learning_rate": 2.9999238979754485e-05,
230
+ "loss": 0.6547,
231
+ "step": 260
232
+ },
233
+ {
234
+ "epoch": 11.73913043478261,
235
+ "grad_norm": 1.0489903688430786,
236
+ "learning_rate": 2.999914088042492e-05,
237
+ "loss": 0.6475,
238
+ "step": 270
239
+ },
240
+ {
241
+ "epoch": 12.173913043478262,
242
+ "grad_norm": 1.3138858079910278,
243
+ "learning_rate": 2.9999036835915132e-05,
244
+ "loss": 0.5937,
245
+ "step": 280
246
+ },
247
+ {
248
+ "epoch": 12.608695652173914,
249
+ "grad_norm": 1.0842424631118774,
250
+ "learning_rate": 2.9998926846266365e-05,
251
+ "loss": 0.6326,
252
+ "step": 290
253
+ },
254
+ {
255
+ "epoch": 13.043478260869565,
256
+ "grad_norm": 1.3883247375488281,
257
+ "learning_rate": 2.9998810911522213e-05,
258
+ "loss": 0.5806,
259
+ "step": 300
260
+ },
261
+ {
262
+ "epoch": 13.043478260869565,
263
+ "eval_loss": 0.7310147881507874,
264
+ "eval_runtime": 0.4443,
265
+ "eval_samples_per_second": 22.51,
266
+ "eval_steps_per_second": 22.51,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 13.043478260869565,
271
+ "eval_loss": 0.8606523275375366,
272
+ "eval_runtime": 0.4404,
273
+ "eval_samples_per_second": 22.708,
274
+ "eval_steps_per_second": 22.708,
275
+ "step": 300
276
+ },
277
+ {
278
+ "epoch": 13.043478260869565,
279
+ "eval_loss": 0.7310147881507874,
280
+ "eval_runtime": 0.4391,
281
+ "eval_samples_per_second": 22.773,
282
+ "eval_steps_per_second": 22.773,
283
+ "step": 300
284
+ },
285
+ {
286
+ "epoch": 13.043478260869565,
287
+ "eval_loss": 0.7758375406265259,
288
+ "eval_runtime": 0.4396,
289
+ "eval_samples_per_second": 22.75,
290
+ "eval_steps_per_second": 22.75,
291
+ "step": 300
292
+ },
293
+ {
294
+ "epoch": 13.043478260869565,
295
+ "eval_loss": 0.860957145690918,
296
+ "eval_runtime": 0.4444,
297
+ "eval_samples_per_second": 22.5,
298
+ "eval_steps_per_second": 22.5,
299
+ "step": 300
300
  }
301
  ],
302
  "logging_steps": 10,
 
316
  "attributes": {}
317
  }
318
  },
319
+ "total_flos": 7724643094462464.0,
320
  "train_batch_size": 4,
321
  "trial_name": null,
322
  "trial_params": null