BayesTensor committed on
Commit
81f0d58
·
verified ·
1 Parent(s): 1e092dc

Training in progress, epoch 20, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34a34dbc85953b1cdb0bbdaeaaa03b802543a05a24f2a310fb563e061abb91e3
3
  size 598439784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5218f8af5e122178108819c55ff9986079205f7a6a7d74fda8cd5140f9258c72
3
  size 598439784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d512c13bd93229b894e041a4d4fbf657700c4467bba68b92fa186e623ee6417
3
  size 1196967418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb84cd784a908216d9ae0bcbf279833d869a838d7b6224336819fbdcd50e697e
3
  size 1196967418
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b32902f8553b01c87284a6114f412e32aef4bacd0c7545b1d42159c57be6ff4b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1e0c75120694c14303d1f1057e38b3cd324e071e40d05f3ae763c9e7d264782
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c19e7df21c0f0cd8077c7593f632ab8849023b3e4e1c28f2e05b1b3ded8bfeb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736cbdd4cd09732dc21dbb2f4050abbc24d22cbaa260b4ff80bdb4abddb3c2b0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8181082820112143,
3
  "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196",
4
- "epoch": 17.0,
5
  "eval_steps": 500,
6
- "global_step": 1564,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -265,6 +265,54 @@
265
  "eval_samples_per_second": 12.627,
266
  "eval_steps_per_second": 3.157,
267
  "step": 1564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
269
  ],
270
  "logging_steps": 100,
@@ -284,7 +332,7 @@
284
  "attributes": {}
285
  }
286
  },
287
- "total_flos": 1.0737199862844e+16,
288
  "train_batch_size": 4,
289
  "trial_name": null,
290
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8181082820112143,
3
  "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196",
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 1840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
265
  "eval_samples_per_second": 12.627,
266
  "eval_steps_per_second": 3.157,
267
  "step": 1564
268
+ },
269
+ {
270
+ "epoch": 17.391304347826086,
271
+ "grad_norm": 8.947516441345215,
272
+ "learning_rate": 1.5217391304347828e-05,
273
+ "loss": 0.0244,
274
+ "step": 1600
275
+ },
276
+ {
277
+ "epoch": 18.0,
278
+ "eval_f1": 0.75442242114237,
279
+ "eval_loss": 1.5790338516235352,
280
+ "eval_runtime": 7.2916,
281
+ "eval_samples_per_second": 12.617,
282
+ "eval_steps_per_second": 3.154,
283
+ "step": 1656
284
+ },
285
+ {
286
+ "epoch": 18.47826086956522,
287
+ "grad_norm": 0.016934270039200783,
288
+ "learning_rate": 1.3043478260869566e-05,
289
+ "loss": 0.0002,
290
+ "step": 1700
291
+ },
292
+ {
293
+ "epoch": 19.0,
294
+ "eval_f1": 0.7539112050739958,
295
+ "eval_loss": 1.929887056350708,
296
+ "eval_runtime": 7.2258,
297
+ "eval_samples_per_second": 12.732,
298
+ "eval_steps_per_second": 3.183,
299
+ "step": 1748
300
+ },
301
+ {
302
+ "epoch": 19.565217391304348,
303
+ "grad_norm": 0.02545306272804737,
304
+ "learning_rate": 1.0869565217391305e-05,
305
+ "loss": 0.0004,
306
+ "step": 1800
307
+ },
308
+ {
309
+ "epoch": 20.0,
310
+ "eval_f1": 0.7634584417193113,
311
+ "eval_loss": 1.7885226011276245,
312
+ "eval_runtime": 7.2042,
313
+ "eval_samples_per_second": 12.77,
314
+ "eval_steps_per_second": 3.193,
315
+ "step": 1840
316
  }
317
  ],
318
  "logging_steps": 100,
 
332
  "attributes": {}
333
  }
334
  },
335
+ "total_flos": 1.263199983864e+16,
336
  "train_batch_size": 4,
337
  "trial_name": null,
338
  "trial_params": null