BayesTensor committed on
Commit
e69a6d8
·
verified ·
1 Parent(s): c7bf711

Training in progress, epoch 16, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18d6f02ce80cc4da6ff5dc5d729a22ffa9bbadd92d73c483add17bfa6d1a6124
3
  size 598439784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd4ebc19581170d4e3e2289f89a6b6b5d0f29626415fdc205ee019874c846d0d
3
  size 598439784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:769f7d258ccb85a2e7803b7507a411edf5732fe2da0915d07e26c7f6f5b4ae64
3
  size 1196967418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ac87d06385767c09b376f483d886f26b33863219332f0831983eb51aff3b09
3
  size 1196967418
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d7793d0691719328c173827f0542bcfde3dd39ca36616559c39213d0dbbfb87
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:913ac14e71df2a85f158bf8200b2600b5f905ccb544ef3db6dcd4313366367a9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b604cae0dd7b3123ee29a4c730cc8b904cfff86463b91424f3f841bb58fa2dcd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c831d2844bae6e5dfcbb7cfa2b1a0346a9004d27558539b670e94dfda48813
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8181082820112143,
3
  "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196",
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 1380,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -233,6 +233,22 @@
233
  "eval_samples_per_second": 12.722,
234
  "eval_steps_per_second": 3.181,
235
  "step": 1380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  }
237
  ],
238
  "logging_steps": 100,
@@ -252,7 +268,7 @@
252
  "attributes": {}
253
  }
254
  },
255
- "total_flos": 9473999878980000.0,
256
  "train_batch_size": 4,
257
  "trial_name": null,
258
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8181082820112143,
3
  "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196",
4
+ "epoch": 16.0,
5
  "eval_steps": 500,
6
+ "global_step": 1472,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
233
  "eval_samples_per_second": 12.722,
234
  "eval_steps_per_second": 3.181,
235
  "step": 1380
236
+ },
237
+ {
238
+ "epoch": 15.217391304347826,
239
+ "grad_norm": 0.007132470607757568,
240
+ "learning_rate": 1.956521739130435e-05,
241
+ "loss": 0.0324,
242
+ "step": 1400
243
+ },
244
+ {
245
+ "epoch": 16.0,
246
+ "eval_f1": 0.7419546636937941,
247
+ "eval_loss": 1.6500256061553955,
248
+ "eval_runtime": 7.2475,
249
+ "eval_samples_per_second": 12.694,
250
+ "eval_steps_per_second": 3.174,
251
+ "step": 1472
252
  }
253
  ],
254
  "logging_steps": 100,
 
268
  "attributes": {}
269
  }
270
  },
271
+ "total_flos": 1.0105599870912e+16,
272
  "train_batch_size": 4,
273
  "trial_name": null,
274
  "trial_params": null