mgh6 commited on
Commit
e8d77f2
·
verified ·
1 Parent(s): d9c1f7f

Training in progress, step 9200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72fe9633b02a51a49a2cfeadca1ee666fcf1b3fd25d1edaee5806d94baa42146
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b773bdf4055e53552cee0ed25da5c686cd04dc17af6b5a2d0bf3dd951e16525
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a87453f5493e54965bd9c6b74c8cccaf89bc199afc196a39a1776694b5725e8
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10372dacbd6eaf45478a0c95249dcd88ed2ce0ce6aa512de458d4897ecfc863
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a03dc33a206b870bac827b0d4553fe1f31612e46dc1d6df4c89c4f928ac98827
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6dce8f2f1c5a17a180c274638622919bd91b1b4a11df9b9fbd8d8e95cbf84
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:297c2ad1be70bae81e3d823568324b3db1a2bdb9ff27011986573fdf4003489a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f59af528e00285e6f0500198bcd9b612dd10ddf7de75e7254a328683370acd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
- "epoch": 2.641509433962264,
5
  "eval_steps": 100,
6
- "global_step": 9100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1372,6 +1372,21 @@
1372
  "eval_samples_per_second": 213.408,
1373
  "eval_steps_per_second": 3.335,
1374
  "step": 9100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1375
  }
1376
  ],
1377
  "logging_steps": 100,
@@ -1386,7 +1401,7 @@
1386
  "early_stopping_threshold": 0.0
1387
  },
1388
  "attributes": {
1389
- "early_stopping_patience_counter": 2
1390
  }
1391
  },
1392
  "TrainerControl": {
@@ -1400,7 +1415,7 @@
1400
  "attributes": {}
1401
  }
1402
  },
1403
- "total_flos": 9.103292765936026e+16,
1404
  "train_batch_size": 64,
1405
  "trial_name": null,
1406
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
+ "epoch": 2.6705370101596517,
5
  "eval_steps": 100,
6
+ "global_step": 9200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1372
  "eval_samples_per_second": 213.408,
1373
  "eval_steps_per_second": 3.335,
1374
  "step": 9100
1375
+ },
1376
+ {
1377
+ "epoch": 2.6705370101596517,
1378
+ "grad_norm": 1.25364351272583,
1379
+ "learning_rate": 7.329462989840349e-05,
1380
+ "loss": 1.966,
1381
+ "step": 9200
1382
+ },
1383
+ {
1384
+ "epoch": 2.6705370101596517,
1385
+ "eval_loss": 1.0489540100097656,
1386
+ "eval_runtime": 213.3373,
1387
+ "eval_samples_per_second": 213.273,
1388
+ "eval_steps_per_second": 3.333,
1389
+ "step": 9200
1390
  }
1391
  ],
1392
  "logging_steps": 100,
 
1401
  "early_stopping_threshold": 0.0
1402
  },
1403
  "attributes": {
1404
+ "early_stopping_patience_counter": 3
1405
  }
1406
  },
1407
  "TrainerControl": {
 
1415
  "attributes": {}
1416
  }
1417
  },
1418
+ "total_flos": 9.203331526714982e+16,
1419
  "train_batch_size": 64,
1420
  "trial_name": null,
1421
  "trial_params": null