mgh6 commited on
Commit
83efe14
·
verified ·
1 Parent(s): 817a502

Training in progress, step 9300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b773bdf4055e53552cee0ed25da5c686cd04dc17af6b5a2d0bf3dd951e16525
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f3a1db5589596622438f3305091bec4d8eac0af6ce2a5fb67fdb2fabd6a46c
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e10372dacbd6eaf45478a0c95249dcd88ed2ce0ce6aa512de458d4897ecfc863
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d2b73126a971289ab203697cf1e1236e8169aa0b895581f652f1cc563a281b4
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20e6dce8f2f1c5a17a180c274638622919bd91b1b4a11df9b9fbd8d8e95cbf84
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda55b185dc98f3bbacd230aaacff604bd97a164ddf9fc1b48c814e202c2b868
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54f59af528e00285e6f0500198bcd9b612dd10ddf7de75e7254a328683370acd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ad8d31b03afdc3955f524b574e208e67f3c465ce1d021ebb4d8b9cc6f6fd1a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
- "epoch": 2.6705370101596517,
5
  "eval_steps": 100,
6
- "global_step": 9200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1387,6 +1387,21 @@
1387
  "eval_samples_per_second": 213.273,
1388
  "eval_steps_per_second": 3.333,
1389
  "step": 9200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1390
  }
1391
  ],
1392
  "logging_steps": 100,
@@ -1401,7 +1416,7 @@
1401
  "early_stopping_threshold": 0.0
1402
  },
1403
  "attributes": {
1404
- "early_stopping_patience_counter": 3
1405
  }
1406
  },
1407
  "TrainerControl": {
@@ -1415,7 +1430,7 @@
1415
  "attributes": {}
1416
  }
1417
  },
1418
- "total_flos": 9.203331526714982e+16,
1419
  "train_batch_size": 64,
1420
  "trial_name": null,
1421
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
+ "epoch": 2.699564586357039,
5
  "eval_steps": 100,
6
+ "global_step": 9300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1387
  "eval_samples_per_second": 213.273,
1388
  "eval_steps_per_second": 3.333,
1389
  "step": 9200
1390
+ },
1391
+ {
1392
+ "epoch": 2.699564586357039,
1393
+ "grad_norm": 1.317325472831726,
1394
+ "learning_rate": 7.300435413642961e-05,
1395
+ "loss": 1.9853,
1396
+ "step": 9300
1397
+ },
1398
+ {
1399
+ "epoch": 2.699564586357039,
1400
+ "eval_loss": 1.04426109790802,
1401
+ "eval_runtime": 212.5953,
1402
+ "eval_samples_per_second": 214.017,
1403
+ "eval_steps_per_second": 3.344,
1404
+ "step": 9300
1405
  }
1406
  ],
1407
  "logging_steps": 100,
 
1416
  "early_stopping_threshold": 0.0
1417
  },
1418
  "attributes": {
1419
+ "early_stopping_patience_counter": 4
1420
  }
1421
  },
1422
  "TrainerControl": {
 
1430
  "attributes": {}
1431
  }
1432
  },
1433
+ "total_flos": 9.30337028749394e+16,
1434
  "train_batch_size": 64,
1435
  "trial_name": null,
1436
  "trial_params": null