mgh6 commited on
Commit
8d1a864
·
verified ·
1 Parent(s): d72b856

Training in progress, step 13600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c88c5416d2b227c786b1439286f5e08b376fbaf2336d3ad3f7c0ba996ebb7dca
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74583d1aed86f5fec9822b4801b215609eda6feeb26de4db289f117a5daecd7
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3a28047c481623f0d721dd50781596e0c29b9b96dfc6a7b76cff9611c73f853
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb1aad3e5e0c34a5fa7df6b5f5d65a7432d6603242ab62040f41d61f9c9c3ca
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b3c81bb041799a90cf1c3d50b843bc32c890dd7bc841895016d103e980913f6
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3d2cc50d3d40f321e2fc5f39a4cb1bcd1bcea10890b330eb07abe1c2f50328
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f731b7a6aefc3f610a3a165904d543d20dce898827aabc1534d10593582b96eb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ae258af55a12b6a09eeafcb5790aba7e5858b90e20fc4e4a90fe27fc561cdf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.995781909076708,
5
  "eval_steps": 500,
6
- "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1168,6 +1168,13 @@
1168
  "eval_samples_per_second": 19.902,
1169
  "eval_steps_per_second": 2.489,
1170
  "step": 13500
 
 
 
 
 
 
 
1171
  }
1172
  ],
1173
  "logging_steps": 100,
@@ -1187,7 +1194,7 @@
1187
  "attributes": {}
1188
  }
1189
  },
1190
- "total_flos": 2.1069883031775347e+19,
1191
  "train_batch_size": 8,
1192
  "trial_name": null,
1193
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 27.195750663958755,
5
  "eval_steps": 500,
6
+ "global_step": 13600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1168
  "eval_samples_per_second": 19.902,
1169
  "eval_steps_per_second": 2.489,
1170
  "step": 13500
1171
+ },
1172
+ {
1173
+ "epoch": 27.195750663958755,
1174
+ "grad_norm": 0.2545396685600281,
1175
+ "learning_rate": 9.333333333333334e-06,
1176
+ "loss": 1.2217,
1177
+ "step": 13600
1178
  }
1179
  ],
1180
  "logging_steps": 100,
 
1194
  "attributes": {}
1195
  }
1196
  },
1197
+ "total_flos": 2.122594884609324e+19,
1198
  "train_batch_size": 8,
1199
  "trial_name": null,
1200
  "trial_params": null