mgh6 commited on
Commit
12ea2f2
·
verified ·
1 Parent(s): 528267b

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1386e828b3ba479196522024102112e210e952e5254c67fc939f220aff955ac6
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c88c5416d2b227c786b1439286f5e08b376fbaf2336d3ad3f7c0ba996ebb7dca
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ff9ebb9fca0c17ff991f1cef7618725b58455f772919586e80516856dad8806
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a28047c481623f0d721dd50781596e0c29b9b96dfc6a7b76cff9611c73f853
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dd8409f64af20a99c74e394e705eb79e774a42d36c1eb3e8c1583d929923352
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3c81bb041799a90cf1c3d50b843bc32c890dd7bc841895016d103e980913f6
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eee4f684a730e891fd1e26e72f5546f480a0c3d9d36c0f17b64dcdf05ef17e02
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f731b7a6aefc3f610a3a165904d543d20dce898827aabc1534d10593582b96eb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.795813154194658,
5
  "eval_steps": 500,
6
- "global_step": 13400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1153,6 +1153,21 @@
1153
  "learning_rate": 1.0666666666666667e-05,
1154
  "loss": 1.2186,
1155
  "step": 13400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
  }
1157
  ],
1158
  "logging_steps": 100,
@@ -1172,7 +1187,7 @@
1172
  "attributes": {}
1173
  }
1174
  },
1175
- "total_flos": 2.091380806058718e+19,
1176
  "train_batch_size": 8,
1177
  "trial_name": null,
1178
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 26.995781909076708,
5
  "eval_steps": 500,
6
+ "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1153
  "learning_rate": 1.0666666666666667e-05,
1154
  "loss": 1.2186,
1155
  "step": 13400
1156
+ },
1157
+ {
1158
+ "epoch": 26.995781909076708,
1159
+ "grad_norm": 0.24657221138477325,
1160
+ "learning_rate": 1e-05,
1161
+ "loss": 1.2211,
1162
+ "step": 13500
1163
+ },
1164
+ {
1165
+ "epoch": 26.995781909076708,
1166
+ "eval_loss": 1.0853413343429565,
1167
+ "eval_runtime": 779.9076,
1168
+ "eval_samples_per_second": 19.902,
1169
+ "eval_steps_per_second": 2.489,
1170
+ "step": 13500
1171
  }
1172
  ],
1173
  "logging_steps": 100,
 
1187
  "attributes": {}
1188
  }
1189
  },
1190
+ "total_flos": 2.1069883031775347e+19,
1191
  "train_batch_size": 8,
1192
  "trial_name": null,
1193
  "trial_params": null