saracandu committed on
Commit
7cf7dde
·
verified ·
1 Parent(s): 5a56ff7

Training in progress, step 1550, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54bfdc5814a5788e32724cc7a8b78d4c264b7ed744956f1ae12967a460f966a3
3
  size 1612184459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d8a76550e312d12a91cc5832560b2acbf757d3afaa9f1ef006d9930d5dee73
3
  size 1612184459
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cc17ff3bed0a9716439364aade61f5bad0364b4c946b4ce25a52197f147dc4b
3
  size 806069967
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6101567567161c38233b08fb2271b4b78706457d877f3ea5a7f54f5679d261ee
3
  size 806069967
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22099ffdac5bf2dbcb6d0dba5a9224accc8f51804f8161a7ec007a8969967b48
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d342066754ecfbd9b1ecb8226834c996c511c385e3a5f539eafa72576b871739
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11b676c2f0a59aa9351254878fd91590bffab13cd78e3f6d1bdc7ba9cf1f94b6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5a2477159012c518c81b4a8d7d160fbfb4b7b3c1dca9998ecc892860de68c9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1300,
3
  "best_metric": 0.8713989853858948,
4
  "best_model_checkpoint": "./results_decoder_final/checkpoint-1000",
5
- "epoch": 9.680713128038898,
6
  "eval_steps": 100,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1186,6 +1186,41 @@
1186
  "eval_samples_per_second": 189.096,
1187
  "eval_steps_per_second": 1.513,
1188
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1189
  }
1190
  ],
1191
  "logging_steps": 10,
@@ -1200,12 +1235,12 @@
1200
  "should_evaluate": false,
1201
  "should_log": false,
1202
  "should_save": true,
1203
- "should_training_stop": false
1204
  },
1205
  "attributes": {}
1206
  }
1207
  },
1208
- "total_flos": 4.7275609360328294e+17,
1209
  "train_batch_size": 128,
1210
  "trial_name": null,
1211
  "trial_params": null
 
2
  "best_global_step": 1300,
3
  "best_metric": 0.8713989853858948,
4
  "best_model_checkpoint": "./results_decoder_final/checkpoint-1000",
5
+ "epoch": 10.0,
6
  "eval_steps": 100,
7
+ "global_step": 1550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1186
  "eval_samples_per_second": 189.096,
1187
  "eval_steps_per_second": 1.513,
1188
  "step": 1500
1189
+ },
1190
+ {
1191
+ "epoch": 9.745542949756889,
1192
+ "grad_norm": 0.2521146833896637,
1193
+ "learning_rate": 2.6451612903225807e-06,
1194
+ "loss": 0.6493,
1195
+ "step": 1510
1196
+ },
1197
+ {
1198
+ "epoch": 9.810372771474878,
1199
+ "grad_norm": 0.3085962235927582,
1200
+ "learning_rate": 2.0000000000000003e-06,
1201
+ "loss": 0.6482,
1202
+ "step": 1520
1203
+ },
1204
+ {
1205
+ "epoch": 9.875202593192869,
1206
+ "grad_norm": 0.23055943846702576,
1207
+ "learning_rate": 1.3548387096774193e-06,
1208
+ "loss": 0.6502,
1209
+ "step": 1530
1210
+ },
1211
+ {
1212
+ "epoch": 9.94003241491086,
1213
+ "grad_norm": 0.256622314453125,
1214
+ "learning_rate": 7.096774193548388e-07,
1215
+ "loss": 0.654,
1216
+ "step": 1540
1217
+ },
1218
+ {
1219
+ "epoch": 10.0,
1220
+ "grad_norm": 0.802431583404541,
1221
+ "learning_rate": 6.451612903225807e-08,
1222
+ "loss": 0.6508,
1223
+ "step": 1550
1224
  }
1225
  ],
1226
  "logging_steps": 10,
 
1235
  "should_evaluate": false,
1236
  "should_log": false,
1237
  "should_save": true,
1238
+ "should_training_stop": true
1239
  },
1240
  "attributes": {}
1241
  }
1242
  },
1243
+ "total_flos": 4.8831577850904576e+17,
1244
  "train_batch_size": 128,
1245
  "trial_name": null,
1246
  "trial_params": null