textsightai commited on
Commit
077946d
·
verified ·
1 Parent(s): eb672a9

Training in progress, step 3564, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16b6a4a294a1cc7668de259fbd48b70aa442eeff2115186962274aec99084529
3
  size 2950734544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0713a48c012baea1fd0fe85458998ff2d3f30ea39f3a397f2c2c05a6c662d181
3
  size 2950734544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:055b521fe84aaa03f833218cef3a750a0edce67c730dd29402f719d25a5c68f7
3
  size 5357408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b243449f31e422589ead2437362b30eedba441a1287618227cb835db3b1b87
3
  size 5357408
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:792acc32e247e4fae3661c67d51f5f956241a846a12afb588fa3e1e4cdaec8cc
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29117602da5370738e1e6b1d075a348793d2f8f1bc194b48c87fa9e2c883da52
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ccef800644988b261b844e19a8cab522da84de3a3b5e2c5a0e38ca72aeb92c9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2fbc646b980ba4517f5e7af3567931794b062e20b71d2610c9a2b1554d1be1
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 3200,
3
  "best_metric": 9.882343832146034e-09,
4
  "best_model_checkpoint": "./humanizer-ckpt/checkpoint-3200",
5
- "epoch": 2.6938947368421053,
6
  "eval_steps": 400,
7
- "global_step": 3200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1192,6 +1192,132 @@
1192
  "eval_samples_per_second": 96.598,
1193
  "eval_steps_per_second": 12.075,
1194
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1195
  }
1196
  ],
1197
  "logging_steps": 20,
@@ -1206,12 +1332,12 @@
1206
  "should_evaluate": false,
1207
  "should_log": false,
1208
  "should_save": true,
1209
- "should_training_stop": false
1210
  },
1211
  "attributes": {}
1212
  }
1213
  },
1214
- "total_flos": 5.5407890399232e+16,
1215
  "train_batch_size": 8,
1216
  "trial_name": null,
1217
  "trial_params": null
 
2
  "best_global_step": 3200,
3
  "best_metric": 9.882343832146034e-09,
4
  "best_model_checkpoint": "./humanizer-ckpt/checkpoint-3200",
5
+ "epoch": 3.0,
6
  "eval_steps": 400,
7
+ "global_step": 3564,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1192
  "eval_samples_per_second": 96.598,
1193
  "eval_steps_per_second": 12.075,
1194
  "step": 3200
1195
+ },
1196
+ {
1197
+ "epoch": 2.710736842105263,
1198
+ "grad_norm": 0.00035652011865749955,
1199
+ "learning_rate": 7.282783880194681e-06,
1200
+ "loss": 6.31846342002973e-06,
1201
+ "step": 3220
1202
+ },
1203
+ {
1204
+ "epoch": 2.7275789473684213,
1205
+ "grad_norm": 6.81265300954692e-05,
1206
+ "learning_rate": 6.468825834761165e-06,
1207
+ "loss": 1.4050155004952104e-05,
1208
+ "step": 3240
1209
+ },
1210
+ {
1211
+ "epoch": 2.744421052631579,
1212
+ "grad_norm": 0.00012173700815765187,
1213
+ "learning_rate": 5.7020891286328915e-06,
1214
+ "loss": 5.758084807894193e-06,
1215
+ "step": 3260
1216
+ },
1217
+ {
1218
+ "epoch": 2.761263157894737,
1219
+ "grad_norm": 0.0001060321883414872,
1220
+ "learning_rate": 4.9828260159586795e-06,
1221
+ "loss": 7.252940849866718e-06,
1222
+ "step": 3280
1223
+ },
1224
+ {
1225
+ "epoch": 2.7781052631578946,
1226
+ "grad_norm": 0.00028356886468827724,
1227
+ "learning_rate": 4.311273132212895e-06,
1228
+ "loss": 6.122983177192509e-06,
1229
+ "step": 3300
1230
+ },
1231
+ {
1232
+ "epoch": 2.7949473684210524,
1233
+ "grad_norm": 0.00015243480447679758,
1234
+ "learning_rate": 3.6876514163432904e-06,
1235
+ "loss": 6.343883433146402e-06,
1236
+ "step": 3320
1237
+ },
1238
+ {
1239
+ "epoch": 2.8117894736842106,
1240
+ "grad_norm": 0.00012169565161457285,
1241
+ "learning_rate": 3.1121660380825864e-06,
1242
+ "loss": 5.4682153859175745e-06,
1243
+ "step": 3340
1244
+ },
1245
+ {
1246
+ "epoch": 2.8286315789473684,
1247
+ "grad_norm": 6.715493509545922e-05,
1248
+ "learning_rate": 2.5850063304482804e-06,
1249
+ "loss": 5.410446465248242e-06,
1250
+ "step": 3360
1251
+ },
1252
+ {
1253
+ "epoch": 2.845473684210526,
1254
+ "grad_norm": 5.5883188906591386e-05,
1255
+ "learning_rate": 2.106345727452691e-06,
1256
+ "loss": 7.158593507483601e-06,
1257
+ "step": 3380
1258
+ },
1259
+ {
1260
+ "epoch": 2.8623157894736844,
1261
+ "grad_norm": 0.00011430613085394725,
1262
+ "learning_rate": 1.6763417070435326e-06,
1263
+ "loss": 1.355467684334144e-05,
1264
+ "step": 3400
1265
+ },
1266
+ {
1267
+ "epoch": 2.879157894736842,
1268
+ "grad_norm": 0.00011039682431146502,
1269
+ "learning_rate": 1.2951357392942618e-06,
1270
+ "loss": 5.743457586504519e-06,
1271
+ "step": 3420
1272
+ },
1273
+ {
1274
+ "epoch": 2.896,
1275
+ "grad_norm": 0.000167473524925299,
1276
+ "learning_rate": 9.628532398607892e-07,
1277
+ "loss": 8.994613745016978e-06,
1278
+ "step": 3440
1279
+ },
1280
+ {
1281
+ "epoch": 2.9128421052631577,
1282
+ "grad_norm": 0.0004898277693428099,
1283
+ "learning_rate": 6.796035287200186e-07,
1284
+ "loss": 6.411921640392393e-06,
1285
+ "step": 3460
1286
+ },
1287
+ {
1288
+ "epoch": 2.929684210526316,
1289
+ "grad_norm": 0.0002149147039745003,
1290
+ "learning_rate": 4.4547979420403444e-07,
1291
+ "loss": 5.693230923498049e-06,
1292
+ "step": 3480
1293
+ },
1294
+ {
1295
+ "epoch": 2.9465263157894737,
1296
+ "grad_norm": 0.00029297475703060627,
1297
+ "learning_rate": 2.605590623412923e-07,
1298
+ "loss": 5.634667832055129e-06,
1299
+ "step": 3500
1300
+ },
1301
+ {
1302
+ "epoch": 2.9633684210526314,
1303
+ "grad_norm": 0.00044662103755399585,
1304
+ "learning_rate": 1.2490217151532312e-07,
1305
+ "loss": 6.381121784215793e-06,
1306
+ "step": 3520
1307
+ },
1308
+ {
1309
+ "epoch": 2.9802105263157896,
1310
+ "grad_norm": 0.00033745335531421006,
1311
+ "learning_rate": 3.8553752449110854e-08,
1312
+ "loss": 5.156782572157681e-06,
1313
+ "step": 3540
1314
+ },
1315
+ {
1316
+ "epoch": 2.9970526315789474,
1317
+ "grad_norm": 0.00013488579133991152,
1318
+ "learning_rate": 1.5422135216547338e-09,
1319
+ "loss": 5.085239899926819e-06,
1320
+ "step": 3560
1321
  }
1322
  ],
1323
  "logging_steps": 20,
 
1332
  "should_evaluate": false,
1333
  "should_log": false,
1334
  "should_save": true,
1335
+ "should_training_stop": true
1336
  },
1337
  "attributes": {}
1338
  }
1339
  },
1340
+ "total_flos": 6.1703847936e+16,
1341
  "train_batch_size": 8,
1342
  "trial_name": null,
1343
  "trial_params": null