mgh6 commited on
Commit
2ca43dd
·
verified ·
1 Parent(s): a68a40b

Training in progress, epoch 12, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fae6d18c31da2f3123d26fe82f253230f081a7f04d6096c55edf8a79d57c0bff
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9273b0b5b72839b135e5ba3d001ba6d7289e641b1303d533d3f16dcafb32b4ac
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57eff680a79bd13191e5837ccb2bf3e205b17f06697567bf05c733d21f919c9e
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f523e103b2aa541067a3a863044f1b45be43e5156c2986397eee07006ae38a5
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7620d0d74aa4af76afd5408d773ed4748a8acb91b0e964a8cddab8e88f040c64
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f1e7cc261bf54ef1e547376788ce8abb284b4d32ff4f5a42cd1d1aecd5e3670
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b2b83ebf1ec1af378519da07bfb09fb79fd0733745840a41e1fc2e6668cb73a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac558eb50fb0d07f8ffcf509322d18bbb69a3f968f832820a9d4adbe07047818
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.997999636297509,
5
  "eval_steps": 50,
6
- "global_step": 4116,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1237,6 +1237,111 @@
1237
  "eval_samples_per_second": 41.464,
1238
  "eval_steps_per_second": 20.732,
1239
  "step": 4100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1240
  }
1241
  ],
1242
  "logging_steps": 50,
@@ -1256,7 +1361,7 @@
1256
  "attributes": {}
1257
  }
1258
  },
1259
- "total_flos": 1.072157800433451e+18,
1260
  "train_batch_size": 2,
1261
  "trial_name": null,
1262
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.997999636297509,
5
  "eval_steps": 50,
6
+ "global_step": 4459,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1237
  "eval_samples_per_second": 41.464,
1238
  "eval_steps_per_second": 20.732,
1239
  "step": 4100
1240
+ },
1241
+ {
1242
+ "epoch": 12.098927077650481,
1243
+ "grad_norm": 61.96623611450195,
1244
+ "learning_rate": 3.950437317784257e-05,
1245
+ "loss": 0.7457,
1246
+ "step": 4150
1247
+ },
1248
+ {
1249
+ "epoch": 12.098927077650481,
1250
+ "eval_loss": 0.7167355418205261,
1251
+ "eval_runtime": 116.6266,
1252
+ "eval_samples_per_second": 41.38,
1253
+ "eval_steps_per_second": 20.69,
1254
+ "step": 4150
1255
+ },
1256
+ {
1257
+ "epoch": 12.244408074195308,
1258
+ "grad_norm": 112.12747955322266,
1259
+ "learning_rate": 3.8775510204081634e-05,
1260
+ "loss": 0.7356,
1261
+ "step": 4200
1262
+ },
1263
+ {
1264
+ "epoch": 12.244408074195308,
1265
+ "eval_loss": 0.7129220962524414,
1266
+ "eval_runtime": 116.6342,
1267
+ "eval_samples_per_second": 41.377,
1268
+ "eval_steps_per_second": 20.689,
1269
+ "step": 4200
1270
+ },
1271
+ {
1272
+ "epoch": 12.389889070740134,
1273
+ "grad_norm": 51.88462829589844,
1274
+ "learning_rate": 3.8046647230320704e-05,
1275
+ "loss": 0.7361,
1276
+ "step": 4250
1277
+ },
1278
+ {
1279
+ "epoch": 12.389889070740134,
1280
+ "eval_loss": 0.7131578326225281,
1281
+ "eval_runtime": 116.4086,
1282
+ "eval_samples_per_second": 41.457,
1283
+ "eval_steps_per_second": 20.729,
1284
+ "step": 4250
1285
+ },
1286
+ {
1287
+ "epoch": 12.53537006728496,
1288
+ "grad_norm": 65.30543518066406,
1289
+ "learning_rate": 3.731778425655977e-05,
1290
+ "loss": 0.7323,
1291
+ "step": 4300
1292
+ },
1293
+ {
1294
+ "epoch": 12.53537006728496,
1295
+ "eval_loss": 0.7133215665817261,
1296
+ "eval_runtime": 116.5949,
1297
+ "eval_samples_per_second": 41.391,
1298
+ "eval_steps_per_second": 20.696,
1299
+ "step": 4300
1300
+ },
1301
+ {
1302
+ "epoch": 12.680851063829786,
1303
+ "grad_norm": 59.78929901123047,
1304
+ "learning_rate": 3.658892128279884e-05,
1305
+ "loss": 0.7358,
1306
+ "step": 4350
1307
+ },
1308
+ {
1309
+ "epoch": 12.680851063829786,
1310
+ "eval_loss": 0.7077481746673584,
1311
+ "eval_runtime": 116.19,
1312
+ "eval_samples_per_second": 41.535,
1313
+ "eval_steps_per_second": 20.768,
1314
+ "step": 4350
1315
+ },
1316
+ {
1317
+ "epoch": 12.826332060374614,
1318
+ "grad_norm": 43.919498443603516,
1319
+ "learning_rate": 3.58600583090379e-05,
1320
+ "loss": 0.7359,
1321
+ "step": 4400
1322
+ },
1323
+ {
1324
+ "epoch": 12.826332060374614,
1325
+ "eval_loss": 0.7035172581672668,
1326
+ "eval_runtime": 116.6188,
1327
+ "eval_samples_per_second": 41.383,
1328
+ "eval_steps_per_second": 20.691,
1329
+ "step": 4400
1330
+ },
1331
+ {
1332
+ "epoch": 12.97181305691944,
1333
+ "grad_norm": 48.08681869506836,
1334
+ "learning_rate": 3.5131195335276965e-05,
1335
+ "loss": 0.7392,
1336
+ "step": 4450
1337
+ },
1338
+ {
1339
+ "epoch": 12.97181305691944,
1340
+ "eval_loss": 0.7051976323127747,
1341
+ "eval_runtime": 116.393,
1342
+ "eval_samples_per_second": 41.463,
1343
+ "eval_steps_per_second": 20.731,
1344
+ "step": 4450
1345
  }
1346
  ],
1347
  "logging_steps": 50,
 
1361
  "attributes": {}
1362
  }
1363
  },
1364
+ "total_flos": 1.1614435613679288e+18,
1365
  "train_batch_size": 2,
1366
  "trial_name": null,
1367
  "trial_params": null