jflotz commited on
Commit
e6ae1a1
·
1 Parent(s): f47a032

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a5b6e2d3a240e31f2407589b74bf56102df3cd6db72efc78606028852235e7
3
- size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed924fbf244552502b394f746883a6e11b9da238444af325c3d3fb38fa9fed4
3
+ size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a9d1af9969e324f9fedc078ab1e1ab334bc4c8eeeb0d4b38445a40029af3cf3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2c2b1f31e602dcbc6fa8e9cd193a75249372c2a301104271bb1cb69568ffa1
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f68d3cd45e110417fc415ca02d5ecabffe718dd4e6856c7d2a0556a6509e8b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90ff94ce099b109f6c343c1450c170171a247badda4343ab1850180869cf03e2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c802ff93fc9d67d63f3e03f7bd5fd1c7e4a71a3faef71bb4d686a1c5885c38
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6691723453375417,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1206,11 +1206,211 @@
1206
  "eval_samples_per_second": 942.115,
1207
  "eval_steps_per_second": 14.765,
1208
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  }
1210
  ],
1211
  "max_steps": 1000000,
1212
  "num_train_epochs": 12,
1213
- "total_flos": 4.2060156618208287e+21,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7807010695604653,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1206
  "eval_samples_per_second": 942.115,
1207
  "eval_steps_per_second": 14.765,
1208
  "step": 60000
1209
+ },
1210
+ {
1211
+ "epoch": 0.67,
1212
+ "learning_rate": 0.00014995780552943551,
1213
+ "loss": 0.3321,
1214
+ "step": 60500
1215
+ },
1216
+ {
1217
+ "epoch": 0.68,
1218
+ "learning_rate": 0.00014995369178303722,
1219
+ "loss": 0.3311,
1220
+ "step": 61000
1221
+ },
1222
+ {
1223
+ "epoch": 0.68,
1224
+ "eval_loss": 0.3181557357311249,
1225
+ "eval_runtime": 2.3632,
1226
+ "eval_samples_per_second": 971.967,
1227
+ "eval_steps_per_second": 15.233,
1228
+ "step": 61000
1229
+ },
1230
+ {
1231
+ "epoch": 0.69,
1232
+ "learning_rate": 0.0001499493867859168,
1233
+ "loss": 0.3298,
1234
+ "step": 61500
1235
+ },
1236
+ {
1237
+ "epoch": 0.69,
1238
+ "learning_rate": 0.0001499448905498439,
1239
+ "loss": 0.3289,
1240
+ "step": 62000
1241
+ },
1242
+ {
1243
+ "epoch": 0.69,
1244
+ "eval_loss": 0.31774210929870605,
1245
+ "eval_runtime": 2.4377,
1246
+ "eval_samples_per_second": 942.269,
1247
+ "eval_steps_per_second": 14.768,
1248
+ "step": 62000
1249
+ },
1250
+ {
1251
+ "epoch": 0.7,
1252
+ "learning_rate": 0.00014994020308711106,
1253
+ "loss": 0.3281,
1254
+ "step": 62500
1255
+ },
1256
+ {
1257
+ "epoch": 0.7,
1258
+ "learning_rate": 0.00014993532441053364,
1259
+ "loss": 0.3272,
1260
+ "step": 63000
1261
+ },
1262
+ {
1263
+ "epoch": 0.7,
1264
+ "eval_loss": 0.31380537152290344,
1265
+ "eval_runtime": 2.4068,
1266
+ "eval_samples_per_second": 954.378,
1267
+ "eval_steps_per_second": 14.958,
1268
+ "step": 63000
1269
+ },
1270
+ {
1271
+ "epoch": 0.71,
1272
+ "learning_rate": 0.0001499302545334498,
1273
+ "loss": 0.3262,
1274
+ "step": 63500
1275
+ },
1276
+ {
1277
+ "epoch": 0.71,
1278
+ "learning_rate": 0.0001499249934697203,
1279
+ "loss": 0.3253,
1280
+ "step": 64000
1281
+ },
1282
+ {
1283
+ "epoch": 0.71,
1284
+ "eval_loss": 0.3134210705757141,
1285
+ "eval_runtime": 2.4456,
1286
+ "eval_samples_per_second": 939.234,
1287
+ "eval_steps_per_second": 14.72,
1288
+ "step": 64000
1289
+ },
1290
+ {
1291
+ "epoch": 0.72,
1292
+ "learning_rate": 0.00014991954123372875,
1293
+ "loss": 0.3246,
1294
+ "step": 64500
1295
+ },
1296
+ {
1297
+ "epoch": 0.72,
1298
+ "learning_rate": 0.0001499138978403813,
1299
+ "loss": 0.3242,
1300
+ "step": 65000
1301
+ },
1302
+ {
1303
+ "epoch": 0.72,
1304
+ "eval_loss": 0.3107437193393707,
1305
+ "eval_runtime": 2.3823,
1306
+ "eval_samples_per_second": 964.183,
1307
+ "eval_steps_per_second": 15.111,
1308
+ "step": 65000
1309
+ },
1310
+ {
1311
+ "epoch": 0.73,
1312
+ "learning_rate": 0.00014990806330510687,
1313
+ "loss": 0.3231,
1314
+ "step": 65500
1315
+ },
1316
+ {
1317
+ "epoch": 0.74,
1318
+ "learning_rate": 0.00014990203764385677,
1319
+ "loss": 0.3221,
1320
+ "step": 66000
1321
+ },
1322
+ {
1323
+ "epoch": 0.74,
1324
+ "eval_loss": 0.308339387178421,
1325
+ "eval_runtime": 2.4467,
1326
+ "eval_samples_per_second": 938.797,
1327
+ "eval_steps_per_second": 14.713,
1328
+ "step": 66000
1329
+ },
1330
+ {
1331
+ "epoch": 0.74,
1332
+ "learning_rate": 0.00014989582087310494,
1333
+ "loss": 0.3211,
1334
+ "step": 66500
1335
+ },
1336
+ {
1337
+ "epoch": 0.75,
1338
+ "learning_rate": 0.00014988941300984784,
1339
+ "loss": 0.3203,
1340
+ "step": 67000
1341
+ },
1342
+ {
1343
+ "epoch": 0.75,
1344
+ "eval_loss": 0.3079957365989685,
1345
+ "eval_runtime": 2.3779,
1346
+ "eval_samples_per_second": 965.976,
1347
+ "eval_steps_per_second": 15.139,
1348
+ "step": 67000
1349
+ },
1350
+ {
1351
+ "epoch": 0.75,
1352
+ "learning_rate": 0.00014988281407160426,
1353
+ "loss": 0.3194,
1354
+ "step": 67500
1355
+ },
1356
+ {
1357
+ "epoch": 0.76,
1358
+ "learning_rate": 0.0001498760240764155,
1359
+ "loss": 0.3188,
1360
+ "step": 68000
1361
+ },
1362
+ {
1363
+ "epoch": 0.76,
1364
+ "eval_loss": 0.30548328161239624,
1365
+ "eval_runtime": 2.3902,
1366
+ "eval_samples_per_second": 961.017,
1367
+ "eval_steps_per_second": 15.062,
1368
+ "step": 68000
1369
+ },
1370
+ {
1371
+ "epoch": 0.76,
1372
+ "learning_rate": 0.00014986904304284512,
1373
+ "loss": 0.3181,
1374
+ "step": 68500
1375
+ },
1376
+ {
1377
+ "epoch": 0.77,
1378
+ "learning_rate": 0.000149861870989979,
1379
+ "loss": 0.3169,
1380
+ "step": 69000
1381
+ },
1382
+ {
1383
+ "epoch": 0.77,
1384
+ "eval_loss": 0.3034500181674957,
1385
+ "eval_runtime": 2.3995,
1386
+ "eval_samples_per_second": 957.293,
1387
+ "eval_steps_per_second": 15.003,
1388
+ "step": 69000
1389
+ },
1390
+ {
1391
+ "epoch": 0.78,
1392
+ "learning_rate": 0.00014985450793742527,
1393
+ "loss": 0.3164,
1394
+ "step": 69500
1395
+ },
1396
+ {
1397
+ "epoch": 0.78,
1398
+ "learning_rate": 0.0001498469539053142,
1399
+ "loss": 0.3157,
1400
+ "step": 70000
1401
+ },
1402
+ {
1403
+ "epoch": 0.78,
1404
+ "eval_loss": 0.3020155429840088,
1405
+ "eval_runtime": 2.3865,
1406
+ "eval_samples_per_second": 962.488,
1407
+ "eval_steps_per_second": 15.085,
1408
+ "step": 70000
1409
  }
1410
  ],
1411
  "max_steps": 1000000,
1412
  "num_train_epochs": 12,
1413
+ "total_flos": 4.9070182721243e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a9d1af9969e324f9fedc078ab1e1ab334bc4c8eeeb0d4b38445a40029af3cf3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2c2b1f31e602dcbc6fa8e9cd193a75249372c2a301104271bb1cb69568ffa1
3
  size 449471589