Johannes Garstenauer committed on
Commit
17e0fbb
·
1 Parent(s): b59ce67

Training in progress, step 107780

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1777e896d61479cc9ed84fe5afa72fd7ffbbf7490858dfb4e9f222bbc07ab423
3
  size 133845253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bee3b0a57e429d1e41b17774ce1e4cb53868e59ca3e96b92b478a8cab1502d
3
  size 133845253
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e522821ed97932d589ba0c4ad5a8a58fad9d75c2eda4f4e237c5b163b96dbf
3
  size 266267309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1340a29af6f65337d57c67d690c4635184e349901956fb2a74c845e1dc398013
3
  size 266267309
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:601b0f5e6b3cf18abc0aa01c7adf3b860d3e67ac728b814bc1c64b0ee5196942
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e83ad7ffa8f84644ff6a66224648e7531293eca5c65b342d7753e1d77ea83e1
3
  size 14511
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58e5109c88232419418f6892fe1d47612449af820166625fcf4b6426cc02819b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19571ad9ed8aa394d3dfe767a357a80c87260a1219f15b33b1e18fd6b335a086
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.75,
5
- "global_step": 102391,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1262,11 +1262,77 @@
1262
  "learning_rate": 1.3679583596214512e-06,
1263
  "loss": 0.0946,
1264
  "step": 102000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1265
  }
1266
  ],
1267
  "max_steps": 107780,
1268
  "num_train_epochs": 5,
1269
- "total_flos": 1.7361002410889134e+18,
1270
  "trial_name": null,
1271
  "trial_params": null
1272
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 107780,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1262
  "learning_rate": 1.3679583596214512e-06,
1263
  "loss": 0.0946,
1264
  "step": 102000
1265
+ },
1266
+ {
1267
+ "epoch": 4.76,
1268
+ "learning_rate": 1.2496228613843015e-06,
1269
+ "loss": 0.0966,
1270
+ "step": 102500
1271
+ },
1272
+ {
1273
+ "epoch": 4.78,
1274
+ "learning_rate": 1.1312873631471517e-06,
1275
+ "loss": 0.0905,
1276
+ "step": 103000
1277
+ },
1278
+ {
1279
+ "epoch": 4.8,
1280
+ "learning_rate": 1.0129518649100018e-06,
1281
+ "loss": 0.0961,
1282
+ "step": 103500
1283
+ },
1284
+ {
1285
+ "epoch": 4.82,
1286
+ "learning_rate": 8.946163666728521e-07,
1287
+ "loss": 0.0922,
1288
+ "step": 104000
1289
+ },
1290
+ {
1291
+ "epoch": 4.85,
1292
+ "learning_rate": 7.762808684357024e-07,
1293
+ "loss": 0.0922,
1294
+ "step": 104500
1295
+ },
1296
+ {
1297
+ "epoch": 4.87,
1298
+ "learning_rate": 6.579453701985527e-07,
1299
+ "loss": 0.0946,
1300
+ "step": 105000
1301
+ },
1302
+ {
1303
+ "epoch": 4.89,
1304
+ "learning_rate": 5.396098719614028e-07,
1305
+ "loss": 0.0903,
1306
+ "step": 105500
1307
+ },
1308
+ {
1309
+ "epoch": 4.92,
1310
+ "learning_rate": 4.2127437372425313e-07,
1311
+ "loss": 0.0938,
1312
+ "step": 106000
1313
+ },
1314
+ {
1315
+ "epoch": 4.94,
1316
+ "learning_rate": 3.029388754871034e-07,
1317
+ "loss": 0.0959,
1318
+ "step": 106500
1319
+ },
1320
+ {
1321
+ "epoch": 4.96,
1322
+ "learning_rate": 1.8460337724995362e-07,
1323
+ "loss": 0.0952,
1324
+ "step": 107000
1325
+ },
1326
+ {
1327
+ "epoch": 4.99,
1328
+ "learning_rate": 6.626787901280386e-08,
1329
+ "loss": 0.0936,
1330
+ "step": 107500
1331
  }
1332
  ],
1333
  "max_steps": 107780,
1334
  "num_train_epochs": 5,
1335
+ "total_flos": 1.8274692319096934e+18,
1336
  "trial_name": null,
1337
  "trial_params": null
1338
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e522821ed97932d589ba0c4ad5a8a58fad9d75c2eda4f4e237c5b163b96dbf
3
  size 266267309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1340a29af6f65337d57c67d690c4635184e349901956fb2a74c845e1dc398013
3
  size 266267309