tokhey commited on
Commit
d98e8da
·
verified ·
1 Parent(s): ae5cc88

Training in progress, step 880, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ebe70a745247409cae6b79f3c1184ce97abbd4bac127f999909b5b73251d4c8
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca0f4e76acdd877e061b54b094915f061d5aef2a48a215afcdffebf99684346
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ac518a3753be37e8a64fd1d9865d36bd71db712f97713b82aeaa261792226a
3
  size 148053627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e456b05a1308ce2179ec568426c87fba32d61b44110a58f117291568af87b5e3
3
  size 148053627
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7895d483b77b3cb6aee62b3bc42d0d3e4277b9cef4ded3fbeb54271ff22e18ca
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b51625db2aad97eb7aa57bda210faaf43e85a6afe5e9f14d65cd48ce0ab9a8f6
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37fe9d97d6d21af00d1c8335a3abe36a9edcee7b7c01e459eb702311857d4d02
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5db66705b412cd18d5ad4eb6f72a79adbb3f849df7f1405564a81fc27e59d0
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04a7e2e967d181c1ed113e98417e3884ff02f87ef8b2066c0ee1e0fd9a352607
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe155ae67690223b1383fab8be573d608c35abd7fd572e583add75faf527c0df
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 9.548571428571428,
6
  "eval_steps": 100,
7
- "global_step": 840,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1248,6 +1248,62 @@
1248
  "learning_rate": 3.298903859953517e-06,
1249
  "loss": 0.0239,
1250
  "step": 840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
  }
1252
  ],
1253
  "logging_steps": 5,
@@ -1262,12 +1318,12 @@
1262
  "should_evaluate": false,
1263
  "should_log": false,
1264
  "should_save": true,
1265
- "should_training_stop": false
1266
  },
1267
  "attributes": {}
1268
  }
1269
  },
1270
- "total_flos": 5.45693806582825e+16,
1271
  "train_batch_size": 1,
1272
  "trial_name": null,
1273
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 10.0,
6
  "eval_steps": 100,
7
+ "global_step": 880,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1248
  "learning_rate": 3.298903859953517e-06,
1249
  "loss": 0.0239,
1250
  "step": 840
1251
+ },
1252
+ {
1253
+ "epoch": 9.605714285714285,
1254
+ "grad_norm": 0.11044926196336746,
1255
+ "learning_rate": 2.544639529766829e-06,
1256
+ "loss": 0.0234,
1257
+ "step": 845
1258
+ },
1259
+ {
1260
+ "epoch": 9.662857142857142,
1261
+ "grad_norm": 0.12619182467460632,
1262
+ "learning_rate": 1.887710869163284e-06,
1263
+ "loss": 0.0233,
1264
+ "step": 850
1265
+ },
1266
+ {
1267
+ "epoch": 9.72,
1268
+ "grad_norm": 0.12324284762144089,
1269
+ "learning_rate": 1.328376278651705e-06,
1270
+ "loss": 0.023,
1271
+ "step": 855
1272
+ },
1273
+ {
1274
+ "epoch": 9.777142857142858,
1275
+ "grad_norm": 0.11614653468132019,
1276
+ "learning_rate": 8.668557704669122e-07,
1277
+ "loss": 0.0238,
1278
+ "step": 860
1279
+ },
1280
+ {
1281
+ "epoch": 9.834285714285715,
1282
+ "grad_norm": 0.13400106132030487,
1283
+ "learning_rate": 5.033308820289185e-07,
1284
+ "loss": 0.0237,
1285
+ "step": 865
1286
+ },
1287
+ {
1288
+ "epoch": 9.891428571428571,
1289
+ "grad_norm": 0.12201932817697525,
1290
+ "learning_rate": 2.3794460453555045e-07,
1291
+ "loss": 0.0216,
1292
+ "step": 870
1293
+ },
1294
+ {
1295
+ "epoch": 9.948571428571428,
1296
+ "grad_norm": 0.12058448791503906,
1297
+ "learning_rate": 7.080132671774542e-08,
1298
+ "loss": 0.0238,
1299
+ "step": 875
1300
+ },
1301
+ {
1302
+ "epoch": 10.0,
1303
+ "grad_norm": 0.15471021831035614,
1304
+ "learning_rate": 1.966793778229725e-09,
1305
+ "loss": 0.0221,
1306
+ "step": 880
1307
  }
1308
  ],
1309
  "logging_steps": 5,
 
1318
  "should_evaluate": false,
1319
  "should_log": false,
1320
  "should_save": true,
1321
+ "should_training_stop": true
1322
  },
1323
  "attributes": {}
1324
  }
1325
  },
1326
+ "total_flos": 5.7149261611008e+16,
1327
  "train_batch_size": 1,
1328
  "trial_name": null,
1329
  "trial_params": null