ljcamargo commited on
Commit
fa4158f
·
verified ·
1 Parent(s): 8b75cae

Training in progress, step 1900, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:229e19659100db88dd521b24c7f3783cb59725c077f5c58e0b1e8cbed6566cad
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84f12537cd80e96d5b00db2adce34c918e49c02c0163196b020849bfc5dcea70
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:086d4ae403b4653b546f4d669e2f8c98a6c3bc786f7ff28201dea70b2067e4f2
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bba8b74741b8f956ed154bc4ece6dfb19904a6a7c6b034624740300cde18a97
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4bb891f0ebc45b239e473bb43ab5a6e8916e99a94e990939ec84f3da08f81e
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdd5d251a495085a19339ae2e6833dedf33f9b2050a0e70b16dd4cd5da2b7a12
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34908dcef28f44e129bc7cf6f353b95daa97084843f3b737ac3e87c6a4beba8f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.72,
6
  "eval_steps": 500,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1268,6 +1268,76 @@
1268
  "learning_rate": 1.414790996784566e-05,
1269
  "loss": 0.5619,
1270
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1271
  }
1272
  ],
1273
  "logging_steps": 10,
@@ -1287,7 +1357,7 @@
1287
  "attributes": {}
1288
  }
1289
  },
1290
- "total_flos": 3.2448823590445056e+16,
1291
  "train_batch_size": 2,
1292
  "trial_name": null,
1293
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.76,
6
  "eval_steps": 500,
7
+ "global_step": 1900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1268
  "learning_rate": 1.414790996784566e-05,
1269
  "loss": 0.5619,
1270
  "step": 1800
1271
+ },
1272
+ {
1273
+ "epoch": 0.724,
1274
+ "grad_norm": 5.905683517456055,
1275
+ "learning_rate": 1.3946945337620579e-05,
1276
+ "loss": 0.3286,
1277
+ "step": 1810
1278
+ },
1279
+ {
1280
+ "epoch": 0.728,
1281
+ "grad_norm": 10.061610221862793,
1282
+ "learning_rate": 1.3745980707395497e-05,
1283
+ "loss": 0.3899,
1284
+ "step": 1820
1285
+ },
1286
+ {
1287
+ "epoch": 0.732,
1288
+ "grad_norm": 12.293854713439941,
1289
+ "learning_rate": 1.354501607717042e-05,
1290
+ "loss": 0.4059,
1291
+ "step": 1830
1292
+ },
1293
+ {
1294
+ "epoch": 0.736,
1295
+ "grad_norm": 13.248871803283691,
1296
+ "learning_rate": 1.3344051446945338e-05,
1297
+ "loss": 0.4339,
1298
+ "step": 1840
1299
+ },
1300
+ {
1301
+ "epoch": 0.74,
1302
+ "grad_norm": 9.589434623718262,
1303
+ "learning_rate": 1.3143086816720257e-05,
1304
+ "loss": 0.4178,
1305
+ "step": 1850
1306
+ },
1307
+ {
1308
+ "epoch": 0.744,
1309
+ "grad_norm": 8.538604736328125,
1310
+ "learning_rate": 1.2942122186495179e-05,
1311
+ "loss": 0.3152,
1312
+ "step": 1860
1313
+ },
1314
+ {
1315
+ "epoch": 0.748,
1316
+ "grad_norm": 18.58129119873047,
1317
+ "learning_rate": 1.2741157556270097e-05,
1318
+ "loss": 0.4276,
1319
+ "step": 1870
1320
+ },
1321
+ {
1322
+ "epoch": 0.752,
1323
+ "grad_norm": 8.69501781463623,
1324
+ "learning_rate": 1.2540192926045016e-05,
1325
+ "loss": 0.4304,
1326
+ "step": 1880
1327
+ },
1328
+ {
1329
+ "epoch": 0.756,
1330
+ "grad_norm": 14.74836254119873,
1331
+ "learning_rate": 1.2339228295819937e-05,
1332
+ "loss": 0.3541,
1333
+ "step": 1890
1334
+ },
1335
+ {
1336
+ "epoch": 0.76,
1337
+ "grad_norm": 7.415429592132568,
1338
+ "learning_rate": 1.2138263665594855e-05,
1339
+ "loss": 0.3713,
1340
+ "step": 1900
1341
  }
1342
  ],
1343
  "logging_steps": 10,
 
1357
  "attributes": {}
1358
  }
1359
  },
1360
+ "total_flos": 3.4245796106594304e+16,
1361
  "train_batch_size": 2,
1362
  "trial_name": null,
1363
  "trial_params": null