FredericFan commited on
Commit
2b5df73
·
verified ·
1 Parent(s): b41f2e7

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14e55e875213d92682e84555aa6b33ea2bd487aa3e64808e8e018ff13e39def4
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:301efe15fcd22f178a59817e7ac95437175ae1902e5f8351d10927e9d460d888
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f06185fe3645dcc7fe9ce829eede891f7480be5faf7d32fbc087ca425886173
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da312327cdcf8ee87d3ec3f29034a334cec0891a627a2192c1ab49ddbc3560d9
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:089eaed71453cd0e3401835315e75796803c6c4fdbddff74a2269b34ba454a8b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe1606aea4247336928fb0ca7460d9c32bfd4a5f09cd12bbfa040c328a1e6f1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c911fb82d73d273c8bd13fe16df7396949ad9b406bf6a976c6d2d8dce418f3d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1cf6f72829f199e2c33545d808f1bb98dc287a37068472d85b2db721614981
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08401757478713989,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-8000",
4
- "epoch": 0.64,
5
  "eval_steps": 500,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1255,6 +1255,84 @@
1255
  "eval_samples_per_second": 22.707,
1256
  "eval_steps_per_second": 5.677,
1257
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258
  }
1259
  ],
1260
  "logging_steps": 50,
@@ -1274,7 +1352,7 @@
1274
  "attributes": {}
1275
  }
1276
  },
1277
- "total_flos": 1.948665249792e+16,
1278
  "train_batch_size": 4,
1279
  "trial_name": null,
1280
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08401757478713989,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-8000",
4
+ "epoch": 0.68,
5
  "eval_steps": 500,
6
+ "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1255
  "eval_samples_per_second": 22.707,
1256
  "eval_steps_per_second": 5.677,
1257
  "step": 8000
1258
+ },
1259
+ {
1260
+ "epoch": 0.644,
1261
+ "grad_norm": 0.09193145483732224,
1262
+ "learning_rate": 2.03424e-05,
1263
+ "loss": 0.0592,
1264
+ "step": 8050
1265
+ },
1266
+ {
1267
+ "epoch": 0.648,
1268
+ "grad_norm": 0.13023436069488525,
1269
+ "learning_rate": 2.02824e-05,
1270
+ "loss": 0.0627,
1271
+ "step": 8100
1272
+ },
1273
+ {
1274
+ "epoch": 0.652,
1275
+ "grad_norm": 0.12572939693927765,
1276
+ "learning_rate": 2.02224e-05,
1277
+ "loss": 0.0696,
1278
+ "step": 8150
1279
+ },
1280
+ {
1281
+ "epoch": 0.656,
1282
+ "grad_norm": 0.08949209004640579,
1283
+ "learning_rate": 2.0162400000000002e-05,
1284
+ "loss": 0.0633,
1285
+ "step": 8200
1286
+ },
1287
+ {
1288
+ "epoch": 0.66,
1289
+ "grad_norm": 0.18614652752876282,
1290
+ "learning_rate": 2.01024e-05,
1291
+ "loss": 0.0683,
1292
+ "step": 8250
1293
+ },
1294
+ {
1295
+ "epoch": 0.664,
1296
+ "grad_norm": 0.1969350129365921,
1297
+ "learning_rate": 2.00424e-05,
1298
+ "loss": 0.0629,
1299
+ "step": 8300
1300
+ },
1301
+ {
1302
+ "epoch": 0.668,
1303
+ "grad_norm": 0.18870118260383606,
1304
+ "learning_rate": 1.99824e-05,
1305
+ "loss": 0.0697,
1306
+ "step": 8350
1307
+ },
1308
+ {
1309
+ "epoch": 0.672,
1310
+ "grad_norm": 0.13722488284111023,
1311
+ "learning_rate": 1.99224e-05,
1312
+ "loss": 0.0654,
1313
+ "step": 8400
1314
+ },
1315
+ {
1316
+ "epoch": 0.676,
1317
+ "grad_norm": 0.12897425889968872,
1318
+ "learning_rate": 1.98624e-05,
1319
+ "loss": 0.0632,
1320
+ "step": 8450
1321
+ },
1322
+ {
1323
+ "epoch": 0.68,
1324
+ "grad_norm": 0.16306981444358826,
1325
+ "learning_rate": 1.9802400000000002e-05,
1326
+ "loss": 0.0606,
1327
+ "step": 8500
1328
+ },
1329
+ {
1330
+ "epoch": 0.68,
1331
+ "eval_loss": 0.08417751640081406,
1332
+ "eval_runtime": 88.0586,
1333
+ "eval_samples_per_second": 22.712,
1334
+ "eval_steps_per_second": 5.678,
1335
+ "step": 8500
1336
  }
1337
  ],
1338
  "logging_steps": 50,
 
1352
  "attributes": {}
1353
  }
1354
  },
1355
+ "total_flos": 2.070456827904e+16,
1356
  "train_batch_size": 4,
1357
  "trial_name": null,
1358
  "trial_params": null