ljcamargo commited on
Commit
ac06119
·
verified ·
1 Parent(s): 9629a36

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84f12537cd80e96d5b00db2adce34c918e49c02c0163196b020849bfc5dcea70
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea1b48825d279d5ca7532312e7e81957e535191d5f4e4e23c6756d53ffb5dc5
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bba8b74741b8f956ed154bc4ece6dfb19904a6a7c6b034624740300cde18a97
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc649b7fa91947a37cd4744fb1a38adf59d9a1c0676e9bc59a750dc67ad53fa6
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb4bb891f0ebc45b239e473bb43ab5a6e8916e99a94e990939ec84f3da08f81e
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53febc76262518d0519b05d74ab6f65dd5851f3bbee84bc1c2b8f6935b1f50de
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34908dcef28f44e129bc7cf6f353b95daa97084843f3b737ac3e87c6a4beba8f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68747f6eb2bb192bc48db140d8e66025b016a51ccd2dd4f8273e6973eed04b3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.76,
6
  "eval_steps": 500,
7
- "global_step": 1900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1338,6 +1338,76 @@
1338
  "learning_rate": 1.2138263665594855e-05,
1339
  "loss": 0.3713,
1340
  "step": 1900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1341
  }
1342
  ],
1343
  "logging_steps": 10,
@@ -1357,7 +1427,7 @@
1357
  "attributes": {}
1358
  }
1359
  },
1360
- "total_flos": 3.4245796106594304e+16,
1361
  "train_batch_size": 2,
1362
  "trial_name": null,
1363
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8,
6
  "eval_steps": 500,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1338
  "learning_rate": 1.2138263665594855e-05,
1339
  "loss": 0.3713,
1340
  "step": 1900
1341
+ },
1342
+ {
1343
+ "epoch": 0.764,
1344
+ "grad_norm": 8.78702163696289,
1345
+ "learning_rate": 1.1937299035369776e-05,
1346
+ "loss": 0.3025,
1347
+ "step": 1910
1348
+ },
1349
+ {
1350
+ "epoch": 0.768,
1351
+ "grad_norm": 2.6222591400146484,
1352
+ "learning_rate": 1.1736334405144696e-05,
1353
+ "loss": 0.2279,
1354
+ "step": 1920
1355
+ },
1356
+ {
1357
+ "epoch": 0.772,
1358
+ "grad_norm": 8.457213401794434,
1359
+ "learning_rate": 1.1535369774919615e-05,
1360
+ "loss": 0.3841,
1361
+ "step": 1930
1362
+ },
1363
+ {
1364
+ "epoch": 0.776,
1365
+ "grad_norm": 9.097604751586914,
1366
+ "learning_rate": 1.1334405144694535e-05,
1367
+ "loss": 0.3436,
1368
+ "step": 1940
1369
+ },
1370
+ {
1371
+ "epoch": 0.78,
1372
+ "grad_norm": 9.933280944824219,
1373
+ "learning_rate": 1.1133440514469454e-05,
1374
+ "loss": 0.4276,
1375
+ "step": 1950
1376
+ },
1377
+ {
1378
+ "epoch": 0.784,
1379
+ "grad_norm": 9.58340072631836,
1380
+ "learning_rate": 1.0932475884244374e-05,
1381
+ "loss": 0.281,
1382
+ "step": 1960
1383
+ },
1384
+ {
1385
+ "epoch": 0.788,
1386
+ "grad_norm": 13.846723556518555,
1387
+ "learning_rate": 1.0731511254019293e-05,
1388
+ "loss": 0.2836,
1389
+ "step": 1970
1390
+ },
1391
+ {
1392
+ "epoch": 0.792,
1393
+ "grad_norm": 30.122060775756836,
1394
+ "learning_rate": 1.0530546623794213e-05,
1395
+ "loss": 0.3722,
1396
+ "step": 1980
1397
+ },
1398
+ {
1399
+ "epoch": 0.796,
1400
+ "grad_norm": 8.666303634643555,
1401
+ "learning_rate": 1.0329581993569132e-05,
1402
+ "loss": 0.2778,
1403
+ "step": 1990
1404
+ },
1405
+ {
1406
+ "epoch": 0.8,
1407
+ "grad_norm": 7.968908786773682,
1408
+ "learning_rate": 1.0128617363344052e-05,
1409
+ "loss": 0.2778,
1410
+ "step": 2000
1411
  }
1412
  ],
1413
  "logging_steps": 10,
 
1427
  "attributes": {}
1428
  }
1429
  },
1430
+ "total_flos": 3.604261231669248e+16,
1431
  "train_batch_size": 2,
1432
  "trial_name": null,
1433
  "trial_params": null