jflotz commited on
Commit
c704192
·
1 Parent(s): bfc781d

Training in progress, step 620000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34fa3fbc162567eded2c54001cc8f21167119645a7ad5d9438d4d6580d7e0aaf
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a097619eaa9ef720984e4ddd9dde8f3b697ba4e3a54ad0e09caff9a338f70f3
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb7fc5d60355a8dfd752019852cdb15ae9d272ab89ceb9fe9acfccaaa3393cce
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd344a2f09d330c943a3dd3d04668fe450de22ee225e64c890f4722c7155791f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef2eb325ca73aeb9167731426720e192b4dc476427d6c01affc7b3b2a3e583b
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7587b2027544c125ad8875c396124abeaa8b26e560f514a758d79f5c77d1245e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef8f093c13597a9033ad0961b449077de3ab17c6b5e598e7ffe900737a37b62
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98e6e4730ae83aa0bbaf43b6a70f2dbea908df991dba53b4ba6b61ef5a4bc51
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20445f0abf4410bec575adab612ba675a2a9e22a555ffb2b2fb85961556a332
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fae6c192050786077eb0c3ad856996ecc90d4abf868828c40917e34e278ddf8
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20bb77018a33c479ea4b69c28339611c22b8e8641554d8590d8198df56a8bc21
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04edc224f8d5fe4b319f5145ba3be953f45d1403c8d037fe0f2d7e1df55860c5
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34450b80230704bfaa6dfc6f8672078f09277102e090e32e505412e8b0a06323
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fd673ae46299e38ec0092baeee3dfcef9b18e2fdce8cd906ed6e5311b6eb2f0
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f073590871f68cf52e503ea996a2b041d76f54fc155c63b39985473c3a2c6e9
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba46334d5cfb098826b8bc46bdfd08caddd05dd0a2ad976845aaf16ed9488107
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb10ba8d357eb68fad272ff5bc8fb10ce9e5818ee9a6bd185f2331209c9c5eee
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:071dfcafc848e779fb2959117671810686f513947ddff515a5f53d88a149a7ef
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdc5c8f1d9880eb7b2e1404af5eeae63b870215c24de51ca47db7d2d9d87809
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1451fdba239492a726627053577a5d97726556e48bcec30f5b94d6df5305247b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e0be0a1dddd0483c31953c97f497ac534ef42fa519c13cb1ceaab964eeaafea
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2194195562912054,
5
- "global_step": 610000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7326,11 +7326,131 @@
7326
  "learning_rate": 6.058179638667089e-05,
7327
  "loss": 0.3027,
7328
  "step": 610000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7329
  }
7330
  ],
7331
  "max_steps": 1000000,
7332
  "num_train_epochs": 2,
7333
- "total_flos": 4.124028700869167e+22,
7334
  "trial_name": null,
7335
  "trial_params": null
7336
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2394100408205695,
5
+ "global_step": 620000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7326
  "learning_rate": 6.058179638667089e-05,
7327
  "loss": 0.3027,
7328
  "step": 610000
7329
+ },
7330
+ {
7331
+ "epoch": 1.22,
7332
+ "learning_rate": 6.047062257725395e-05,
7333
+ "loss": 0.3035,
7334
+ "step": 610500
7335
+ },
7336
+ {
7337
+ "epoch": 1.22,
7338
+ "learning_rate": 6.035950216040917e-05,
7339
+ "loss": 0.303,
7340
+ "step": 611000
7341
+ },
7342
+ {
7343
+ "epoch": 1.22,
7344
+ "learning_rate": 6.0248435439935516e-05,
7345
+ "loss": 0.3031,
7346
+ "step": 611500
7347
+ },
7348
+ {
7349
+ "epoch": 1.22,
7350
+ "learning_rate": 6.0137422719485145e-05,
7351
+ "loss": 0.3032,
7352
+ "step": 612000
7353
+ },
7354
+ {
7355
+ "epoch": 1.22,
7356
+ "learning_rate": 6.0026464302562636e-05,
7357
+ "loss": 0.303,
7358
+ "step": 612500
7359
+ },
7360
+ {
7361
+ "epoch": 1.23,
7362
+ "learning_rate": 5.991556049252401e-05,
7363
+ "loss": 0.303,
7364
+ "step": 613000
7365
+ },
7366
+ {
7367
+ "epoch": 1.23,
7368
+ "learning_rate": 5.980471159257609e-05,
7369
+ "loss": 0.3031,
7370
+ "step": 613500
7371
+ },
7372
+ {
7373
+ "epoch": 1.23,
7374
+ "learning_rate": 5.969391790577551e-05,
7375
+ "loss": 0.3026,
7376
+ "step": 614000
7377
+ },
7378
+ {
7379
+ "epoch": 1.23,
7380
+ "learning_rate": 5.958317973502798e-05,
7381
+ "loss": 0.3026,
7382
+ "step": 614500
7383
+ },
7384
+ {
7385
+ "epoch": 1.23,
7386
+ "learning_rate": 5.947249738308747e-05,
7387
+ "loss": 0.3024,
7388
+ "step": 615000
7389
+ },
7390
+ {
7391
+ "epoch": 1.23,
7392
+ "learning_rate": 5.9361871152555254e-05,
7393
+ "loss": 0.3031,
7394
+ "step": 615500
7395
+ },
7396
+ {
7397
+ "epoch": 1.23,
7398
+ "learning_rate": 5.925130134587924e-05,
7399
+ "loss": 0.3021,
7400
+ "step": 616000
7401
+ },
7402
+ {
7403
+ "epoch": 1.23,
7404
+ "learning_rate": 5.914078826535307e-05,
7405
+ "loss": 0.3021,
7406
+ "step": 616500
7407
+ },
7408
+ {
7409
+ "epoch": 1.23,
7410
+ "learning_rate": 5.903033221311528e-05,
7411
+ "loss": 0.3023,
7412
+ "step": 617000
7413
+ },
7414
+ {
7415
+ "epoch": 1.23,
7416
+ "learning_rate": 5.891993349114847e-05,
7417
+ "loss": 0.3025,
7418
+ "step": 617500
7419
+ },
7420
+ {
7421
+ "epoch": 1.24,
7422
+ "learning_rate": 5.880959240127858e-05,
7423
+ "loss": 0.3021,
7424
+ "step": 618000
7425
+ },
7426
+ {
7427
+ "epoch": 1.24,
7428
+ "learning_rate": 5.86993092451739e-05,
7429
+ "loss": 0.3022,
7430
+ "step": 618500
7431
+ },
7432
+ {
7433
+ "epoch": 1.24,
7434
+ "learning_rate": 5.858908432434438e-05,
7435
+ "loss": 0.3021,
7436
+ "step": 619000
7437
+ },
7438
+ {
7439
+ "epoch": 1.24,
7440
+ "learning_rate": 5.847891794014074e-05,
7441
+ "loss": 0.3017,
7442
+ "step": 619500
7443
+ },
7444
+ {
7445
+ "epoch": 1.24,
7446
+ "learning_rate": 5.8368810393753684e-05,
7447
+ "loss": 0.3021,
7448
+ "step": 620000
7449
  }
7450
  ],
7451
  "max_steps": 1000000,
7452
  "num_train_epochs": 2,
7453
+ "total_flos": 4.191638959928546e+22,
7454
  "trial_name": null,
7455
  "trial_params": null
7456
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb7fc5d60355a8dfd752019852cdb15ae9d272ab89ceb9fe9acfccaaa3393cce
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
3
  size 449450757