jflotz committed on
Commit
bdb2235
·
1 Parent(s): 4860c21

Training in progress, step 710000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81ece12c6f9c13f5471c677109a4bde83e1a050417df99ad047004ba4276bba6
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640777d4df17094532677050d169633ab25c6e5307fe7b26b1ef2480f2fa436c
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589beff1989fc9c0ae009d9c4e89cc81dea78aca3df050a942a083e247b0b3bb
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:513fd308756b62d456cf51dbabd0e5432e6e9f801b69e6b4147b7910a92409c3
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d73966766ab27f8d8b82049597603e5e27df9de3bfb4bc68f052966836974a7
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1b41b44339013cf2c800ebd15e56c0ab490da939473759245613333c4ad094b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d01455789648693f3c874de11c7ed6063a3725328c7accf41053ba2de2ef602
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8acf2390c55c3fcc1c7f54edfdb7e51d825ccf03ad02c2710478f24d67d8d927
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea50129c89a3bcb4d508277085bd760cf99e8e7596db26b1b0416ea6e662c44
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b884538c1c7cf25c9c76b4e2aeb5b233c0e82af8266b74f74badb8738101de61
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ef7983f20d7466e22a66c14b4eb278bac441cabcddf12c2ab45c6a7ad45ccb
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0611954260b29d5933679a0b205628fe7afa2763d89a93117a665d8810ddfaa
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7030fabf7b722e2752b51d9deb3cc7dac4745661c75fe5c48a022d295a9ffb0d
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a2570d894279bdec746170684ab6ee38cfa6adc0692ab4c8e2d19fca72b235d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8fd80ed1622a37fb8836b353462fe160453f93a1dbe072cd8bd1e37f25f658
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c360a23dde048e054ef7310a763eb2729b26bcfc6d980f3d3e175d2d2287e150
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9333397ba4ec25f889f96bb6104d7c6cf0bc5fd5c8036f75df959bdd43d52a66
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ded6c10a8ae184d010b20213595dce955d5ae9ca4fec0187e6e124f4763508bf
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58730a986e4ecbe1e49387372f3829be2d2d88e8ab521897bf3b764a726b44be
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc49fe155c033502b3ff00fe8f2d949db5aba4e89748d4722dc58fa6f673d45
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335855e99fe5c1eafe16f664b8e35342405853ed0ef1faa2892c2126df9feea5
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3993339170554817,
5
- "global_step": 700000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8406,11 +8406,131 @@
8406
  "learning_rate": 4.171362893143013e-05,
8407
  "loss": 0.2965,
8408
  "step": 700000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8409
  }
8410
  ],
8411
  "max_steps": 1000000,
8412
  "num_train_epochs": 2,
8413
- "total_flos": 4.732499294618889e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4193244015848456,
5
+ "global_step": 710000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8406
  "learning_rate": 4.171362893143013e-05,
8407
  "loss": 0.2965,
8408
  "step": 700000
8409
+ },
8410
+ {
8411
+ "epoch": 1.4,
8412
+ "learning_rate": 4.1616785247027506e-05,
8413
+ "loss": 0.2963,
8414
+ "step": 700500
8415
+ },
8416
+ {
8417
+ "epoch": 1.4,
8418
+ "learning_rate": 4.1520046500868384e-05,
8419
+ "loss": 0.2966,
8420
+ "step": 701000
8421
+ },
8422
+ {
8423
+ "epoch": 1.4,
8424
+ "learning_rate": 4.1423412957432775e-05,
8425
+ "loss": 0.2963,
8426
+ "step": 701500
8427
+ },
8428
+ {
8429
+ "epoch": 1.4,
8430
+ "learning_rate": 4.1326884880913074e-05,
8431
+ "loss": 0.2966,
8432
+ "step": 702000
8433
+ },
8434
+ {
8435
+ "epoch": 1.4,
8436
+ "learning_rate": 4.123046253521341e-05,
8437
+ "loss": 0.2962,
8438
+ "step": 702500
8439
+ },
8440
+ {
8441
+ "epoch": 1.41,
8442
+ "learning_rate": 4.1134146183948724e-05,
8443
+ "loss": 0.2961,
8444
+ "step": 703000
8445
+ },
8446
+ {
8447
+ "epoch": 1.41,
8448
+ "learning_rate": 4.1037936090444315e-05,
8449
+ "loss": 0.296,
8450
+ "step": 703500
8451
+ },
8452
+ {
8453
+ "epoch": 1.41,
8454
+ "learning_rate": 4.0941832517734885e-05,
8455
+ "loss": 0.2959,
8456
+ "step": 704000
8457
+ },
8458
+ {
8459
+ "epoch": 1.41,
8460
+ "learning_rate": 4.084583572856388e-05,
8461
+ "loss": 0.2962,
8462
+ "step": 704500
8463
+ },
8464
+ {
8465
+ "epoch": 1.41,
8466
+ "learning_rate": 4.0749945985382915e-05,
8467
+ "loss": 0.2961,
8468
+ "step": 705000
8469
+ },
8470
+ {
8471
+ "epoch": 1.41,
8472
+ "learning_rate": 4.065416355035087e-05,
8473
+ "loss": 0.296,
8474
+ "step": 705500
8475
+ },
8476
+ {
8477
+ "epoch": 1.41,
8478
+ "learning_rate": 4.0558488685333235e-05,
8479
+ "loss": 0.2958,
8480
+ "step": 706000
8481
+ },
8482
+ {
8483
+ "epoch": 1.41,
8484
+ "learning_rate": 4.04629216519015e-05,
8485
+ "loss": 0.2961,
8486
+ "step": 706500
8487
+ },
8488
+ {
8489
+ "epoch": 1.41,
8490
+ "learning_rate": 4.036746271133223e-05,
8491
+ "loss": 0.2962,
8492
+ "step": 707000
8493
+ },
8494
+ {
8495
+ "epoch": 1.41,
8496
+ "learning_rate": 4.0272112124606546e-05,
8497
+ "loss": 0.2962,
8498
+ "step": 707500
8499
+ },
8500
+ {
8501
+ "epoch": 1.42,
8502
+ "learning_rate": 4.0176870152409324e-05,
8503
+ "loss": 0.296,
8504
+ "step": 708000
8505
+ },
8506
+ {
8507
+ "epoch": 1.42,
8508
+ "learning_rate": 4.008173705512842e-05,
8509
+ "loss": 0.295,
8510
+ "step": 708500
8511
+ },
8512
+ {
8513
+ "epoch": 1.42,
8514
+ "learning_rate": 3.998671309285417e-05,
8515
+ "loss": 0.2958,
8516
+ "step": 709000
8517
+ },
8518
+ {
8519
+ "epoch": 1.42,
8520
+ "learning_rate": 3.989179852537839e-05,
8521
+ "loss": 0.2964,
8522
+ "step": 709500
8523
+ },
8524
+ {
8525
+ "epoch": 1.42,
8526
+ "learning_rate": 3.979699361219395e-05,
8527
+ "loss": 0.2956,
8528
+ "step": 710000
8529
  }
8530
  ],
8531
  "max_steps": 1000000,
8532
  "num_train_epochs": 2,
8533
+ "total_flos": 4.800106373011031e+22,
8534
  "trial_name": null,
8535
  "trial_params": null
8536
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589beff1989fc9c0ae009d9c4e89cc81dea78aca3df050a942a083e247b0b3bb
3
  size 449450757