jflotz committed on
Commit
bfc781d
·
1 Parent(s): 463335c

Training in progress, step 610000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3011d318834eca8ab53dbb6676157bffa205e38d5b1da950aa905371e910e8ab
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fa3fbc162567eded2c54001cc8f21167119645a7ad5d9438d4d6580d7e0aaf
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7fc5d60355a8dfd752019852cdb15ae9d272ab89ceb9fe9acfccaaa3393cce
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d5ef830797817d960f06c7d56a345ac3affb87d9629b56b5bc8c9c3338bb01c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd344a2f09d330c943a3dd3d04668fe450de22ee225e64c890f4722c7155791f
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53e8adf2ea40b3fc922a7d11d4e61b79f1bf2e372d1d097107cce4c6dc566b51
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7587b2027544c125ad8875c396124abeaa8b26e560f514a758d79f5c77d1245e
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:198286929a9142846ad67730b33946ccbc3ba475f115c3a0aef90b3b51ce6035
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98e6e4730ae83aa0bbaf43b6a70f2dbea908df991dba53b4ba6b61ef5a4bc51
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03eebf4a4ea0cd9321666974d21bc38214f697b5e4b4b439941abd6a346b886d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fae6c192050786077eb0c3ad856996ecc90d4abf868828c40917e34e278ddf8
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b03692cd1ad26b8eef58c8c71c41233d4ef014e517e327f8e4798a79a3642d38
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04edc224f8d5fe4b319f5145ba3be953f45d1403c8d037fe0f2d7e1df55860c5
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4840589ea9729ad2c28a92a929b64277e6de332cdd873f9187cab513a624b326
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd673ae46299e38ec0092baeee3dfcef9b18e2fdce8cd906ed6e5311b6eb2f0
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1d4e4a138e230b3265d7fc7f258a4846b3b6af6bce4e94e22abcfe742c12213
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba46334d5cfb098826b8bc46bdfd08caddd05dd0a2ad976845aaf16ed9488107
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7cc50ebf91016fa727fad392dd121c0404733da11c9da06aed7a47a81b65900
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:071dfcafc848e779fb2959117671810686f513947ddff515a5f53d88a149a7ef
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1317449e5c457cd18ac6087cac07774393562aa747fda3bc1ae1eb6a47f1311
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1451fdba239492a726627053577a5d97726556e48bcec30f5b94d6df5305247b
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1994290717618414,
5
- "global_step": 600000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7206,11 +7206,131 @@
7206
  "learning_rate": 6.281601590014407e-05,
7207
  "loss": 0.3039,
7208
  "step": 600000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7209
  }
7210
  ],
7211
  "max_steps": 1000000,
7212
  "num_train_epochs": 2,
7213
- "total_flos": 4.056420774973463e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2194195562912054,
5
+ "global_step": 610000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7206
  "learning_rate": 6.281601590014407e-05,
7207
  "loss": 0.3039,
7208
  "step": 600000
7209
+ },
7210
+ {
7211
+ "epoch": 1.2,
7212
+ "learning_rate": 6.270383825577923e-05,
7213
+ "loss": 0.3039,
7214
+ "step": 600500
7215
+ },
7216
+ {
7217
+ "epoch": 1.2,
7218
+ "learning_rate": 6.259170789846017e-05,
7219
+ "loss": 0.3034,
7220
+ "step": 601000
7221
+ },
7222
+ {
7223
+ "epoch": 1.2,
7224
+ "learning_rate": 6.247962513474697e-05,
7225
+ "loss": 0.3039,
7226
+ "step": 601500
7227
+ },
7228
+ {
7229
+ "epoch": 1.2,
7230
+ "learning_rate": 6.236759027106965e-05,
7231
+ "loss": 0.3035,
7232
+ "step": 602000
7233
+ },
7234
+ {
7235
+ "epoch": 1.2,
7236
+ "learning_rate": 6.225560361372722e-05,
7237
+ "loss": 0.3034,
7238
+ "step": 602500
7239
+ },
7240
+ {
7241
+ "epoch": 1.21,
7242
+ "learning_rate": 6.214366546888694e-05,
7243
+ "loss": 0.3045,
7244
+ "step": 603000
7245
+ },
7246
+ {
7247
+ "epoch": 1.21,
7248
+ "learning_rate": 6.203177614258345e-05,
7249
+ "loss": 0.3031,
7250
+ "step": 603500
7251
+ },
7252
+ {
7253
+ "epoch": 1.21,
7254
+ "learning_rate": 6.191993594071785e-05,
7255
+ "loss": 0.3039,
7256
+ "step": 604000
7257
+ },
7258
+ {
7259
+ "epoch": 1.21,
7260
+ "learning_rate": 6.180814516905701e-05,
7261
+ "loss": 0.3068,
7262
+ "step": 604500
7263
+ },
7264
+ {
7265
+ "epoch": 1.21,
7266
+ "learning_rate": 6.169640413323262e-05,
7267
+ "loss": 0.3035,
7268
+ "step": 605000
7269
+ },
7270
+ {
7271
+ "epoch": 1.21,
7272
+ "learning_rate": 6.158471313874041e-05,
7273
+ "loss": 0.3027,
7274
+ "step": 605500
7275
+ },
7276
+ {
7277
+ "epoch": 1.21,
7278
+ "learning_rate": 6.147307249093929e-05,
7279
+ "loss": 0.3039,
7280
+ "step": 606000
7281
+ },
7282
+ {
7283
+ "epoch": 1.21,
7284
+ "learning_rate": 6.136148249505053e-05,
7285
+ "loss": 0.3037,
7286
+ "step": 606500
7287
+ },
7288
+ {
7289
+ "epoch": 1.21,
7290
+ "learning_rate": 6.124994345615693e-05,
7291
+ "loss": 0.3034,
7292
+ "step": 607000
7293
+ },
7294
+ {
7295
+ "epoch": 1.21,
7296
+ "learning_rate": 6.113845567920194e-05,
7297
+ "loss": 0.3033,
7298
+ "step": 607500
7299
+ },
7300
+ {
7301
+ "epoch": 1.22,
7302
+ "learning_rate": 6.102701946898891e-05,
7303
+ "loss": 0.3041,
7304
+ "step": 608000
7305
+ },
7306
+ {
7307
+ "epoch": 1.22,
7308
+ "learning_rate": 6.0915635130180154e-05,
7309
+ "loss": 0.3027,
7310
+ "step": 608500
7311
+ },
7312
+ {
7313
+ "epoch": 1.22,
7314
+ "learning_rate": 6.0804302967296225e-05,
7315
+ "loss": 0.3028,
7316
+ "step": 609000
7317
+ },
7318
+ {
7319
+ "epoch": 1.22,
7320
+ "learning_rate": 6.0693023284715e-05,
7321
+ "loss": 0.3031,
7322
+ "step": 609500
7323
+ },
7324
+ {
7325
+ "epoch": 1.22,
7326
+ "learning_rate": 6.058179638667089e-05,
7327
+ "loss": 0.3027,
7328
+ "step": 610000
7329
  }
7330
  ],
7331
  "max_steps": 1000000,
7332
  "num_train_epochs": 2,
7333
+ "total_flos": 4.124028700869167e+22,
7334
  "trial_name": null,
7335
  "trial_params": null
7336
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7fc5d60355a8dfd752019852cdb15ae9d272ab89ceb9fe9acfccaaa3393cce
3
  size 449450757