jflotz commited on
Commit
4860c21
·
1 Parent(s): a6ed06e

Training in progress, step 700000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9c0e525e514fe1c5cef0557ef6488da0c1fdebf6272aba47004231ca6976c18
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ece12c6f9c13f5471c677109a4bde83e1a050417df99ad047004ba4276bba6
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57580e912c8eb48eaacc83064464221889da2f0844a835381b4fa326341cb678
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:513fd308756b62d456cf51dbabd0e5432e6e9f801b69e6b4147b7910a92409c3
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62f6f091581690af80f436a9b86b0d75c2dad2d63b594e5185c0e63d2bb68d1e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1b41b44339013cf2c800ebd15e56c0ab490da939473759245613333c4ad094b
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a7b8b83cde1bd3869fe090f1cb9c794e65d5033208eec9d00a90a14b364e27f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acf2390c55c3fcc1c7f54edfdb7e51d825ccf03ad02c2710478f24d67d8d927
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1804d7c7415f17b94f7dec6a3f79e792faa4e7579f021ed604639d6ae0a4f8f2
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b884538c1c7cf25c9c76b4e2aeb5b233c0e82af8266b74f74badb8738101de61
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5379aefa05a140fc154bf332c86fa067c2c130445bc4590eb14522743a5b9947
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0611954260b29d5933679a0b205628fe7afa2763d89a93117a665d8810ddfaa
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:601105f28ea2021a75065a3124db1406fa8cee4a2f9774cf405bde49613afb37
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2570d894279bdec746170684ab6ee38cfa6adc0692ab4c8e2d19fca72b235d
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:578996460506c4bb1c40a07e7da011e3e8db58802532f0ccbbdc09eaa32c4f08
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c360a23dde048e054ef7310a763eb2729b26bcfc6d980f3d3e175d2d2287e150
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80cf61947b33b1a7acf0fb558355efcd3fbb3669a40d006ee3d143181ad13b1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded6c10a8ae184d010b20213595dce955d5ae9ca4fec0187e6e124f4763508bf
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccd22c7052fac9ec9f037ee44414440d92721875c54fac648c7b4ca864fbffde
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afc49fe155c033502b3ff00fe8f2d949db5aba4e89748d4722dc58fa6f673d45
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3793434325261176,
5
- "global_step": 690000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8286,11 +8286,131 @@
8286
  "learning_rate": 4.3672129835314955e-05,
8287
  "loss": 0.2971,
8288
  "step": 690000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8289
  }
8290
  ],
8291
  "max_steps": 1000000,
8292
  "num_train_epochs": 2,
8293
- "total_flos": 4.664892321779864e+22,
8294
  "trial_name": null,
8295
  "trial_params": null
8296
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3993339170554817,
5
+ "global_step": 700000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8286
  "learning_rate": 4.3672129835314955e-05,
8287
  "loss": 0.2971,
8288
  "step": 690000
8289
+ },
8290
+ {
8291
+ "epoch": 1.38,
8292
+ "learning_rate": 4.3573243363356916e-05,
8293
+ "loss": 0.297,
8294
+ "step": 690500
8295
+ },
8296
+ {
8297
+ "epoch": 1.38,
8298
+ "learning_rate": 4.347445648076057e-05,
8299
+ "loss": 0.2969,
8300
+ "step": 691000
8301
+ },
8302
+ {
8303
+ "epoch": 1.38,
8304
+ "learning_rate": 4.337576945760554e-05,
8305
+ "loss": 0.2978,
8306
+ "step": 691500
8307
+ },
8308
+ {
8309
+ "epoch": 1.38,
8310
+ "learning_rate": 4.327718256369826e-05,
8311
+ "loss": 0.2965,
8312
+ "step": 692000
8313
+ },
8314
+ {
8315
+ "epoch": 1.38,
8316
+ "learning_rate": 4.317869606857162e-05,
8317
+ "loss": 0.2973,
8318
+ "step": 692500
8319
+ },
8320
+ {
8321
+ "epoch": 1.39,
8322
+ "learning_rate": 4.3080310241483885e-05,
8323
+ "loss": 0.2967,
8324
+ "step": 693000
8325
+ },
8326
+ {
8327
+ "epoch": 1.39,
8328
+ "learning_rate": 4.298202535141818e-05,
8329
+ "loss": 0.2974,
8330
+ "step": 693500
8331
+ },
8332
+ {
8333
+ "epoch": 1.39,
8334
+ "learning_rate": 4.2883841667081675e-05,
8335
+ "loss": 0.2967,
8336
+ "step": 694000
8337
+ },
8338
+ {
8339
+ "epoch": 1.39,
8340
+ "learning_rate": 4.2785759456904745e-05,
8341
+ "loss": 0.2966,
8342
+ "step": 694500
8343
+ },
8344
+ {
8345
+ "epoch": 1.39,
8346
+ "learning_rate": 4.268777898904044e-05,
8347
+ "loss": 0.2969,
8348
+ "step": 695000
8349
+ },
8350
+ {
8351
+ "epoch": 1.39,
8352
+ "learning_rate": 4.2589900531363606e-05,
8353
+ "loss": 0.2967,
8354
+ "step": 695500
8355
+ },
8356
+ {
8357
+ "epoch": 1.39,
8358
+ "learning_rate": 4.2492124351470214e-05,
8359
+ "loss": 0.2962,
8360
+ "step": 696000
8361
+ },
8362
+ {
8363
+ "epoch": 1.39,
8364
+ "learning_rate": 4.239445071667666e-05,
8365
+ "loss": 0.297,
8366
+ "step": 696500
8367
+ },
8368
+ {
8369
+ "epoch": 1.39,
8370
+ "learning_rate": 4.2296879894018835e-05,
8371
+ "loss": 0.2966,
8372
+ "step": 697000
8373
+ },
8374
+ {
8375
+ "epoch": 1.39,
8376
+ "learning_rate": 4.219941215025171e-05,
8377
+ "loss": 0.2971,
8378
+ "step": 697500
8379
+ },
8380
+ {
8381
+ "epoch": 1.4,
8382
+ "learning_rate": 4.210204775184834e-05,
8383
+ "loss": 0.2973,
8384
+ "step": 698000
8385
+ },
8386
+ {
8387
+ "epoch": 1.4,
8388
+ "learning_rate": 4.2004786964999304e-05,
8389
+ "loss": 0.2962,
8390
+ "step": 698500
8391
+ },
8392
+ {
8393
+ "epoch": 1.4,
8394
+ "learning_rate": 4.190763005561186e-05,
8395
+ "loss": 0.2964,
8396
+ "step": 699000
8397
+ },
8398
+ {
8399
+ "epoch": 1.4,
8400
+ "learning_rate": 4.1810577289309266e-05,
8401
+ "loss": 0.2968,
8402
+ "step": 699500
8403
+ },
8404
+ {
8405
+ "epoch": 1.4,
8406
+ "learning_rate": 4.171362893143013e-05,
8407
+ "loss": 0.2965,
8408
+ "step": 700000
8409
  }
8410
  ],
8411
  "max_steps": 1000000,
8412
  "num_train_epochs": 2,
8413
+ "total_flos": 4.732499294618889e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
3
  size 449450757