jflotz commited on
Commit
a6ed06e
·
1 Parent(s): 32bb652

Training in progress, step 690000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55e5d00d2cd7677d6c62be4befe12ceb99e54aa60af5a94e2947d4ae516faaf9
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9c0e525e514fe1c5cef0557ef6488da0c1fdebf6272aba47004231ca6976c18
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc51c6b56a4707511277be7ae25e9ceecb4c1ce951b8055606cd763924bc386
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5cdde6fe1592597eb282f0c1adb0c3419a3256dc73cdd2ad8141425d3367aa1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57580e912c8eb48eaacc83064464221889da2f0844a835381b4fa326341cb678
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c1fcd1d388653574e603211bfc56c68bd7e902268c20d36edbd0c9c2c455ef
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f6f091581690af80f436a9b86b0d75c2dad2d63b594e5185c0e63d2bb68d1e
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c40db2c616071d5de318f5594c12558c6be71cc3185a20daba66531013c4562
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7b8b83cde1bd3869fe090f1cb9c794e65d5033208eec9d00a90a14b364e27f
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abefb15d42bbdbc9d96701958a628e317cbc9e1c294d8e8a76d57bee1d799f4e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1804d7c7415f17b94f7dec6a3f79e792faa4e7579f021ed604639d6ae0a4f8f2
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5923e1314a170c5cdfcf9c517450e0d99f3907dcef2765f816ff70cb6461364
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5379aefa05a140fc154bf332c86fa067c2c130445bc4590eb14522743a5b9947
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9887b880e911257bf7ed749ed55b173537ca64c83004df1495db6bddcf868768
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601105f28ea2021a75065a3124db1406fa8cee4a2f9774cf405bde49613afb37
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f6acc5ebd016f0085aa2c2381f509e3d8d43d4652dca878fe5c1e4ef96b864c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578996460506c4bb1c40a07e7da011e3e8db58802532f0ccbbdc09eaa32c4f08
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5eeebb55361b678fa47ae3e37f81a72056ae8266b737b634b72a45e273417dd
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80cf61947b33b1a7acf0fb558355efcd3fbb3669a40d006ee3d143181ad13b1
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:390e03fe3cc05eeca400d470fa9829ae91cb8c4659d5322fb96cb732dc98e5e1
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd22c7052fac9ec9f037ee44414440d92721875c54fac648c7b4ca864fbffde
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3593529479967534,
5
- "global_step": 680000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8166,11 +8166,131 @@
8166
  "learning_rate": 4.567035473186444e-05,
8167
  "loss": 0.2978,
8168
  "step": 680000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8169
  }
8170
  ],
8171
  "max_steps": 1000000,
8172
  "num_train_epochs": 2,
8173
- "total_flos": 4.597284714742531e+22,
8174
  "trial_name": null,
8175
  "trial_params": null
8176
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3793434325261176,
5
+ "global_step": 690000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8166
  "learning_rate": 4.567035473186444e-05,
8167
  "loss": 0.2978,
8168
  "step": 680000
8169
+ },
8170
+ {
8171
+ "epoch": 1.36,
8172
+ "learning_rate": 4.556953360325625e-05,
8173
+ "loss": 0.2978,
8174
+ "step": 680500
8175
+ },
8176
+ {
8177
+ "epoch": 1.36,
8178
+ "learning_rate": 4.546880660622845e-05,
8179
+ "loss": 0.2975,
8180
+ "step": 681000
8181
+ },
8182
+ {
8183
+ "epoch": 1.36,
8184
+ "learning_rate": 4.5368174016164844e-05,
8185
+ "loss": 0.2976,
8186
+ "step": 681500
8187
+ },
8188
+ {
8189
+ "epoch": 1.36,
8190
+ "learning_rate": 4.5267636108191036e-05,
8191
+ "loss": 0.2983,
8192
+ "step": 682000
8193
+ },
8194
+ {
8195
+ "epoch": 1.36,
8196
+ "learning_rate": 4.5167193157173913e-05,
8197
+ "loss": 0.2978,
8198
+ "step": 682500
8199
+ },
8200
+ {
8201
+ "epoch": 1.37,
8202
+ "learning_rate": 4.5066845437720555e-05,
8203
+ "loss": 0.2977,
8204
+ "step": 683000
8205
+ },
8206
+ {
8207
+ "epoch": 1.37,
8208
+ "learning_rate": 4.4966593224177866e-05,
8209
+ "loss": 0.2976,
8210
+ "step": 683500
8211
+ },
8212
+ {
8213
+ "epoch": 1.37,
8214
+ "learning_rate": 4.4866436790631564e-05,
8215
+ "loss": 0.2971,
8216
+ "step": 684000
8217
+ },
8218
+ {
8219
+ "epoch": 1.37,
8220
+ "learning_rate": 4.476637641090551e-05,
8221
+ "loss": 0.2975,
8222
+ "step": 684500
8223
+ },
8224
+ {
8225
+ "epoch": 1.37,
8226
+ "learning_rate": 4.4666412358560955e-05,
8227
+ "loss": 0.2978,
8228
+ "step": 685000
8229
+ },
8230
+ {
8231
+ "epoch": 1.37,
8232
+ "learning_rate": 4.456654490689578e-05,
8233
+ "loss": 0.2967,
8234
+ "step": 685500
8235
+ },
8236
+ {
8237
+ "epoch": 1.37,
8238
+ "learning_rate": 4.4466774328943796e-05,
8239
+ "loss": 0.2979,
8240
+ "step": 686000
8241
+ },
8242
+ {
8243
+ "epoch": 1.37,
8244
+ "learning_rate": 4.4367100897474e-05,
8245
+ "loss": 0.2975,
8246
+ "step": 686500
8247
+ },
8248
+ {
8249
+ "epoch": 1.37,
8250
+ "learning_rate": 4.426752488498972e-05,
8251
+ "loss": 0.2972,
8252
+ "step": 687000
8253
+ },
8254
+ {
8255
+ "epoch": 1.37,
8256
+ "learning_rate": 4.4168046563727945e-05,
8257
+ "loss": 0.2972,
8258
+ "step": 687500
8259
+ },
8260
+ {
8261
+ "epoch": 1.38,
8262
+ "learning_rate": 4.406866620565862e-05,
8263
+ "loss": 0.2968,
8264
+ "step": 688000
8265
+ },
8266
+ {
8267
+ "epoch": 1.38,
8268
+ "learning_rate": 4.396938408248383e-05,
8269
+ "loss": 0.2973,
8270
+ "step": 688500
8271
+ },
8272
+ {
8273
+ "epoch": 1.38,
8274
+ "learning_rate": 4.3870200465637164e-05,
8275
+ "loss": 0.2972,
8276
+ "step": 689000
8277
+ },
8278
+ {
8279
+ "epoch": 1.38,
8280
+ "learning_rate": 4.377111562628282e-05,
8281
+ "loss": 0.2965,
8282
+ "step": 689500
8283
+ },
8284
+ {
8285
+ "epoch": 1.38,
8286
+ "learning_rate": 4.3672129835314955e-05,
8287
+ "loss": 0.2971,
8288
+ "step": 690000
8289
  }
8290
  ],
8291
  "max_steps": 1000000,
8292
  "num_train_epochs": 2,
8293
+ "total_flos": 4.664892321779864e+22,
8294
  "trial_name": null,
8295
  "trial_params": null
8296
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc51c6b56a4707511277be7ae25e9ceecb4c1ce951b8055606cd763924bc386
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
3
  size 449450757