FormlessAI commited on
Commit
58ae454
·
verified ·
1 Parent(s): c4c373e

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dcaeecc4de943ff3b1b25e2080130d8e204326396f2c8274b395a49f7f5b0bc
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4350d345d97b56ca7b59dbc011925ec1904d8a564b4e38969c9579b074ff804
3
  size 98088784
last-checkpoint/global_step2150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a44a13ae97270c3c416f31cb0b1f80f12d2017b4f0d8cf1c6268d7d80a8a9e
3
+ size 73939813
last-checkpoint/global_step2150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad29f1a01bca6c52caaba5da3ca1b1b7fed1b150ff1a8a793f96d09e741d822
3
+ size 73939813
last-checkpoint/global_step2150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c041692ece9873bd47a7b865469cc75af6eb0d6bfb4e97d45a24abb258e8a50c
3
+ size 73939877
last-checkpoint/global_step2150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b262ba0035994932655c5a03030ca63c65a5e37551cbdff4a83db6e4eba9ca2
3
+ size 73939877
last-checkpoint/global_step2150/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef2a5d14ad0f0ccc005eeb930f4717339e689bc4df3778a18b1154417ee6b78
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2100
 
1
+ global_step2150
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2637efe7ae6c7cccd9f62f625e99a8a78ee05c5b5f326094b177b4c7a2f01e1f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:065a50993eba0212276345f76d8268040ff81f0b0379c2af74bb3ae5b3dc8bf1
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e1ea8442cc1dd935a65b88ffb95d7e206186e1fb9fc023e698ab89785daa803
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42410dea9cc1279875fd58cdbd387af92a3e6d12f7e52ff497b959ac4ac2873f
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68f88b8b4d04735db53bbe5ad9f56393c2fabdec47f79ec07c155c23fa5a121d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84327cb94481f4aa330fcca0f3bb1121ad581d5beb94e2475a38bda5aca36760
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5775d8fe4f00e8335f10c2f7d4c07d75ee4c0de4fb5b46deba1255765aab4709
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a861189dd19e0aceec678d8a75cad82c965ce69f5363194a8cdd37ec6a9f5d
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:729250c178656dffbc14d6226345620f855971d47f1db880250e47e4b7b142b0
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e94beadc98ff451c6f6d2bc6f85a9bd66bfdf5599b86c4c582259ad51b083b3
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6464942693710327,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.0587875076546234,
6
  "eval_steps": 50,
7
- "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3284,6 +3284,84 @@
3284
  "eval_samples_per_second": 126.157,
3285
  "eval_steps_per_second": 15.778,
3286
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3287
  }
3288
  ],
3289
  "logging_steps": 5,
@@ -3312,7 +3390,7 @@
3312
  "attributes": {}
3313
  }
3314
  },
3315
- "total_flos": 1.0824270063368929e+18,
3316
  "train_batch_size": 2,
3317
  "trial_name": null,
3318
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6448646187782288,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.1077770973668097,
6
  "eval_steps": 50,
7
+ "global_step": 2150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3284
  "eval_samples_per_second": 126.157,
3285
  "eval_steps_per_second": 15.778,
3286
  "step": 2100
3287
+ },
3288
+ {
3289
+ "epoch": 2.063686466625842,
3290
+ "grad_norm": 0.23175491392612457,
3291
+ "learning_rate": 7.102482680582014e-05,
3292
+ "loss": 0.6646,
3293
+ "step": 2105
3294
+ },
3295
+ {
3296
+ "epoch": 2.068585425597061,
3297
+ "grad_norm": 0.23201850056648254,
3298
+ "learning_rate": 7.085436623686342e-05,
3299
+ "loss": 0.663,
3300
+ "step": 2110
3301
+ },
3302
+ {
3303
+ "epoch": 2.073484384568279,
3304
+ "grad_norm": 0.23181083798408508,
3305
+ "learning_rate": 7.068374683639328e-05,
3306
+ "loss": 0.6732,
3307
+ "step": 2115
3308
+ },
3309
+ {
3310
+ "epoch": 2.078383343539498,
3311
+ "grad_norm": 0.21368514001369476,
3312
+ "learning_rate": 7.051297035778806e-05,
3313
+ "loss": 0.6602,
3314
+ "step": 2120
3315
+ },
3316
+ {
3317
+ "epoch": 2.0832823025107166,
3318
+ "grad_norm": 0.25182291865348816,
3319
+ "learning_rate": 7.034203855604029e-05,
3320
+ "loss": 0.6544,
3321
+ "step": 2125
3322
+ },
3323
+ {
3324
+ "epoch": 2.0881812614819353,
3325
+ "grad_norm": 0.2292199581861496,
3326
+ "learning_rate": 7.017095318773873e-05,
3327
+ "loss": 0.6522,
3328
+ "step": 2130
3329
+ },
3330
+ {
3331
+ "epoch": 2.0930802204531536,
3332
+ "grad_norm": 0.20745912194252014,
3333
+ "learning_rate": 6.999971601105022e-05,
3334
+ "loss": 0.6527,
3335
+ "step": 2135
3336
+ },
3337
+ {
3338
+ "epoch": 2.0979791794243723,
3339
+ "grad_norm": 0.22758464515209198,
3340
+ "learning_rate": 6.982832878570172e-05,
3341
+ "loss": 0.6578,
3342
+ "step": 2140
3343
+ },
3344
+ {
3345
+ "epoch": 2.102878138395591,
3346
+ "grad_norm": 0.20944316685199738,
3347
+ "learning_rate": 6.965679327296211e-05,
3348
+ "loss": 0.6584,
3349
+ "step": 2145
3350
+ },
3351
+ {
3352
+ "epoch": 2.1077770973668097,
3353
+ "grad_norm": 0.21847444772720337,
3354
+ "learning_rate": 6.94851112356242e-05,
3355
+ "loss": 0.6733,
3356
+ "step": 2150
3357
+ },
3358
+ {
3359
+ "epoch": 2.1077770973668097,
3360
+ "eval_loss": 0.6448646187782288,
3361
+ "eval_runtime": 15.5401,
3362
+ "eval_samples_per_second": 126.061,
3363
+ "eval_steps_per_second": 15.766,
3364
+ "step": 2150
3365
  }
3366
  ],
3367
  "logging_steps": 5,
 
3390
  "attributes": {}
3391
  }
3392
  },
3393
+ "total_flos": 1.1081578526120346e+18,
3394
  "train_batch_size": 2,
3395
  "trial_name": null,
3396
  "trial_params": null