FredericFan commited on
Commit
bde5128
·
verified ·
1 Parent(s): 7889535

Training in progress, step 22000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46faaddaed72c5d090165795ebfd05b22b125c42c26c4875edbc091a5e362e43
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b24490e1afd177c9d7aa64e2bc93a14c723f2b07f34c096656faca30a819b31
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:035583387e98c556efc70cb4c61a29b96c0d5a81f688d104a9d1131166859bd1
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb51a493e883d2bba3783d9a9c610f4b7ed8a2fdbc2ae094434c3938af33f10
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4dd066f1fef3b26a8b4d2763e6247adb5ad5bebee331f997006d84e56e797cb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75610a62f0c2e3c2144553b21cd56625818792160128bd8489e6566f2e9cc991
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ed9e6e0ddb2712997439a77e00b8aab494233fbe2995e22f0fcce2c0862afad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16420615b82a27af4c6aa0fff49c44fb5eed4e6ef5c3ebd2f44a0387c672c1ca
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0817028358578682,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-21000",
4
- "epoch": 1.72,
5
  "eval_steps": 500,
6
- "global_step": 21500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3361,6 +3361,84 @@
3361
  "eval_samples_per_second": 22.713,
3362
  "eval_steps_per_second": 5.678,
3363
  "step": 21500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3364
  }
3365
  ],
3366
  "logging_steps": 50,
@@ -3380,7 +3458,7 @@
3380
  "attributes": {}
3381
  }
3382
  },
3383
- "total_flos": 5.237037858816e+16,
3384
  "train_batch_size": 4,
3385
  "trial_name": null,
3386
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08166228979825974,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-22000",
4
+ "epoch": 1.76,
5
  "eval_steps": 500,
6
+ "global_step": 22000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3361
  "eval_samples_per_second": 22.713,
3362
  "eval_steps_per_second": 5.678,
3363
  "step": 21500
3364
+ },
3365
+ {
3366
+ "epoch": 1.724,
3367
+ "grad_norm": 0.14097870886325836,
3368
+ "learning_rate": 4.1472e-06,
3369
+ "loss": 0.0488,
3370
+ "step": 21550
3371
+ },
3372
+ {
3373
+ "epoch": 1.728,
3374
+ "grad_norm": 0.1706978976726532,
3375
+ "learning_rate": 4.0872000000000004e-06,
3376
+ "loss": 0.0496,
3377
+ "step": 21600
3378
+ },
3379
+ {
3380
+ "epoch": 1.732,
3381
+ "grad_norm": 0.14371682703495026,
3382
+ "learning_rate": 4.0272e-06,
3383
+ "loss": 0.0557,
3384
+ "step": 21650
3385
+ },
3386
+ {
3387
+ "epoch": 1.736,
3388
+ "grad_norm": 0.1176629364490509,
3389
+ "learning_rate": 3.9672e-06,
3390
+ "loss": 0.0531,
3391
+ "step": 21700
3392
+ },
3393
+ {
3394
+ "epoch": 1.74,
3395
+ "grad_norm": 0.17272049188613892,
3396
+ "learning_rate": 3.9072e-06,
3397
+ "loss": 0.0604,
3398
+ "step": 21750
3399
+ },
3400
+ {
3401
+ "epoch": 1.744,
3402
+ "grad_norm": 0.15085135400295258,
3403
+ "learning_rate": 3.8472e-06,
3404
+ "loss": 0.0537,
3405
+ "step": 21800
3406
+ },
3407
+ {
3408
+ "epoch": 1.748,
3409
+ "grad_norm": 0.11613863706588745,
3410
+ "learning_rate": 3.7884e-06,
3411
+ "loss": 0.0544,
3412
+ "step": 21850
3413
+ },
3414
+ {
3415
+ "epoch": 1.752,
3416
+ "grad_norm": 0.13247713446617126,
3417
+ "learning_rate": 3.7284e-06,
3418
+ "loss": 0.0556,
3419
+ "step": 21900
3420
+ },
3421
+ {
3422
+ "epoch": 1.756,
3423
+ "grad_norm": 0.1755180060863495,
3424
+ "learning_rate": 3.6684e-06,
3425
+ "loss": 0.0563,
3426
+ "step": 21950
3427
+ },
3428
+ {
3429
+ "epoch": 1.76,
3430
+ "grad_norm": 0.07918363809585571,
3431
+ "learning_rate": 3.6084e-06,
3432
+ "loss": 0.0508,
3433
+ "step": 22000
3434
+ },
3435
+ {
3436
+ "epoch": 1.76,
3437
+ "eval_loss": 0.08166228979825974,
3438
+ "eval_runtime": 88.0895,
3439
+ "eval_samples_per_second": 22.704,
3440
+ "eval_steps_per_second": 5.676,
3441
+ "step": 22000
3442
  }
3443
  ],
3444
  "logging_steps": 50,
 
3458
  "attributes": {}
3459
  }
3460
  },
3461
+ "total_flos": 5.358829436928e+16,
3462
  "train_batch_size": 4,
3463
  "trial_name": null,
3464
  "trial_params": null