FredericFan committed on
Commit
a753574
·
verified ·
1 Parent(s): a0b3da2

Training in progress, step 23000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:596c1e3afdd59a5f888e2e097f32bc12312812a5874bc9ba56a6a84a477bb16a
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40edcfcd3eadd3934df8237241fafe734256be703a0e3946a4e6ee14e6bf5a22
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db73359845187641317b27e1b4060f1552fef0e348ea1b12216d2d2fe327502
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d296490c405857ea96e4fbe1deae357928a09b842487dd9e87ba7f64056b04e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59a02c4a661b5f9af67ada735fa8a7871859bbc9bec13b24943a443d89f55f00
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5df9c2ed680cbe1eadef613230eab99b9589effb616e86f824a6c4d292f402
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5024d85f7d5741e2ee48baa326700e94d49b2910a8865857cf6ad58106fa2d05
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dcf2c6dc0ad63a4d7cbede289213f75c3cbabb91c5c15238e6438903f0efdfd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0815029963850975,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-22500",
4
- "epoch": 1.8,
5
  "eval_steps": 500,
6
- "global_step": 22500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3517,6 +3517,84 @@
3517
  "eval_samples_per_second": 22.709,
3518
  "eval_steps_per_second": 5.677,
3519
  "step": 22500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3520
  }
3521
  ],
3522
  "logging_steps": 50,
@@ -3536,7 +3614,7 @@
3536
  "attributes": {}
3537
  }
3538
  },
3539
- "total_flos": 5.48062101504e+16,
3540
  "train_batch_size": 4,
3541
  "trial_name": null,
3542
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0815029963850975,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-22500",
4
+ "epoch": 1.8399999999999999,
5
  "eval_steps": 500,
6
+ "global_step": 23000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3517
  "eval_samples_per_second": 22.709,
3518
  "eval_steps_per_second": 5.677,
3519
  "step": 22500
3520
+ },
3521
+ {
3522
+ "epoch": 1.804,
3523
+ "grad_norm": 0.10228476673364639,
3524
+ "learning_rate": 2.9496e-06,
3525
+ "loss": 0.0534,
3526
+ "step": 22550
3527
+ },
3528
+ {
3529
+ "epoch": 1.808,
3530
+ "grad_norm": 0.12501686811447144,
3531
+ "learning_rate": 2.8896000000000003e-06,
3532
+ "loss": 0.0529,
3533
+ "step": 22600
3534
+ },
3535
+ {
3536
+ "epoch": 1.812,
3537
+ "grad_norm": 0.10296665877103806,
3538
+ "learning_rate": 2.8296e-06,
3539
+ "loss": 0.0575,
3540
+ "step": 22650
3541
+ },
3542
+ {
3543
+ "epoch": 1.8159999999999998,
3544
+ "grad_norm": 0.15849712491035461,
3545
+ "learning_rate": 2.7696e-06,
3546
+ "loss": 0.054,
3547
+ "step": 22700
3548
+ },
3549
+ {
3550
+ "epoch": 1.8199999999999998,
3551
+ "grad_norm": 0.0910249873995781,
3552
+ "learning_rate": 2.7096e-06,
3553
+ "loss": 0.0533,
3554
+ "step": 22750
3555
+ },
3556
+ {
3557
+ "epoch": 1.8239999999999998,
3558
+ "grad_norm": 0.14466793835163116,
3559
+ "learning_rate": 2.6496e-06,
3560
+ "loss": 0.0512,
3561
+ "step": 22800
3562
+ },
3563
+ {
3564
+ "epoch": 1.8279999999999998,
3565
+ "grad_norm": 0.21741582453250885,
3566
+ "learning_rate": 2.5895999999999997e-06,
3567
+ "loss": 0.0574,
3568
+ "step": 22850
3569
+ },
3570
+ {
3571
+ "epoch": 1.8319999999999999,
3572
+ "grad_norm": 0.14835171401500702,
3573
+ "learning_rate": 2.5296000000000003e-06,
3574
+ "loss": 0.0569,
3575
+ "step": 22900
3576
+ },
3577
+ {
3578
+ "epoch": 1.8359999999999999,
3579
+ "grad_norm": 0.13074947893619537,
3580
+ "learning_rate": 2.4696e-06,
3581
+ "loss": 0.0555,
3582
+ "step": 22950
3583
+ },
3584
+ {
3585
+ "epoch": 1.8399999999999999,
3586
+ "grad_norm": 0.14285966753959656,
3587
+ "learning_rate": 2.4096e-06,
3588
+ "loss": 0.0531,
3589
+ "step": 23000
3590
+ },
3591
+ {
3592
+ "epoch": 1.8399999999999999,
3593
+ "eval_loss": 0.08154193311929703,
3594
+ "eval_runtime": 88.0742,
3595
+ "eval_samples_per_second": 22.708,
3596
+ "eval_steps_per_second": 5.677,
3597
+ "step": 23000
3598
  }
3599
  ],
3600
  "logging_steps": 50,
 
3614
  "attributes": {}
3615
  }
3616
  },
3617
+ "total_flos": 5.602412593152e+16,
3618
  "train_batch_size": 4,
3619
  "trial_name": null,
3620
  "trial_params": null