irishprancer commited on
Commit
5c5c5a6
·
verified ·
1 Parent(s): 09b0699

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c2fa6d7c57abe80b81e686c5bb261331348a26463db489c0057496c21099267
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73320e76344a133855f71d58f5599b5bab5bec21149e32ab22ea8639c81b6efa
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:798f7293795a2261c83bf45e42597af80061bea9a6acf124906fd2023d47a1d7
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df0f0d4523a1748c8d67b29c00bcdc95ba64d1d0d15e03f3aaf492af944d8a42
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b03ef6d89f6c10452a8bc84393b1dc225e370d174364da48043ff472b287411
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790c395b0a5037d7be1e3d6aec472475bfb03f273b97c2103b3eaeeee641cbe6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f96e2618e1c452343a3740c32bea2247d1da5a3f0e229791fe5ed8f1e4e8eb3a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0301af88bd95ce6b3924705dcc39f92acccc19dd6a0525d5021e46ffe9ebde47
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 202.17391304347825,
5
  "eval_steps": 150,
6
- "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4502,6 +4502,151 @@
4502
  "EMA_steps_per_second": 25.688,
4503
  "epoch": 202.17391304347825,
4504
  "step": 4650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4505
  }
4506
  ],
4507
  "logging_steps": 10,
@@ -4521,7 +4666,7 @@
4521
  "attributes": {}
4522
  }
4523
  },
4524
- "total_flos": 1.1967994202384794e+17,
4525
  "train_batch_size": 4,
4526
  "trial_name": null,
4527
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 208.69565217391303,
5
  "eval_steps": 150,
6
+ "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4502
  "EMA_steps_per_second": 25.688,
4503
  "epoch": 202.17391304347825,
4504
  "step": 4650
4505
+ },
4506
+ {
4507
+ "epoch": 202.6086956521739,
4508
+ "grad_norm": 2.3335537910461426,
4509
+ "learning_rate": 1.5297783975071799e-06,
4510
+ "loss": 0.2124,
4511
+ "step": 4660
4512
+ },
4513
+ {
4514
+ "epoch": 203.04347826086956,
4515
+ "grad_norm": 2.041670560836792,
4516
+ "learning_rate": 1.529767331075906e-06,
4517
+ "loss": 0.282,
4518
+ "step": 4670
4519
+ },
4520
+ {
4521
+ "epoch": 203.47826086956522,
4522
+ "grad_norm": 2.462006092071533,
4523
+ "learning_rate": 1.529755961510317e-06,
4524
+ "loss": 0.2352,
4525
+ "step": 4680
4526
+ },
4527
+ {
4528
+ "epoch": 203.91304347826087,
4529
+ "grad_norm": 1.7050849199295044,
4530
+ "learning_rate": 1.5297442888149193e-06,
4531
+ "loss": 0.2125,
4532
+ "step": 4690
4533
+ },
4534
+ {
4535
+ "epoch": 204.34782608695653,
4536
+ "grad_norm": 2.558677911758423,
4537
+ "learning_rate": 1.5297323129943396e-06,
4538
+ "loss": 0.2971,
4539
+ "step": 4700
4540
+ },
4541
+ {
4542
+ "epoch": 204.7826086956522,
4543
+ "grad_norm": 1.8304595947265625,
4544
+ "learning_rate": 1.5297200340533247e-06,
4545
+ "loss": 0.1943,
4546
+ "step": 4710
4547
+ },
4548
+ {
4549
+ "epoch": 205.2173913043478,
4550
+ "grad_norm": 1.568945050239563,
4551
+ "learning_rate": 1.5297074519967415e-06,
4552
+ "loss": 0.1988,
4553
+ "step": 4720
4554
+ },
4555
+ {
4556
+ "epoch": 205.65217391304347,
4557
+ "grad_norm": 2.6844093799591064,
4558
+ "learning_rate": 1.5296945668295776e-06,
4559
+ "loss": 0.2073,
4560
+ "step": 4730
4561
+ },
4562
+ {
4563
+ "epoch": 206.08695652173913,
4564
+ "grad_norm": 2.8607003688812256,
4565
+ "learning_rate": 1.5296813785569398e-06,
4566
+ "loss": 0.2542,
4567
+ "step": 4740
4568
+ },
4569
+ {
4570
+ "epoch": 206.52173913043478,
4571
+ "grad_norm": 2.7412221431732178,
4572
+ "learning_rate": 1.5296678871840554e-06,
4573
+ "loss": 0.2103,
4574
+ "step": 4750
4575
+ },
4576
+ {
4577
+ "epoch": 206.95652173913044,
4578
+ "grad_norm": 2.278228759765625,
4579
+ "learning_rate": 1.5296540927162723e-06,
4580
+ "loss": 0.2414,
4581
+ "step": 4760
4582
+ },
4583
+ {
4584
+ "epoch": 207.3913043478261,
4585
+ "grad_norm": 2.021712064743042,
4586
+ "learning_rate": 1.5296399951590582e-06,
4587
+ "loss": 0.2272,
4588
+ "step": 4770
4589
+ },
4590
+ {
4591
+ "epoch": 207.82608695652175,
4592
+ "grad_norm": 2.287015438079834,
4593
+ "learning_rate": 1.5296255945180007e-06,
4594
+ "loss": 0.2046,
4595
+ "step": 4780
4596
+ },
4597
+ {
4598
+ "epoch": 208.2608695652174,
4599
+ "grad_norm": 2.0657951831817627,
4600
+ "learning_rate": 1.5296108907988078e-06,
4601
+ "loss": 0.2052,
4602
+ "step": 4790
4603
+ },
4604
+ {
4605
+ "epoch": 208.69565217391303,
4606
+ "grad_norm": 2.087261915206909,
4607
+ "learning_rate": 1.529595884007308e-06,
4608
+ "loss": 0.2538,
4609
+ "step": 4800
4610
+ },
4611
+ {
4612
+ "epoch": 208.69565217391303,
4613
+ "eval_loss": 0.9772452116012573,
4614
+ "eval_runtime": 0.4827,
4615
+ "eval_samples_per_second": 20.715,
4616
+ "eval_steps_per_second": 20.715,
4617
+ "step": 4800
4618
+ },
4619
+ {
4620
+ "Start_State_loss": 0.8609819412231445,
4621
+ "Start_State_runtime": 0.4411,
4622
+ "Start_State_samples_per_second": 22.671,
4623
+ "Start_State_steps_per_second": 22.671,
4624
+ "epoch": 208.69565217391303,
4625
+ "step": 4800
4626
+ },
4627
+ {
4628
+ "Raw_Model_loss": 0.9772452116012573,
4629
+ "Raw_Model_runtime": 0.4433,
4630
+ "Raw_Model_samples_per_second": 22.56,
4631
+ "Raw_Model_steps_per_second": 22.56,
4632
+ "epoch": 208.69565217391303,
4633
+ "step": 4800
4634
+ },
4635
+ {
4636
+ "SWA_loss": 0.8191676139831543,
4637
+ "SWA_runtime": 0.4674,
4638
+ "SWA_samples_per_second": 21.394,
4639
+ "SWA_steps_per_second": 21.394,
4640
+ "epoch": 208.69565217391303,
4641
+ "step": 4800
4642
+ },
4643
+ {
4644
+ "EMA_loss": 0.8595923185348511,
4645
+ "EMA_runtime": 0.4649,
4646
+ "EMA_samples_per_second": 21.511,
4647
+ "EMA_steps_per_second": 21.511,
4648
+ "epoch": 208.69565217391303,
4649
+ "step": 4800
4650
  }
4651
  ],
4652
  "logging_steps": 10,
 
4666
  "attributes": {}
4667
  }
4668
  },
4669
+ "total_flos": 1.2352177659543552e+17,
4670
  "train_batch_size": 4,
4671
  "trial_name": null,
4672
  "trial_params": null