FormlessAI committed on
Commit
80138df
·
verified ·
1 Parent(s): 8ee0795

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10ea1db3323fbfc2fcdf43f9fa2ef0ab0320c81b6ef48fc3727aa97d1b9d05c3
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30836e6cc8e9bc7e6ac4860b9a47462bf2b505763ca8f1f016f00dba39dd063
3
  size 98088784
last-checkpoint/global_step3050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec2d173ff3f21519c19c4529cf49015f7351b455919aabd2f92d617dfea800e
3
+ size 73939813
last-checkpoint/global_step3050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9878bd0908e7cc394ee31cc2cf96c0d61401413d535c5e837df907554c2c3be8
3
+ size 73939813
last-checkpoint/global_step3050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679f30e0acdc8235795db4f308847a419b26f177124c1a9af50cace3d7f17a2b
3
+ size 73939877
last-checkpoint/global_step3050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8aa167f419865f888953e5a460286af57c9fd0ce1367970c1958895bac59da
3
+ size 73939877
last-checkpoint/global_step3050/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e7843e59f08e3aa7b5e877697b7647ba9834905436322ba9404fa0a4364927
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3000
 
1
+ global_step3050
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ad15cfeacd5f36c2449b08ba47058a2134999ffa45efc211e7ffd4aebce1a7f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea14b9c6b3a6f295496ef7304910c6b324fa957ee6a873aa9c7ba3e19dee7493
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d6ef78203d9f455391fc1656ed2cb43336f41e389bc41e4a46bbc8baee83bc3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e7e6647b80c88b8b07c192378c0e9cf459bfbb39240b1f97ee2fd33a11b2d76
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a228d42d04cc102c0c1f9f9104aa3d5ea4923c78db588123d41f7cd87db7872
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb99390398ed2d2ad006f7fe54297d0a309628ca1217f738a1c29766f3fd0e64
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9678b81444b2cc8c0b911ed7e241ed319cf958e011ee1564921f24f2158c7606
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f06d95fcd0ec8a7f1f05ddeeb19bb5d11c5c887705f6ecb787b504d8cc2514
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16b66c86492a0069dc8985f0926a3b0a40daa6549813a0368d083738441a9d65
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b613fc00b8ea63138f8d70379fa6d9a68d4e6248cdb72d56dd145fcca5cc7d
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6234270930290222,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.9406001224739744,
6
  "eval_steps": 50,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4688,6 +4688,84 @@
4688
  "eval_samples_per_second": 126.096,
4689
  "eval_steps_per_second": 15.77,
4690
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4691
  }
4692
  ],
4693
  "logging_steps": 5,
@@ -4716,7 +4794,7 @@
4716
  "attributes": {}
4717
  }
4718
  },
4719
- "total_flos": 1.5470320857624084e+18,
4720
  "train_batch_size": 2,
4721
  "trial_name": null,
4722
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6222960948944092,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.9895897121861603,
6
  "eval_steps": 50,
7
+ "global_step": 3050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4688
  "eval_samples_per_second": 126.096,
4689
  "eval_steps_per_second": 15.77,
4690
  "step": 3000
4691
+ },
4692
+ {
4693
+ "epoch": 2.9454990814451927,
4694
+ "grad_norm": 0.20588572323322296,
4695
+ "learning_rate": 3.95000485078324e-05,
4696
+ "loss": 0.6741,
4697
+ "step": 3005
4698
+ },
4699
+ {
4700
+ "epoch": 2.9503980404164114,
4701
+ "grad_norm": 0.21284903585910797,
4702
+ "learning_rate": 3.9330009094856485e-05,
4703
+ "loss": 0.6438,
4704
+ "step": 3010
4705
+ },
4706
+ {
4707
+ "epoch": 2.95529699938763,
4708
+ "grad_norm": 0.22855360805988312,
4709
+ "learning_rate": 3.916013481191187e-05,
4710
+ "loss": 0.65,
4711
+ "step": 3015
4712
+ },
4713
+ {
4714
+ "epoch": 2.960195958358849,
4715
+ "grad_norm": 0.19158318638801575,
4716
+ "learning_rate": 3.899042740471964e-05,
4717
+ "loss": 0.6593,
4718
+ "step": 3020
4719
+ },
4720
+ {
4721
+ "epoch": 2.9650949173300676,
4722
+ "grad_norm": 0.22519658505916595,
4723
+ "learning_rate": 3.8820888617286e-05,
4724
+ "loss": 0.6542,
4725
+ "step": 3025
4726
+ },
4727
+ {
4728
+ "epoch": 2.969993876301286,
4729
+ "grad_norm": 0.20841963589191437,
4730
+ "learning_rate": 3.865152019188429e-05,
4731
+ "loss": 0.6636,
4732
+ "step": 3030
4733
+ },
4734
+ {
4735
+ "epoch": 2.9748928352725046,
4736
+ "grad_norm": 0.22229060530662537,
4737
+ "learning_rate": 3.8482323869037134e-05,
4738
+ "loss": 0.6698,
4739
+ "step": 3035
4740
+ },
4741
+ {
4742
+ "epoch": 2.9797917942437233,
4743
+ "grad_norm": 0.24960780143737793,
4744
+ "learning_rate": 3.831330138749852e-05,
4745
+ "loss": 0.6707,
4746
+ "step": 3040
4747
+ },
4748
+ {
4749
+ "epoch": 2.9846907532149416,
4750
+ "grad_norm": 0.2418794333934784,
4751
+ "learning_rate": 3.814445448423598e-05,
4752
+ "loss": 0.6524,
4753
+ "step": 3045
4754
+ },
4755
+ {
4756
+ "epoch": 2.9895897121861603,
4757
+ "grad_norm": 0.22541861236095428,
4758
+ "learning_rate": 3.7975784894412676e-05,
4759
+ "loss": 0.659,
4760
+ "step": 3050
4761
+ },
4762
+ {
4763
+ "epoch": 2.9895897121861603,
4764
+ "eval_loss": 0.6222960948944092,
4765
+ "eval_runtime": 15.5064,
4766
+ "eval_samples_per_second": 126.335,
4767
+ "eval_steps_per_second": 15.8,
4768
+ "step": 3050
4769
  }
4770
  ],
4771
  "logging_steps": 5,
 
4794
  "attributes": {}
4795
  }
4796
  },
4797
+ "total_flos": 1.5731372043460936e+18,
4798
  "train_batch_size": 2,
4799
  "trial_name": null,
4800
  "trial_params": null