FormlessAI committed on
Commit
5cd782d
·
verified ·
1 Parent(s): e156418

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30836e6cc8e9bc7e6ac4860b9a47462bf2b505763ca8f1f016f00dba39dd063
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d3618a6549bfada4acfab50184a00e8169ef527f9fd2d6299b8877447758cb
3
  size 98088784
last-checkpoint/global_step3101/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4fb533299f62b9ce25f3ae0988e4c2e58b101d1c5c87106b853300fad700bcf
3
+ size 73939813
last-checkpoint/global_step3101/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6add0427eab5604df8457f0b0c96ae6e13a9298f74b97446cdd0a3e51c32ad
3
+ size 73939813
last-checkpoint/global_step3101/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538f203bc9dec335c9ed3c93f40b10793d873a7700c2c3e88d7e5f29437e5d14
3
+ size 73939877
last-checkpoint/global_step3101/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffb96ff2f8ba9effc52557142b9549943034c6246447792f71e93b98f29a8bf
3
+ size 73939877
last-checkpoint/global_step3101/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c401fb0075566b0e43341ee59679950230715a00367a0f966497bf458e6ba4e
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3050
 
1
+ global_step3101
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea14b9c6b3a6f295496ef7304910c6b324fa957ee6a873aa9c7ba3e19dee7493
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9f2639515e9a4dbb73442f973fea9b0c88c20b795038a946c56e0e2493ca27
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e7e6647b80c88b8b07c192378c0e9cf459bfbb39240b1f97ee2fd33a11b2d76
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc1dea28e987710088b449c94c516228a22c7362dc59c8c30f086a46639bae7
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb99390398ed2d2ad006f7fe54297d0a309628ca1217f738a1c29766f3fd0e64
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5887804b4b795f7a573283c1b112d762af0778415a90f4dd264b13d8ce6b6e73
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f06d95fcd0ec8a7f1f05ddeeb19bb5d11c5c887705f6ecb787b504d8cc2514
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d19ad5422c32a302e30ca2b15abb40a89c5c32a5565cad1734400f2ab8b8c060
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9b613fc00b8ea63138f8d70379fa6d9a68d4e6248cdb72d56dd145fcca5cc7d
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40dddd77817570d823fe2af41de3280b2532f2b28178ec1ba2cdd4222f46c420
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6222960948944092,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.9895897121861603,
6
  "eval_steps": 50,
7
- "global_step": 3050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4766,6 +4766,84 @@
4766
  "eval_samples_per_second": 126.335,
4767
  "eval_steps_per_second": 15.8,
4768
  "step": 3050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4769
  }
4770
  ],
4771
  "logging_steps": 5,
@@ -4794,7 +4872,7 @@
4794
  "attributes": {}
4795
  }
4796
  },
4797
- "total_flos": 1.5731372043460936e+18,
4798
  "train_batch_size": 2,
4799
  "trial_name": null,
4800
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.620843768119812,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.039191671769749,
6
  "eval_steps": 50,
7
+ "global_step": 3100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4766
  "eval_samples_per_second": 126.335,
4767
  "eval_steps_per_second": 15.8,
4768
  "step": 3050
4769
+ },
4770
+ {
4771
+ "epoch": 2.994488671157379,
4772
+ "grad_norm": 0.23352594673633575,
4773
+ "learning_rate": 3.780729435136962e-05,
4774
+ "loss": 0.6777,
4775
+ "step": 3055
4776
+ },
4777
+ {
4778
+ "epoch": 2.9993876301285978,
4779
+ "grad_norm": 0.19848263263702393,
4780
+ "learning_rate": 3.763898458660783e-05,
4781
+ "loss": 0.6618,
4782
+ "step": 3060
4783
+ },
4784
+ {
4785
+ "epoch": 3.0048989589712187,
4786
+ "grad_norm": 0.21553494036197662,
4787
+ "learning_rate": 3.747085732977055e-05,
4788
+ "loss": 0.7689,
4789
+ "step": 3065
4790
+ },
4791
+ {
4792
+ "epoch": 3.0097979179424375,
4793
+ "grad_norm": 0.23350001871585846,
4794
+ "learning_rate": 3.730291430862548e-05,
4795
+ "loss": 0.6462,
4796
+ "step": 3070
4797
+ },
4798
+ {
4799
+ "epoch": 3.0146968769136557,
4800
+ "grad_norm": 0.22312819957733154,
4801
+ "learning_rate": 3.713515724904701e-05,
4802
+ "loss": 0.629,
4803
+ "step": 3075
4804
+ },
4805
+ {
4806
+ "epoch": 3.0195958358848745,
4807
+ "grad_norm": 0.2268122434616089,
4808
+ "learning_rate": 3.696758787499846e-05,
4809
+ "loss": 0.6607,
4810
+ "step": 3080
4811
+ },
4812
+ {
4813
+ "epoch": 3.024494794856093,
4814
+ "grad_norm": 0.22305847704410553,
4815
+ "learning_rate": 3.6800207908514434e-05,
4816
+ "loss": 0.5964,
4817
+ "step": 3085
4818
+ },
4819
+ {
4820
+ "epoch": 3.029393753827312,
4821
+ "grad_norm": 0.23181577026844025,
4822
+ "learning_rate": 3.6633019069683054e-05,
4823
+ "loss": 0.6446,
4824
+ "step": 3090
4825
+ },
4826
+ {
4827
+ "epoch": 3.03429271279853,
4828
+ "grad_norm": 0.2492285519838333,
4829
+ "learning_rate": 3.646602307662833e-05,
4830
+ "loss": 0.6519,
4831
+ "step": 3095
4832
+ },
4833
+ {
4834
+ "epoch": 3.039191671769749,
4835
+ "grad_norm": 0.2302083671092987,
4836
+ "learning_rate": 3.629922164549246e-05,
4837
+ "loss": 0.647,
4838
+ "step": 3100
4839
+ },
4840
+ {
4841
+ "epoch": 3.039191671769749,
4842
+ "eval_loss": 0.620843768119812,
4843
+ "eval_runtime": 15.4094,
4844
+ "eval_samples_per_second": 127.13,
4845
+ "eval_steps_per_second": 15.899,
4846
+ "step": 3100
4847
  }
4848
  ],
4849
  "logging_steps": 5,
 
4872
  "attributes": {}
4873
  }
4874
  },
4875
+ "total_flos": 1.5987517748506788e+18,
4876
  "train_batch_size": 2,
4877
  "trial_name": null,
4878
  "trial_params": null