FormlessAI commited on
Commit
0a8fcd6
·
verified ·
1 Parent(s): d7d78e6

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e14a9273234d33f3c178ef3a0ba4d511ae44074d2744710df918a8bf99e5a9
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9b65681a5e4dae3d1166f15e110526d7dd0659622190adfab490c678c36f3c
3
  size 98088784
last-checkpoint/global_step1750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542d95be70ce5fc2085034318fb8157da2f587a28efc4f0b879fe4ef234ce98e
3
+ size 73939813
last-checkpoint/global_step1750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086520196dd722d9adbfec2176094a5c72e623564176914198099d87b80b353b
3
+ size 73939813
last-checkpoint/global_step1750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a3a5ec2308e2528cbb2fa87f1c254d0afe6bd346597b43f86db0582447ac94
3
+ size 73939877
last-checkpoint/global_step1750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5296f2391ae5108481635ef6b1c252594b9a3ab0cab1ce10deb31f42ebd69da2
3
+ size 73939877
last-checkpoint/global_step1750/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3237b7b70e874acc7f83a0a3e2726db60e3e2e90d5252f3bd1a7a619c17423de
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1700
 
1
+ global_step1750
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9e079811a4f16d069bc6568e820b087638d9087ea98d680117f7863e2b84d72
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db55bbd76d21410901eef766f4ea27c457a7976afda6a56c3aec6194dbccd316
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:796c55d83e5f448be816dff1e47df7da05dc49ac9f33f03c4fe53871b89249f9
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c93ee18f4dcb462dbaccd6716b53d2aaaac174ce6e476d05966a13ba91b15f
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca397837ff76969e457a84025e42de488cf6708cec475bfd6c699a74e998db74
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ec1c53fac56c1684c8a3e92bd85dc740d49fd5758c573121ce67476a27cef9
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01726fe54cdf568e628013444d43170363d989763bb3643861f1834550bf40cc
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6d93badf06c5d380c3300205be40194653a28adcb4c6283dca4f75f7fb9d76
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32892d87473868561cebb2d12f97121f75551c078a3eed1dd7629182a1fab0ee
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc8a195efc18da96bf16857984e4d60e7f36373f6730ddca95667a6b0c910ce
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6626052856445312,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.6662584200857318,
6
  "eval_steps": 50,
7
- "global_step": 1700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2660,6 +2660,84 @@
2660
  "eval_samples_per_second": 124.447,
2661
  "eval_steps_per_second": 15.564,
2662
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2663
  }
2664
  ],
2665
  "logging_steps": 5,
@@ -2688,7 +2766,7 @@
2688
  "attributes": {}
2689
  }
2690
  },
2691
- "total_flos": 8.773118177795113e+17,
2692
  "train_batch_size": 2,
2693
  "trial_name": null,
2694
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6602269411087036,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.715248009797918,
6
  "eval_steps": 50,
7
+ "global_step": 1750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2660
  "eval_samples_per_second": 124.447,
2661
  "eval_steps_per_second": 15.564,
2662
  "step": 1700
2663
+ },
2664
+ {
2665
+ "epoch": 1.6711573790569503,
2666
+ "grad_norm": 0.203897163271904,
2667
+ "learning_rate": 8.399529398394039e-05,
2668
+ "loss": 0.6896,
2669
+ "step": 1705
2670
+ },
2671
+ {
2672
+ "epoch": 1.676056338028169,
2673
+ "grad_norm": 0.20551460981369019,
2674
+ "learning_rate": 8.384304537099798e-05,
2675
+ "loss": 0.6997,
2676
+ "step": 1710
2677
+ },
2678
+ {
2679
+ "epoch": 1.6809552969993877,
2680
+ "grad_norm": 0.24939224123954773,
2681
+ "learning_rate": 8.369050444776772e-05,
2682
+ "loss": 0.6784,
2683
+ "step": 1715
2684
+ },
2685
+ {
2686
+ "epoch": 1.6858542559706062,
2687
+ "grad_norm": 0.21644407510757446,
2688
+ "learning_rate": 8.353767278184362e-05,
2689
+ "loss": 0.6945,
2690
+ "step": 1720
2691
+ },
2692
+ {
2693
+ "epoch": 1.6907532149418247,
2694
+ "grad_norm": 0.19981370866298676,
2695
+ "learning_rate": 8.338455194380753e-05,
2696
+ "loss": 0.6901,
2697
+ "step": 1725
2698
+ },
2699
+ {
2700
+ "epoch": 1.6956521739130435,
2701
+ "grad_norm": 0.2333899885416031,
2702
+ "learning_rate": 8.323114350721291e-05,
2703
+ "loss": 0.6868,
2704
+ "step": 1730
2705
+ },
2706
+ {
2707
+ "epoch": 1.7005511328842622,
2708
+ "grad_norm": 0.2132989764213562,
2709
+ "learning_rate": 8.307744904856888e-05,
2710
+ "loss": 0.6934,
2711
+ "step": 1735
2712
+ },
2713
+ {
2714
+ "epoch": 1.7054500918554807,
2715
+ "grad_norm": 0.18624679744243622,
2716
+ "learning_rate": 8.292347014732376e-05,
2717
+ "loss": 0.6922,
2718
+ "step": 1740
2719
+ },
2720
+ {
2721
+ "epoch": 1.7103490508266992,
2722
+ "grad_norm": 0.20982161164283752,
2723
+ "learning_rate": 8.276920838584902e-05,
2724
+ "loss": 0.6768,
2725
+ "step": 1745
2726
+ },
2727
+ {
2728
+ "epoch": 1.715248009797918,
2729
+ "grad_norm": 0.20481140911579132,
2730
+ "learning_rate": 8.26146653494229e-05,
2731
+ "loss": 0.7054,
2732
+ "step": 1750
2733
+ },
2734
+ {
2735
+ "epoch": 1.715248009797918,
2736
+ "eval_loss": 0.6602269411087036,
2737
+ "eval_runtime": 15.6809,
2738
+ "eval_samples_per_second": 124.929,
2739
+ "eval_steps_per_second": 15.624,
2740
+ "step": 1750
2741
  }
2742
  ],
2743
  "logging_steps": 5,
 
2766
  "attributes": {}
2767
  }
2768
  },
2769
+ "total_flos": 9.036926821219697e+17,
2770
  "train_batch_size": 2,
2771
  "trial_name": null,
2772
  "trial_params": null