Azrail commited on
Commit
55d8d68
·
verified ·
1 Parent(s): 38b1a1b

Training in progress, step 61000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c41967e5432db5ed91bc1228a51744d8af764a94e341f801caf2cc8d0b340946
3
  size 301235464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff3677e2a6c68c6a9bc84018c91a9abb1bcf7c14c1b566d1f4d545783476a72
3
  size 301235464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70cad043527913fd0557530d296a1fe5bc45ca60997f5c855298840644081537
3
  size 602335994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95defb77fd9d966f9fb370451c779ea88fb6409a7bea604ae57a6a4ab86f381e
3
  size 602335994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6a4cb233f004dcf5c1bd7310c625e6acfeb53e49f5aa9a513759dc7631fff0b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ee7735caca4437694ef1fa1c7821cadab81eb5dba9c8318224d8baee7f9384
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9201fef1295387122e53aeeb3fe425d2797e674a7be3dba9faefda446e2071fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80725391fd9590c70c1e5ba84487c80bcb26eb7012140d59e753f7bdbcc81863
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.40358993244913505,
6
  "eval_steps": 500,
7
- "global_step": 60000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -10688,11 +10688,189 @@
10688
  "eval_steps_per_second": 23.322,
10689
  "num_input_tokens_seen": 15728640000,
10690
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10691
  }
10692
  ],
10693
  "logging_steps": 50,
10694
- "max_steps": 60000,
10695
- "num_input_tokens_seen": 15728640000,
10696
  "num_train_epochs": 1,
10697
  "save_steps": 1000,
10698
  "stateful_callbacks": {
@@ -10702,12 +10880,12 @@
10702
  "should_evaluate": false,
10703
  "should_log": false,
10704
  "should_save": true,
10705
- "should_training_stop": true
10706
  },
10707
  "attributes": {}
10708
  }
10709
  },
10710
- "total_flos": 4.2075647115264e+18,
10711
  "train_batch_size": 64,
10712
  "trial_name": null,
10713
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4103164313232873,
6
  "eval_steps": 500,
7
+ "global_step": 61000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
10688
  "eval_steps_per_second": 23.322,
10689
  "num_input_tokens_seen": 15728640000,
10690
  "step": 60000
10691
+ },
10692
+ {
10693
+ "epoch": 0.40392625739284266,
10694
+ "grad_norm": 0.2130047082901001,
10695
+ "learning_rate": 0.0006867974850262581,
10696
+ "loss": 3.0074,
10697
+ "num_input_tokens_seen": 15741747200,
10698
+ "step": 60050
10699
+ },
10700
+ {
10701
+ "epoch": 0.40426258233655027,
10702
+ "grad_norm": 0.18596570193767548,
10703
+ "learning_rate": 0.000682235249939575,
10704
+ "loss": 2.9981,
10705
+ "num_input_tokens_seen": 15754854400,
10706
+ "step": 60100
10707
+ },
10708
+ {
10709
+ "epoch": 0.4045989072802579,
10710
+ "grad_norm": 0.2774942219257355,
10711
+ "learning_rate": 0.0006776554506402081,
10712
+ "loss": 3.0024,
10713
+ "num_input_tokens_seen": 15767961600,
10714
+ "step": 60150
10715
+ },
10716
+ {
10717
+ "epoch": 0.4049352322239655,
10718
+ "grad_norm": 0.19329522550106049,
10719
+ "learning_rate": 0.0006730585285387465,
10720
+ "loss": 3.0101,
10721
+ "num_input_tokens_seen": 15781068800,
10722
+ "step": 60200
10723
+ },
10724
+ {
10725
+ "epoch": 0.4052715571676731,
10726
+ "grad_norm": 0.21384254097938538,
10727
+ "learning_rate": 0.0006684449266961101,
10728
+ "loss": 3.0095,
10729
+ "num_input_tokens_seen": 15794176000,
10730
+ "step": 60250
10731
+ },
10732
+ {
10733
+ "epoch": 0.4056078821113807,
10734
+ "grad_norm": 0.3892166018486023,
10735
+ "learning_rate": 0.0006638150897808468,
10736
+ "loss": 3.0101,
10737
+ "num_input_tokens_seen": 15807283200,
10738
+ "step": 60300
10739
+ },
10740
+ {
10741
+ "epoch": 0.4059442070550883,
10742
+ "grad_norm": 0.27356287837028503,
10743
+ "learning_rate": 0.0006591694640262749,
10744
+ "loss": 3.0322,
10745
+ "num_input_tokens_seen": 15820390400,
10746
+ "step": 60350
10747
+ },
10748
+ {
10749
+ "epoch": 0.40628053199879594,
10750
+ "grad_norm": 0.20498153567314148,
10751
+ "learning_rate": 0.0006545084971874737,
10752
+ "loss": 3.0064,
10753
+ "num_input_tokens_seen": 15833497600,
10754
+ "step": 60400
10755
+ },
10756
+ {
10757
+ "epoch": 0.40661685694250355,
10758
+ "grad_norm": 0.19939659535884857,
10759
+ "learning_rate": 0.0006498326384981283,
10760
+ "loss": 3.0158,
10761
+ "num_input_tokens_seen": 15846604800,
10762
+ "step": 60450
10763
+ },
10764
+ {
10765
+ "epoch": 0.40695318188621116,
10766
+ "grad_norm": 0.24545226991176605,
10767
+ "learning_rate": 0.0006451423386272311,
10768
+ "loss": 3.0132,
10769
+ "num_input_tokens_seen": 15859712000,
10770
+ "step": 60500
10771
+ },
10772
+ {
10773
+ "epoch": 0.40695318188621116,
10774
+ "eval_loss": 2.914865255355835,
10775
+ "eval_runtime": 51.2039,
10776
+ "eval_samples_per_second": 97.649,
10777
+ "eval_steps_per_second": 24.412,
10778
+ "num_input_tokens_seen": 15859712000,
10779
+ "step": 60500
10780
+ },
10781
+ {
10782
+ "epoch": 0.40728950682991877,
10783
+ "grad_norm": 0.2364359349012375,
10784
+ "learning_rate": 0.0006404380496356461,
10785
+ "loss": 3.0102,
10786
+ "num_input_tokens_seen": 15872819200,
10787
+ "step": 60550
10788
+ },
10789
+ {
10790
+ "epoch": 0.4076258317736264,
10791
+ "grad_norm": 0.19283762574195862,
10792
+ "learning_rate": 0.0006357202249325371,
10793
+ "loss": 3.0132,
10794
+ "num_input_tokens_seen": 15885926400,
10795
+ "step": 60600
10796
+ },
10797
+ {
10798
+ "epoch": 0.40796215671733405,
10799
+ "grad_norm": 0.19770501554012299,
10800
+ "learning_rate": 0.0006309893192316686,
10801
+ "loss": 3.0106,
10802
+ "num_input_tokens_seen": 15899033600,
10803
+ "step": 60650
10804
+ },
10805
+ {
10806
+ "epoch": 0.40829848166104166,
10807
+ "grad_norm": 0.18395134806632996,
10808
+ "learning_rate": 0.000626245788507579,
10809
+ "loss": 3.005,
10810
+ "num_input_tokens_seen": 15912140800,
10811
+ "step": 60700
10812
+ },
10813
+ {
10814
+ "epoch": 0.40863480660474927,
10815
+ "grad_norm": 0.21380823850631714,
10816
+ "learning_rate": 0.000621490089951632,
10817
+ "loss": 3.0106,
10818
+ "num_input_tokens_seen": 15925248000,
10819
+ "step": 60750
10820
+ },
10821
+ {
10822
+ "epoch": 0.4089711315484569,
10823
+ "grad_norm": 0.17995478212833405,
10824
+ "learning_rate": 0.0006167226819279528,
10825
+ "loss": 3.0237,
10826
+ "num_input_tokens_seen": 15938355200,
10827
+ "step": 60800
10828
+ },
10829
+ {
10830
+ "epoch": 0.4093074564921645,
10831
+ "grad_norm": 0.31993716955184937,
10832
+ "learning_rate": 0.0006119440239292493,
10833
+ "loss": 3.0158,
10834
+ "num_input_tokens_seen": 15951462400,
10835
+ "step": 60850
10836
+ },
10837
+ {
10838
+ "epoch": 0.4096437814358721,
10839
+ "grad_norm": 0.19210565090179443,
10840
+ "learning_rate": 0.0006071545765325253,
10841
+ "loss": 3.0121,
10842
+ "num_input_tokens_seen": 15964569600,
10843
+ "step": 60900
10844
+ },
10845
+ {
10846
+ "epoch": 0.4099801063795797,
10847
+ "grad_norm": 0.4126472771167755,
10848
+ "learning_rate": 0.0006023548013546899,
10849
+ "loss": 3.0215,
10850
+ "num_input_tokens_seen": 15977676800,
10851
+ "step": 60950
10852
+ },
10853
+ {
10854
+ "epoch": 0.4103164313232873,
10855
+ "grad_norm": 0.26418012380599976,
10856
+ "learning_rate": 0.0005975451610080642,
10857
+ "loss": 3.0125,
10858
+ "num_input_tokens_seen": 15990784000,
10859
+ "step": 61000
10860
+ },
10861
+ {
10862
+ "epoch": 0.4103164313232873,
10863
+ "eval_loss": 2.913696765899658,
10864
+ "eval_runtime": 52.0924,
10865
+ "eval_samples_per_second": 95.983,
10866
+ "eval_steps_per_second": 23.996,
10867
+ "num_input_tokens_seen": 15990784000,
10868
+ "step": 61000
10869
  }
10870
  ],
10871
  "logging_steps": 50,
10872
+ "max_steps": 70000,
10873
+ "num_input_tokens_seen": 15990784000,
10874
  "num_train_epochs": 1,
10875
  "save_steps": 1000,
10876
  "stateful_callbacks": {
 
10880
  "should_evaluate": false,
10881
  "should_log": false,
10882
  "should_save": true,
10883
+ "should_training_stop": false
10884
  },
10885
  "attributes": {}
10886
  }
10887
  },
10888
+ "total_flos": 4.27769079005184e+18,
10889
  "train_batch_size": 64,
10890
  "trial_name": null,
10891
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae75bb2a8bb196138369db914584406a32731cac3b2572b642609f88b870f3b8
3
  size 6008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a964c444482261d405cae313adc306063a7a31a0cff9e89a43e151d806eeee7e
3
  size 6008