jflotz commited on
Commit
82e7d63
·
1 Parent(s): 3271735

Training in progress, step 910000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e68aaa7f20cf2655b5ee95587161e9676e99cc34136556add0f9965734e7755
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833419c3e72d7f9d08d84324dff99c4df5021ccb89fb201b7ceab844fd1fe7de
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59e51d656bcb12cf1d2e57afead6fbd882c17c7bfca89d40bc32ba44fc16d622
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ad6fff41d308df46b9a1b2f331c0f91fef2419029183c0886a7d45a3462024
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44cbeb264fb2a81130e0bac112a18c08ec1d410c1c39e23c023c635c67938a81
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a40d19463da3fc8105dffb32921ceec5ee75e4844d0eca7759a43c7f4b14bbba
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dbfbe0d14b5c95909c74387495649181894d175bc5bdbfa086f89dc9ba7b8e5
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfd73f45d2e32308ab057499cc1554bb0a6b5dce90d1016017772831b0342b94
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61414351d7a2b424bf69a03902f1cdb6a014111a0704f27525f615bba9831275
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90198646614d410142ba8b69e323e9efb6043fdb55e8f55a8936dc33416f4c8d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8736baa79faca75f1de8fbb0415f2f8cedffbb08cddfa2e1e02291f66849ffd
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d370228b86aa2250b5e7dcf8be134cb35dc855b6e23853f0ea74181606aa91c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bc53bd3230780ced8543e570f3a1576c876dc5bb9da2beb20be127155a27a1
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:052919acbfa40e0f835c2850fde0f02534bb1eb7e31ee0f5a540aca7e0f25da3
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425858a9ea3774cea4e80f4044a1828fb43f83f6100a0fd4aa4ea1985d9d8afe
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5c995fa02cdc8d504061c7c2c431119a7ac4cd2ff34fa2e7bd0545514b474d1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e99df6b45e45b719db86866dd43b0e8f476f0d4749f66d2f42f7183415384c8
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9f95e8365bd03b8115e7ed1873af4534b3ec6096af843d6743d8130c920c2cc
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ffd4b3aa19562ff729663fcb259b8c71f90c0398d12b236c6885ec68255bc91
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c027d2ad1a7ec1ccedf4c28ec4cf8e5a70643d6805bfcfeb4cb2085d818a7bdc
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4db73f02c4f7c376b4a473b74af0d764f1f4d822f644308482e4f6b4c6cad2
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.338804697028882,
5
- "global_step": 900000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10806,11 +10806,131 @@
10806
  "learning_rate": 1.3792793080955574e-05,
10807
  "loss": 0.2852,
10808
  "step": 900000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10809
  }
10810
  ],
10811
  "max_steps": 1000000,
10812
  "num_train_epochs": 2,
10813
- "total_flos": 6.0846394803756095e+22,
10814
  "trial_name": null,
10815
  "trial_params": null
10816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3587343850894045,
5
+ "global_step": 910000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10806
  "learning_rate": 1.3792793080955574e-05,
10807
  "loss": 0.2852,
10808
  "step": 900000
10809
+ },
10810
+ {
10811
+ "epoch": 1.34,
10812
+ "learning_rate": 1.3755301948017599e-05,
10813
+ "loss": 0.2853,
10814
+ "step": 900500
10815
+ },
10816
+ {
10817
+ "epoch": 1.34,
10818
+ "learning_rate": 1.3717991925554562e-05,
10819
+ "loss": 0.285,
10820
+ "step": 901000
10821
+ },
10822
+ {
10823
+ "epoch": 1.34,
10824
+ "learning_rate": 1.368086311557062e-05,
10825
+ "loss": 0.2854,
10826
+ "step": 901500
10827
+ },
10828
+ {
10829
+ "epoch": 1.34,
10830
+ "learning_rate": 1.3643915619574529e-05,
10831
+ "loss": 0.2849,
10832
+ "step": 902000
10833
+ },
10834
+ {
10835
+ "epoch": 1.34,
10836
+ "learning_rate": 1.3607149538579341e-05,
10837
+ "loss": 0.2853,
10838
+ "step": 902500
10839
+ },
10840
+ {
10841
+ "epoch": 1.34,
10842
+ "learning_rate": 1.35705649731021e-05,
10843
+ "loss": 0.2849,
10844
+ "step": 903000
10845
+ },
10846
+ {
10847
+ "epoch": 1.35,
10848
+ "learning_rate": 1.3534162023163642e-05,
10849
+ "loss": 0.2853,
10850
+ "step": 903500
10851
+ },
10852
+ {
10853
+ "epoch": 1.35,
10854
+ "learning_rate": 1.3497940788288195e-05,
10855
+ "loss": 0.2853,
10856
+ "step": 904000
10857
+ },
10858
+ {
10859
+ "epoch": 1.35,
10860
+ "learning_rate": 1.3461901367503262e-05,
10861
+ "loss": 0.2858,
10862
+ "step": 904500
10863
+ },
10864
+ {
10865
+ "epoch": 1.35,
10866
+ "learning_rate": 1.3426043859339253e-05,
10867
+ "loss": 0.2854,
10868
+ "step": 905000
10869
+ },
10870
+ {
10871
+ "epoch": 1.35,
10872
+ "learning_rate": 1.3390368361829197e-05,
10873
+ "loss": 0.2853,
10874
+ "step": 905500
10875
+ },
10876
+ {
10877
+ "epoch": 1.35,
10878
+ "learning_rate": 1.3354874972508582e-05,
10879
+ "loss": 0.2851,
10880
+ "step": 906000
10881
+ },
10882
+ {
10883
+ "epoch": 1.35,
10884
+ "learning_rate": 1.3319563788414934e-05,
10885
+ "loss": 0.2847,
10886
+ "step": 906500
10887
+ },
10888
+ {
10889
+ "epoch": 1.35,
10890
+ "learning_rate": 1.3284434906087695e-05,
10891
+ "loss": 0.2846,
10892
+ "step": 907000
10893
+ },
10894
+ {
10895
+ "epoch": 1.35,
10896
+ "learning_rate": 1.3249488421567911e-05,
10897
+ "loss": 0.2851,
10898
+ "step": 907500
10899
+ },
10900
+ {
10901
+ "epoch": 1.35,
10902
+ "learning_rate": 1.3214724430397915e-05,
10903
+ "loss": 0.285,
10904
+ "step": 908000
10905
+ },
10906
+ {
10907
+ "epoch": 1.36,
10908
+ "learning_rate": 1.3180143027621145e-05,
10909
+ "loss": 0.2852,
10910
+ "step": 908500
10911
+ },
10912
+ {
10913
+ "epoch": 1.36,
10914
+ "learning_rate": 1.314574430778182e-05,
10915
+ "loss": 0.2844,
10916
+ "step": 909000
10917
+ },
10918
+ {
10919
+ "epoch": 1.36,
10920
+ "learning_rate": 1.311152836492473e-05,
10921
+ "loss": 0.2848,
10922
+ "step": 909500
10923
+ },
10924
+ {
10925
+ "epoch": 1.36,
10926
+ "learning_rate": 1.3077495292594966e-05,
10927
+ "loss": 0.285,
10928
+ "step": 910000
10929
  }
10930
  ],
10931
  "max_steps": 1000000,
10932
  "num_train_epochs": 2,
10933
+ "total_flos": 6.152247724689881e+22,
10934
  "trial_name": null,
10935
  "trial_params": null
10936
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59e51d656bcb12cf1d2e57afead6fbd882c17c7bfca89d40bc32ba44fc16d622
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
3
  size 449450757