jflotz commited on
Commit
a39d95d
·
1 Parent(s): b6d602e

Training in progress, step 670000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:facea498a3305bb3a4cf04ba493e9b7a282bf34c7699144b12c3401905a21a21
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5bbd83e76bfaea16133e5f4d584916d5b8420b3bb185b8e5801362569d4f69
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46211629a4cc11950e226f7dbdda304cda1420c43879285d3e04ebb8508dc043
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cb7e1a11524752d8ec0a2746c2da7c87cd4d3afc083cf4a0df43b88ed43337
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05c11aa747eb56c76be6b0a8185d8eb345ab22e3a85121576df152b2d5604743
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd3471c82ac0fc930f64e5adbb6702a0e555d4edfcc1c2dab4ff36db308349b1
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f323fe99852eb9065ab82d8c0f478aeb01397ae4967c2beb7e4de9ca3f02ec
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49fa69a09ea23ef88cf7df6a3190bd2ee20d350293163871b5d1dbdf1a735794
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b8df123d1834d25753f8b002915374feeeb89dc9c6992036e0b849ae27e9320
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dac01a61c51f51dbd4c3cc5d50cf7d5af4a9f263667ab541575cfd5deab9645
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90443240959e9f40acd615af8df90f5d9b01b4b49736141665a99e8168dc3c6a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:221b7d8fdca30ab22892af203b971ac82533d02ad7492e4e8b5068d84fa6a3ca
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bd0fd3633719fca8fe6bbb82b9e0f5384f9458c9e7cc0938ff32b60be639fdb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:548649d9f5f2f112c52d8a0f4a7c44c0a8f1f18e8bb96cd91be7066faf617949
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8ca6715dedc0846ae860b91a71263b7a826d6a54596ee7919105a93c87a6496
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426865c62dd7c19a97ed19d06fe6fa6770f12ecb7a2997d9a0820ed2f9c93c21
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:555ec27b704b2cb1160b682f3e0caec8c125b854ba2add3f3b500f4ed76a0e8e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e886c38b9308720d6c044b1f01de3ee4919b1d3a6edb19ef015bd4926793ada4
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e8e59bcc271d9e83e953974cfcbd52d5f5cf63d456de8e0f805b45487976195
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50de55c4a72c38e6722f7cb77ebe9f35ce412c17a797d93c631371b39d861204
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3193719789380256,
5
- "global_step": 660000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7926,11 +7926,131 @@
7926
  "learning_rate": 4.97771953363055e-05,
7927
  "loss": 0.299,
7928
  "step": 660000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7929
  }
7930
  ],
7931
  "max_steps": 1000000,
7932
  "num_train_epochs": 2,
7933
- "total_flos": 4.462065045446751e+22,
7934
  "trial_name": null,
7935
  "trial_params": null
7936
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3393624634673895,
5
+ "global_step": 670000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7926
  "learning_rate": 4.97771953363055e-05,
7927
  "loss": 0.299,
7928
  "step": 660000
7929
+ },
7930
+ {
7931
+ "epoch": 1.32,
7932
+ "learning_rate": 4.967283762863444e-05,
7933
+ "loss": 0.2994,
7934
+ "step": 660500
7935
+ },
7936
+ {
7937
+ "epoch": 1.32,
7938
+ "learning_rate": 4.956856283426728e-05,
7939
+ "loss": 0.2994,
7940
+ "step": 661000
7941
+ },
7942
+ {
7943
+ "epoch": 1.32,
7944
+ "learning_rate": 4.946437123828732e-05,
7945
+ "loss": 0.2982,
7946
+ "step": 661500
7947
+ },
7948
+ {
7949
+ "epoch": 1.32,
7950
+ "learning_rate": 4.936026312555037e-05,
7951
+ "loss": 0.2992,
7952
+ "step": 662000
7953
+ },
7954
+ {
7955
+ "epoch": 1.32,
7956
+ "learning_rate": 4.925623878068408e-05,
7957
+ "loss": 0.2984,
7958
+ "step": 662500
7959
+ },
7960
+ {
7961
+ "epoch": 1.33,
7962
+ "learning_rate": 4.915229848808698e-05,
7963
+ "loss": 0.2994,
7964
+ "step": 663000
7965
+ },
7966
+ {
7967
+ "epoch": 1.33,
7968
+ "learning_rate": 4.904844253192795e-05,
7969
+ "loss": 0.2989,
7970
+ "step": 663500
7971
+ },
7972
+ {
7973
+ "epoch": 1.33,
7974
+ "learning_rate": 4.8944671196145136e-05,
7975
+ "loss": 0.2991,
7976
+ "step": 664000
7977
+ },
7978
+ {
7979
+ "epoch": 1.33,
7980
+ "learning_rate": 4.884098476444539e-05,
7981
+ "loss": 0.2984,
7982
+ "step": 664500
7983
+ },
7984
+ {
7985
+ "epoch": 1.33,
7986
+ "learning_rate": 4.8737383520303546e-05,
7987
+ "loss": 0.2984,
7988
+ "step": 665000
7989
+ },
7990
+ {
7991
+ "epoch": 1.33,
7992
+ "learning_rate": 4.8633867746961356e-05,
7993
+ "loss": 0.2988,
7994
+ "step": 665500
7995
+ },
7996
+ {
7997
+ "epoch": 1.33,
7998
+ "learning_rate": 4.853043772742709e-05,
7999
+ "loss": 0.2986,
8000
+ "step": 666000
8001
+ },
8002
+ {
8003
+ "epoch": 1.33,
8004
+ "learning_rate": 4.8427093744474364e-05,
8005
+ "loss": 0.299,
8006
+ "step": 666500
8007
+ },
8008
+ {
8009
+ "epoch": 1.33,
8010
+ "learning_rate": 4.832383608064172e-05,
8011
+ "loss": 0.2992,
8012
+ "step": 667000
8013
+ },
8014
+ {
8015
+ "epoch": 1.33,
8016
+ "learning_rate": 4.822066501823172e-05,
8017
+ "loss": 0.299,
8018
+ "step": 667500
8019
+ },
8020
+ {
8021
+ "epoch": 1.34,
8022
+ "learning_rate": 4.811758083931005e-05,
8023
+ "loss": 0.2984,
8024
+ "step": 668000
8025
+ },
8026
+ {
8027
+ "epoch": 1.34,
8028
+ "learning_rate": 4.8014583825704976e-05,
8029
+ "loss": 0.2982,
8030
+ "step": 668500
8031
+ },
8032
+ {
8033
+ "epoch": 1.34,
8034
+ "learning_rate": 4.791167425900632e-05,
8035
+ "loss": 0.2988,
8036
+ "step": 669000
8037
+ },
8038
+ {
8039
+ "epoch": 1.34,
8040
+ "learning_rate": 4.780885242056493e-05,
8041
+ "loss": 0.2983,
8042
+ "step": 669500
8043
+ },
8044
+ {
8045
+ "epoch": 1.34,
8046
+ "learning_rate": 4.770611859149185e-05,
8047
+ "loss": 0.2987,
8048
+ "step": 670000
8049
  }
8050
  ],
8051
  "max_steps": 1000000,
8052
  "num_train_epochs": 2,
8053
+ "total_flos": 4.529677636130487e+22,
8054
  "trial_name": null,
8055
  "trial_params": null
8056
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
3
  size 449450757