jflotz commited on
Commit
d0a731f
·
1 Parent(s): 3d20115

Training in progress, step 840000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ef1b487cabfa82f2f81ded5cef2d529a41a97f8137d750d66e05d443478e91a
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f81c933b26cfeb60d53ba82d975294e2c7358973e2715677db9ca7fd31945d
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34241c46169d6140a8feca7f2e1db99ea6d7326e000406064ac61feecf3f17b2
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09ff3d1788e565e5a086e252ccf0ede212b045e4e5f4392a44c6ea6f0987dd6
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fb4767c32ae0f5cf34a94816a959b9568b2ea2174c179de2a3df992017a5f77
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e058e2238a38cee98eacc9109fd883ace95c4833f253ace4bd37e2704c0fe5af
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efc0c6d968c43ae24dfc91f94bcca80995b7ed16b92115235f58d28e636291a4
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9431567146b6a803c38f6863bbd8c9115e688967dc8f725b32605962fde389b3
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df72b23925eb5f82752cd028feed8cf7d2bbe1dbdf011029efc7554632b26ae2
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f8725dca368138ac60071ebf1967a52a0bdc41ecaaff24531fe8b99b9ccb52
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd1259a368e737ba331fab7b54a98c7cfc3dd87ed455bcbc28503bda235c71d3
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eda6d197ffa3fe6958e94125c5fa0490a4afe5ac2f8a51ad2a4931b09364f04
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f2eac6df5cd35dc068eb0bab0d362a924c2221b17b5c9bfe0e42f4ba3c05c9c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3a1be70ad51e1c8a5b547f1989a2e92d51a9ec27c3a4490875ff9354ff3dda
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e7f8260a2f4cc917f2e6b69683488cb603bdd620504afe136b5dc796247fcec
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc51de7c0af3e5027c4a852a232459cf39ee9a71ea51b7603a1f5327ee5a020
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41a93612851ee52955512369b8723183837a90f2506b353970cd64de58829b3d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4a63b1c76ff6ed1a203a2dff4664ff326fc59ea9cbb507ef4f3897d7810fb84
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bde979dd08e67682f748b136bdd4f7c962a7cebec495023e183c816b6c75933
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90777e10c619a178822dbd35785dbd74396ff21ef94c6855b7e97b44a2c700b9
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1992968806052247,
5
- "global_step": 830000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9966,11 +9966,131 @@
9966
  "learning_rate": 2.0773332330534513e-05,
9967
  "loss": 0.2887,
9968
  "step": 830000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9969
  }
9970
  ],
9971
  "max_steps": 1000000,
9972
  "num_train_epochs": 2,
9973
- "total_flos": 5.6113757283593115e+22,
9974
  "trial_name": null,
9975
  "trial_params": null
9976
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2192265686657473,
5
+ "global_step": 840000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9966
  "learning_rate": 2.0773332330534513e-05,
9967
  "loss": 0.2887,
9968
  "step": 830000
9969
+ },
9970
+ {
9971
+ "epoch": 1.2,
9972
+ "learning_rate": 2.0711718689098057e-05,
9973
+ "loss": 0.2886,
9974
+ "step": 830500
9975
+ },
9976
+ {
9977
+ "epoch": 1.2,
9978
+ "learning_rate": 2.0650267139558772e-05,
9979
+ "loss": 0.2887,
9980
+ "step": 831000
9981
+ },
9982
+ {
9983
+ "epoch": 1.2,
9984
+ "learning_rate": 2.058897784992289e-05,
9985
+ "loss": 0.2885,
9986
+ "step": 831500
9987
+ },
9988
+ {
9989
+ "epoch": 1.2,
9990
+ "learning_rate": 2.052785098775293e-05,
9991
+ "loss": 0.2886,
9992
+ "step": 832000
9993
+ },
9994
+ {
9995
+ "epoch": 1.2,
9996
+ "learning_rate": 2.0466886720167436e-05,
9997
+ "loss": 0.2877,
9998
+ "step": 832500
9999
+ },
10000
+ {
10001
+ "epoch": 1.21,
10002
+ "learning_rate": 2.04060852138404e-05,
10003
+ "loss": 0.2878,
10004
+ "step": 833000
10005
+ },
10006
+ {
10007
+ "epoch": 1.21,
10008
+ "learning_rate": 2.0345446635000783e-05,
10009
+ "loss": 0.2887,
10010
+ "step": 833500
10011
+ },
10012
+ {
10013
+ "epoch": 1.21,
10014
+ "learning_rate": 2.028497114943219e-05,
10015
+ "loss": 0.2888,
10016
+ "step": 834000
10017
+ },
10018
+ {
10019
+ "epoch": 1.21,
10020
+ "learning_rate": 2.022465892247223e-05,
10021
+ "loss": 0.2881,
10022
+ "step": 834500
10023
+ },
10024
+ {
10025
+ "epoch": 1.21,
10026
+ "learning_rate": 2.0164510119012263e-05,
10027
+ "loss": 0.2878,
10028
+ "step": 835000
10029
+ },
10030
+ {
10031
+ "epoch": 1.21,
10032
+ "learning_rate": 2.0104524903496834e-05,
10033
+ "loss": 0.2879,
10034
+ "step": 835500
10035
+ },
10036
+ {
10037
+ "epoch": 1.21,
10038
+ "learning_rate": 2.0044703439923217e-05,
10039
+ "loss": 0.2876,
10040
+ "step": 836000
10041
+ },
10042
+ {
10043
+ "epoch": 1.21,
10044
+ "learning_rate": 1.998504589184101e-05,
10045
+ "loss": 0.2879,
10046
+ "step": 836500
10047
+ },
10048
+ {
10049
+ "epoch": 1.21,
10050
+ "learning_rate": 1.9925552422351654e-05,
10051
+ "loss": 0.2878,
10052
+ "step": 837000
10053
+ },
10054
+ {
10055
+ "epoch": 1.21,
10056
+ "learning_rate": 1.9866223194108028e-05,
10057
+ "loss": 0.2884,
10058
+ "step": 837500
10059
+ },
10060
+ {
10061
+ "epoch": 1.22,
10062
+ "learning_rate": 1.9807058369314016e-05,
10063
+ "loss": 0.288,
10064
+ "step": 838000
10065
+ },
10066
+ {
10067
+ "epoch": 1.22,
10068
+ "learning_rate": 1.9748058109723953e-05,
10069
+ "loss": 0.2879,
10070
+ "step": 838500
10071
+ },
10072
+ {
10073
+ "epoch": 1.22,
10074
+ "learning_rate": 1.968922257664231e-05,
10075
+ "loss": 0.2878,
10076
+ "step": 839000
10077
+ },
10078
+ {
10079
+ "epoch": 1.22,
10080
+ "learning_rate": 1.9630551930923155e-05,
10081
+ "loss": 0.288,
10082
+ "step": 839500
10083
+ },
10084
+ {
10085
+ "epoch": 1.22,
10086
+ "learning_rate": 1.9572046332969825e-05,
10087
+ "loss": 0.2881,
10088
+ "step": 840000
10089
  }
10090
  ],
10091
  "max_steps": 1000000,
10092
  "num_train_epochs": 2,
10093
+ "total_flos": 5.678984079326211e+22,
10094
  "trial_name": null,
10095
  "trial_params": null
10096
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
3
  size 449450757