jflotz commited on
Commit
32bb652
·
1 Parent(s): a39d95d

Training in progress, step 680000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed5bbd83e76bfaea16133e5f4d584916d5b8420b3bb185b8e5801362569d4f69
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e5d00d2cd7677d6c62be4befe12ceb99e54aa60af5a94e2947d4ae516faaf9
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc51c6b56a4707511277be7ae25e9ceecb4c1ce951b8055606cd763924bc386
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12cb7e1a11524752d8ec0a2746c2da7c87cd4d3afc083cf4a0df43b88ed43337
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5cdde6fe1592597eb282f0c1adb0c3419a3256dc73cdd2ad8141425d3367aa1
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd3471c82ac0fc930f64e5adbb6702a0e555d4edfcc1c2dab4ff36db308349b1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c1fcd1d388653574e603211bfc56c68bd7e902268c20d36edbd0c9c2c455ef
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49fa69a09ea23ef88cf7df6a3190bd2ee20d350293163871b5d1dbdf1a735794
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c40db2c616071d5de318f5594c12558c6be71cc3185a20daba66531013c4562
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dac01a61c51f51dbd4c3cc5d50cf7d5af4a9f263667ab541575cfd5deab9645
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abefb15d42bbdbc9d96701958a628e317cbc9e1c294d8e8a76d57bee1d799f4e
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:221b7d8fdca30ab22892af203b971ac82533d02ad7492e4e8b5068d84fa6a3ca
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5923e1314a170c5cdfcf9c517450e0d99f3907dcef2765f816ff70cb6461364
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:548649d9f5f2f112c52d8a0f4a7c44c0a8f1f18e8bb96cd91be7066faf617949
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9887b880e911257bf7ed749ed55b173537ca64c83004df1495db6bddcf868768
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:426865c62dd7c19a97ed19d06fe6fa6770f12ecb7a2997d9a0820ed2f9c93c21
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6acc5ebd016f0085aa2c2381f509e3d8d43d4652dca878fe5c1e4ef96b864c
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e886c38b9308720d6c044b1f01de3ee4919b1d3a6edb19ef015bd4926793ada4
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5eeebb55361b678fa47ae3e37f81a72056ae8266b737b634b72a45e273417dd
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50de55c4a72c38e6722f7cb77ebe9f35ce412c17a797d93c631371b39d861204
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390e03fe3cc05eeca400d470fa9829ae91cb8c4659d5322fb96cb732dc98e5e1
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3393624634673895,
5
- "global_step": 670000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8046,11 +8046,131 @@
8046
  "learning_rate": 4.770611859149185e-05,
8047
  "loss": 0.2987,
8048
  "step": 670000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8049
  }
8050
  ],
8051
  "max_steps": 1000000,
8052
  "num_train_epochs": 2,
8053
- "total_flos": 4.529677636130487e+22,
8054
  "trial_name": null,
8055
  "trial_params": null
8056
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3593529479967534,
5
+ "global_step": 680000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8046
  "learning_rate": 4.770611859149185e-05,
8047
  "loss": 0.2987,
8048
  "step": 670000
8049
+ },
8050
+ {
8051
+ "epoch": 1.34,
8052
+ "learning_rate": 4.7603473052657374e-05,
8053
+ "loss": 0.2986,
8054
+ "step": 670500
8055
+ },
8056
+ {
8057
+ "epoch": 1.34,
8058
+ "learning_rate": 4.7500916084690564e-05,
8059
+ "loss": 0.298,
8060
+ "step": 671000
8061
+ },
8062
+ {
8063
+ "epoch": 1.34,
8064
+ "learning_rate": 4.7398447967978165e-05,
8065
+ "loss": 0.2991,
8066
+ "step": 671500
8067
+ },
8068
+ {
8069
+ "epoch": 1.34,
8070
+ "learning_rate": 4.729606898266411e-05,
8071
+ "loss": 0.2981,
8072
+ "step": 672000
8073
+ },
8074
+ {
8075
+ "epoch": 1.34,
8076
+ "learning_rate": 4.71937794086487e-05,
8077
+ "loss": 0.2989,
8078
+ "step": 672500
8079
+ },
8080
+ {
8081
+ "epoch": 1.35,
8082
+ "learning_rate": 4.709157952558768e-05,
8083
+ "loss": 0.2984,
8084
+ "step": 673000
8085
+ },
8086
+ {
8087
+ "epoch": 1.35,
8088
+ "learning_rate": 4.698946961289163e-05,
8089
+ "loss": 0.2981,
8090
+ "step": 673500
8091
+ },
8092
+ {
8093
+ "epoch": 1.35,
8094
+ "learning_rate": 4.688744994972514e-05,
8095
+ "loss": 0.2986,
8096
+ "step": 674000
8097
+ },
8098
+ {
8099
+ "epoch": 1.35,
8100
+ "learning_rate": 4.6785520815006085e-05,
8101
+ "loss": 0.2979,
8102
+ "step": 674500
8103
+ },
8104
+ {
8105
+ "epoch": 1.35,
8106
+ "learning_rate": 4.668368248740485e-05,
8107
+ "loss": 0.2984,
8108
+ "step": 675000
8109
+ },
8110
+ {
8111
+ "epoch": 1.35,
8112
+ "learning_rate": 4.658193524534351e-05,
8113
+ "loss": 0.2985,
8114
+ "step": 675500
8115
+ },
8116
+ {
8117
+ "epoch": 1.35,
8118
+ "learning_rate": 4.6480279366995116e-05,
8119
+ "loss": 0.2986,
8120
+ "step": 676000
8121
+ },
8122
+ {
8123
+ "epoch": 1.35,
8124
+ "learning_rate": 4.637871513028303e-05,
8125
+ "loss": 0.2981,
8126
+ "step": 676500
8127
+ },
8128
+ {
8129
+ "epoch": 1.35,
8130
+ "learning_rate": 4.6277242812879914e-05,
8131
+ "loss": 0.2978,
8132
+ "step": 677000
8133
+ },
8134
+ {
8135
+ "epoch": 1.35,
8136
+ "learning_rate": 4.617586269220728e-05,
8137
+ "loss": 0.2978,
8138
+ "step": 677500
8139
+ },
8140
+ {
8141
+ "epoch": 1.36,
8142
+ "learning_rate": 4.607457504543447e-05,
8143
+ "loss": 0.2977,
8144
+ "step": 678000
8145
+ },
8146
+ {
8147
+ "epoch": 1.36,
8148
+ "learning_rate": 4.597338014947801e-05,
8149
+ "loss": 0.2975,
8150
+ "step": 678500
8151
+ },
8152
+ {
8153
+ "epoch": 1.36,
8154
+ "learning_rate": 4.5872278281000955e-05,
8155
+ "loss": 0.2978,
8156
+ "step": 679000
8157
+ },
8158
+ {
8159
+ "epoch": 1.36,
8160
+ "learning_rate": 4.577126971641189e-05,
8161
+ "loss": 0.2978,
8162
+ "step": 679500
8163
+ },
8164
+ {
8165
+ "epoch": 1.36,
8166
+ "learning_rate": 4.567035473186444e-05,
8167
+ "loss": 0.2978,
8168
+ "step": 680000
8169
  }
8170
  ],
8171
  "max_steps": 1000000,
8172
  "num_train_epochs": 2,
8173
+ "total_flos": 4.597284714742531e+22,
8174
  "trial_name": null,
8175
  "trial_params": null
8176
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc51c6b56a4707511277be7ae25e9ceecb4c1ce951b8055606cd763924bc386
3
  size 449450757