jflotz committed on
Commit
463335c
·
1 Parent(s): 157af62

Training in progress, step 600000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bd1f4862280bc4f9f20cf61fc91f853df59d4ca3541b026db7600611b178d50
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3011d318834eca8ab53dbb6676157bffa205e38d5b1da950aa905371e910e8ab
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e98866b99e3ecd40c53531087e5d2e3191cf9fd862f418f5c2164fc106076e1
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab3dcf04e4677cd69d3fbe91cb1ae1601851f3937f4fd0e788d25fd8dd5a9a5c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5ef830797817d960f06c7d56a345ac3affb87d9629b56b5bc8c9c3338bb01c
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6068ed85b429651ab3dcdb9d8c131e8794f545bafa654289904ead8dcf71a796
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53e8adf2ea40b3fc922a7d11d4e61b79f1bf2e372d1d097107cce4c6dc566b51
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96d9c6af477a0b5f695c5b258b6a01aca5fc7cfa2d38303757339b849dbd7d3e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:198286929a9142846ad67730b33946ccbc3ba475f115c3a0aef90b3b51ce6035
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:567a88294b09dd010f920f0bbc226b5e5f9189a07eafb2d3246224b7bdd85b03
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03eebf4a4ea0cd9321666974d21bc38214f697b5e4b4b439941abd6a346b886d
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ebaee321096ea2774b004b42a63721b74b8260bf65ca91c67dcab0fe39ddf6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03692cd1ad26b8eef58c8c71c41233d4ef014e517e327f8e4798a79a3642d38
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:400b8eee678f22dae7d12f6670127ef9e783a9bd2487ab20a30cddd0d1ae18fc
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4840589ea9729ad2c28a92a929b64277e6de332cdd873f9187cab513a624b326
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ace2fdb32ece5ddc2b6164c2a1d1ed057e126355e32ac6d8db09bc2cd476da7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d4e4a138e230b3265d7fc7f258a4846b3b6af6bce4e94e22abcfe742c12213
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90d67e927134f71d028c643f8c9e8a2e214f618d0796c97003c322009d54ac2a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7cc50ebf91016fa727fad392dd121c0404733da11c9da06aed7a47a81b65900
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c3d2e37603fc85abdaa1469887efb3190e7c384dc663afb9c8011e46abae231
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1317449e5c457cd18ac6087cac07774393562aa747fda3bc1ae1eb6a47f1311
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1794385872324773,
5
- "global_step": 590000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7086,11 +7086,131 @@
7086
  "learning_rate": 6.506902584793773e-05,
7087
  "loss": 0.3043,
7088
  "step": 590000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7089
  }
7090
  ],
7091
  "max_steps": 1000000,
7092
  "num_train_epochs": 2,
7093
- "total_flos": 3.988811683155628e+22,
7094
  "trial_name": null,
7095
  "trial_params": null
7096
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1994290717618414,
5
+ "global_step": 600000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7086
  "learning_rate": 6.506902584793773e-05,
7087
  "loss": 0.3043,
7088
  "step": 590000
7089
+ },
7090
+ {
7091
+ "epoch": 1.18,
7092
+ "learning_rate": 6.495596703323214e-05,
7093
+ "loss": 0.3039,
7094
+ "step": 590500
7095
+ },
7096
+ {
7097
+ "epoch": 1.18,
7098
+ "learning_rate": 6.484294934833822e-05,
7099
+ "loss": 0.3042,
7100
+ "step": 591000
7101
+ },
7102
+ {
7103
+ "epoch": 1.18,
7104
+ "learning_rate": 6.472997310224204e-05,
7105
+ "loss": 0.304,
7106
+ "step": 591500
7107
+ },
7108
+ {
7109
+ "epoch": 1.18,
7110
+ "learning_rate": 6.461703860381628e-05,
7111
+ "loss": 0.3039,
7112
+ "step": 592000
7113
+ },
7114
+ {
7115
+ "epoch": 1.18,
7116
+ "learning_rate": 6.450414616181959e-05,
7117
+ "loss": 0.3041,
7118
+ "step": 592500
7119
+ },
7120
+ {
7121
+ "epoch": 1.19,
7122
+ "learning_rate": 6.439129608489559e-05,
7123
+ "loss": 0.3043,
7124
+ "step": 593000
7125
+ },
7126
+ {
7127
+ "epoch": 1.19,
7128
+ "learning_rate": 6.427848868157208e-05,
7129
+ "loss": 0.3041,
7130
+ "step": 593500
7131
+ },
7132
+ {
7133
+ "epoch": 1.19,
7134
+ "learning_rate": 6.41657242602602e-05,
7135
+ "loss": 0.304,
7136
+ "step": 594000
7137
+ },
7138
+ {
7139
+ "epoch": 1.19,
7140
+ "learning_rate": 6.405300312925353e-05,
7141
+ "loss": 0.3044,
7142
+ "step": 594500
7143
+ },
7144
+ {
7145
+ "epoch": 1.19,
7146
+ "learning_rate": 6.39403255967274e-05,
7147
+ "loss": 0.304,
7148
+ "step": 595000
7149
+ },
7150
+ {
7151
+ "epoch": 1.19,
7152
+ "learning_rate": 6.382769197073783e-05,
7153
+ "loss": 0.3042,
7154
+ "step": 595500
7155
+ },
7156
+ {
7157
+ "epoch": 1.19,
7158
+ "learning_rate": 6.371510255922088e-05,
7159
+ "loss": 0.3041,
7160
+ "step": 596000
7161
+ },
7162
+ {
7163
+ "epoch": 1.19,
7164
+ "learning_rate": 6.360255766999172e-05,
7165
+ "loss": 0.3036,
7166
+ "step": 596500
7167
+ },
7168
+ {
7169
+ "epoch": 1.19,
7170
+ "learning_rate": 6.349005761074372e-05,
7171
+ "loss": 0.3044,
7172
+ "step": 597000
7173
+ },
7174
+ {
7175
+ "epoch": 1.19,
7176
+ "learning_rate": 6.33776026890478e-05,
7177
+ "loss": 0.3038,
7178
+ "step": 597500
7179
+ },
7180
+ {
7181
+ "epoch": 1.2,
7182
+ "learning_rate": 6.326519321235139e-05,
7183
+ "loss": 0.3036,
7184
+ "step": 598000
7185
+ },
7186
+ {
7187
+ "epoch": 1.2,
7188
+ "learning_rate": 6.315282948797776e-05,
7189
+ "loss": 0.3033,
7190
+ "step": 598500
7191
+ },
7192
+ {
7193
+ "epoch": 1.2,
7194
+ "learning_rate": 6.304051182312496e-05,
7195
+ "loss": 0.3034,
7196
+ "step": 599000
7197
+ },
7198
+ {
7199
+ "epoch": 1.2,
7200
+ "learning_rate": 6.292824052486525e-05,
7201
+ "loss": 0.3036,
7202
+ "step": 599500
7203
+ },
7204
+ {
7205
+ "epoch": 1.2,
7206
+ "learning_rate": 6.281601590014407e-05,
7207
+ "loss": 0.3039,
7208
+ "step": 600000
7209
  }
7210
  ],
7211
  "max_steps": 1000000,
7212
  "num_train_epochs": 2,
7213
+ "total_flos": 4.056420774973463e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e98866b99e3ecd40c53531087e5d2e3191cf9fd862f418f5c2164fc106076e1
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
3
  size 449450757