jflotz commited on
Commit
5e11f4e
·
1 Parent(s): c1407b6

Training in progress, step 860000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e51d88aff5257549a283e52c14c7816f109ed1c8f1cd4c0209be1013bd750037
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a77d7445b898e81ba1dbf302ad4aa88b3930b40801531ded9875b083edd127
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fbbdae9c95471a40e6c6d019353b081fa6055bc839ed4f2163c0c1b80837934
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1af16532ed7776301ec2b0d23baf8c67ba74ec07e3f7e0782860705643ea3c80
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.122804278241862,
5
- "global_step": 850000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17006,11 +17006,211 @@
17006
  "eval_samples_per_second": 857.76,
17007
  "eval_steps_per_second": 13.443,
17008
  "step": 850000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17009
  }
17010
  ],
17011
  "max_steps": 1000000,
17012
  "num_train_epochs": 12,
17013
- "total_flos": 5.958472899551867e+22,
17014
  "trial_name": null,
17015
  "trial_params": null
17016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.234333002464785,
5
+ "global_step": 860000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17006
  "eval_samples_per_second": 857.76,
17007
  "eval_steps_per_second": 13.443,
17008
  "step": 850000
17009
+ },
17010
+ {
17011
+ "epoch": 9.13,
17012
+ "learning_rate": 1.838183407087156e-05,
17013
+ "loss": 0.1843,
17014
+ "step": 850500
17015
+ },
17016
+ {
17017
+ "epoch": 9.13,
17018
+ "learning_rate": 1.8326999187910095e-05,
17019
+ "loss": 0.1843,
17020
+ "step": 851000
17021
+ },
17022
+ {
17023
+ "epoch": 9.13,
17024
+ "eval_loss": 0.17367926239967346,
17025
+ "eval_runtime": 2.6854,
17026
+ "eval_samples_per_second": 855.351,
17027
+ "eval_steps_per_second": 13.406,
17028
+ "step": 851000
17029
+ },
17030
+ {
17031
+ "epoch": 9.14,
17032
+ "learning_rate": 1.8272332916577875e-05,
17033
+ "loss": 0.1846,
17034
+ "step": 851500
17035
+ },
17036
+ {
17037
+ "epoch": 9.15,
17038
+ "learning_rate": 1.8217835406330415e-05,
17039
+ "loss": 0.1844,
17040
+ "step": 852000
17041
+ },
17042
+ {
17043
+ "epoch": 9.15,
17044
+ "eval_loss": 0.17384441196918488,
17045
+ "eval_runtime": 2.6384,
17046
+ "eval_samples_per_second": 870.617,
17047
+ "eval_steps_per_second": 13.645,
17048
+ "step": 852000
17049
+ },
17050
+ {
17051
+ "epoch": 9.15,
17052
+ "learning_rate": 1.81635068061618e-05,
17053
+ "loss": 0.1844,
17054
+ "step": 852500
17055
+ },
17056
+ {
17057
+ "epoch": 9.16,
17058
+ "learning_rate": 1.810934726460436e-05,
17059
+ "loss": 0.1845,
17060
+ "step": 853000
17061
+ },
17062
+ {
17063
+ "epoch": 9.16,
17064
+ "eval_loss": 0.17389260232448578,
17065
+ "eval_runtime": 2.6711,
17066
+ "eval_samples_per_second": 859.942,
17067
+ "eval_steps_per_second": 13.478,
17068
+ "step": 853000
17069
+ },
17070
+ {
17071
+ "epoch": 9.16,
17072
+ "learning_rate": 1.80553569297282e-05,
17073
+ "loss": 0.1843,
17074
+ "step": 853500
17075
+ },
17076
+ {
17077
+ "epoch": 9.17,
17078
+ "learning_rate": 1.800153594914084e-05,
17079
+ "loss": 0.1843,
17080
+ "step": 854000
17081
+ },
17082
+ {
17083
+ "epoch": 9.17,
17084
+ "eval_loss": 0.17477978765964508,
17085
+ "eval_runtime": 2.681,
17086
+ "eval_samples_per_second": 856.769,
17087
+ "eval_steps_per_second": 13.428,
17088
+ "step": 854000
17089
+ },
17090
+ {
17091
+ "epoch": 9.17,
17092
+ "learning_rate": 1.7947884469986816e-05,
17093
+ "loss": 0.1841,
17094
+ "step": 854500
17095
+ },
17096
+ {
17097
+ "epoch": 9.18,
17098
+ "learning_rate": 1.7894402638947176e-05,
17099
+ "loss": 0.1841,
17100
+ "step": 855000
17101
+ },
17102
+ {
17103
+ "epoch": 9.18,
17104
+ "eval_loss": 0.1744370311498642,
17105
+ "eval_runtime": 2.6199,
17106
+ "eval_samples_per_second": 876.754,
17107
+ "eval_steps_per_second": 13.741,
17108
+ "step": 855000
17109
+ },
17110
+ {
17111
+ "epoch": 9.18,
17112
+ "learning_rate": 1.7841090602239237e-05,
17113
+ "loss": 0.1841,
17114
+ "step": 855500
17115
+ },
17116
+ {
17117
+ "epoch": 9.19,
17118
+ "learning_rate": 1.778794850561604e-05,
17119
+ "loss": 0.1844,
17120
+ "step": 856000
17121
+ },
17122
+ {
17123
+ "epoch": 9.19,
17124
+ "eval_loss": 0.17599613964557648,
17125
+ "eval_runtime": 2.6232,
17126
+ "eval_samples_per_second": 875.655,
17127
+ "eval_steps_per_second": 13.724,
17128
+ "step": 856000
17129
+ },
17130
+ {
17131
+ "epoch": 9.2,
17132
+ "learning_rate": 1.7734976494366073e-05,
17133
+ "loss": 0.1837,
17134
+ "step": 856500
17135
+ },
17136
+ {
17137
+ "epoch": 9.2,
17138
+ "learning_rate": 1.7682174713312805e-05,
17139
+ "loss": 0.1843,
17140
+ "step": 857000
17141
+ },
17142
+ {
17143
+ "epoch": 9.2,
17144
+ "eval_loss": 0.17385347187519073,
17145
+ "eval_runtime": 2.7089,
17146
+ "eval_samples_per_second": 847.948,
17147
+ "eval_steps_per_second": 13.29,
17148
+ "step": 857000
17149
+ },
17150
+ {
17151
+ "epoch": 9.21,
17152
+ "learning_rate": 1.7629543306814255e-05,
17153
+ "loss": 0.1838,
17154
+ "step": 857500
17155
+ },
17156
+ {
17157
+ "epoch": 9.21,
17158
+ "learning_rate": 1.75770824187627e-05,
17159
+ "loss": 0.1839,
17160
+ "step": 858000
17161
+ },
17162
+ {
17163
+ "epoch": 9.21,
17164
+ "eval_loss": 0.17458127439022064,
17165
+ "eval_runtime": 2.75,
17166
+ "eval_samples_per_second": 835.274,
17167
+ "eval_steps_per_second": 13.091,
17168
+ "step": 858000
17169
+ },
17170
+ {
17171
+ "epoch": 9.22,
17172
+ "learning_rate": 1.7524792192584186e-05,
17173
+ "loss": 0.1843,
17174
+ "step": 858500
17175
+ },
17176
+ {
17177
+ "epoch": 9.22,
17178
+ "learning_rate": 1.747267277123821e-05,
17179
+ "loss": 0.1839,
17180
+ "step": 859000
17181
+ },
17182
+ {
17183
+ "epoch": 9.22,
17184
+ "eval_loss": 0.1746589094400406,
17185
+ "eval_runtime": 2.6359,
17186
+ "eval_samples_per_second": 871.432,
17187
+ "eval_steps_per_second": 13.658,
17188
+ "step": 859000
17189
+ },
17190
+ {
17191
+ "epoch": 9.23,
17192
+ "learning_rate": 1.74207242972173e-05,
17193
+ "loss": 0.1837,
17194
+ "step": 859500
17195
+ },
17196
+ {
17197
+ "epoch": 9.23,
17198
+ "learning_rate": 1.7368946912546556e-05,
17199
+ "loss": 0.1836,
17200
+ "step": 860000
17201
+ },
17202
+ {
17203
+ "epoch": 9.23,
17204
+ "eval_loss": 0.1775263249874115,
17205
+ "eval_runtime": 2.6639,
17206
+ "eval_samples_per_second": 862.261,
17207
+ "eval_steps_per_second": 13.514,
17208
+ "step": 860000
17209
  }
17210
  ],
17211
  "max_steps": 1000000,
17212
  "num_train_epochs": 12,
17213
+ "total_flos": 6.028573160582214e+22,
17214
  "trial_name": null,
17215
  "trial_params": null
17216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
3
  size 449471589