jflotz committed on
Commit
35448ec
·
1 Parent(s): 33d542f

Training in progress, step 460000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6931208249dada9481d4f15fa6a8eb29e66faab6c960f940cb42178be058ede5
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b547411c79d357104c70e5661a49a33c4c727f3979f31a84e358578efcb1bf5d
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96730bdf9c613274e5002868e7a4d31f7cf6da025343ac9c04b48c36b22d877f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9908cf9403ac4306b20a0d3eb86bc03da4da8a781371917c67d6447e4350b51
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.037473233404711,
5
- "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9006,11 +9006,211 @@
9006
  "eval_samples_per_second": 1139.483,
9007
  "eval_steps_per_second": 17.859,
9008
  "step": 450000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 12,
9013
- "total_flos": 1.4376636702238347e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.260528194147037,
5
+ "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9006
  "eval_samples_per_second": 1139.483,
9007
  "eval_steps_per_second": 17.859,
9008
  "step": 450000
9009
+ },
9010
+ {
9011
+ "epoch": 10.05,
9012
+ "learning_rate": 1.770155470293445e-05,
9013
+ "loss": 0.2555,
9014
+ "step": 450500
9015
+ },
9016
+ {
9017
+ "epoch": 10.06,
9018
+ "learning_rate": 1.7548110926261522e-05,
9019
+ "loss": 0.2557,
9020
+ "step": 451000
9021
+ },
9022
+ {
9023
+ "epoch": 10.06,
9024
+ "eval_loss": 0.23789702355861664,
9025
+ "eval_runtime": 2.0142,
9026
+ "eval_samples_per_second": 1140.419,
9027
+ "eval_steps_per_second": 17.873,
9028
+ "step": 451000
9029
+ },
9030
+ {
9031
+ "epoch": 10.07,
9032
+ "learning_rate": 1.7396170301425777e-05,
9033
+ "loss": 0.2558,
9034
+ "step": 451500
9035
+ },
9036
+ {
9037
+ "epoch": 10.08,
9038
+ "learning_rate": 1.7245734490025544e-05,
9039
+ "loss": 0.2556,
9040
+ "step": 452000
9041
+ },
9042
+ {
9043
+ "epoch": 10.08,
9044
+ "eval_loss": 0.2400251030921936,
9045
+ "eval_runtime": 1.9794,
9046
+ "eval_samples_per_second": 1160.467,
9047
+ "eval_steps_per_second": 18.188,
9048
+ "step": 452000
9049
+ },
9050
+ {
9051
+ "epoch": 10.09,
9052
+ "learning_rate": 1.7096805137202738e-05,
9053
+ "loss": 0.2559,
9054
+ "step": 452500
9055
+ },
9056
+ {
9057
+ "epoch": 10.1,
9058
+ "learning_rate": 1.6949383871624917e-05,
9059
+ "loss": 0.2556,
9060
+ "step": 453000
9061
+ },
9062
+ {
9063
+ "epoch": 10.1,
9064
+ "eval_loss": 0.24092479050159454,
9065
+ "eval_runtime": 1.9634,
9066
+ "eval_samples_per_second": 1169.917,
9067
+ "eval_steps_per_second": 18.336,
9068
+ "step": 453000
9069
+ },
9070
+ {
9071
+ "epoch": 10.12,
9072
+ "learning_rate": 1.6803472305467368e-05,
9073
+ "loss": 0.2557,
9074
+ "step": 453500
9075
+ },
9076
+ {
9077
+ "epoch": 10.13,
9078
+ "learning_rate": 1.665907203439568e-05,
9079
+ "loss": 0.2557,
9080
+ "step": 454000
9081
+ },
9082
+ {
9083
+ "epoch": 10.13,
9084
+ "eval_loss": 0.23987002670764923,
9085
+ "eval_runtime": 1.9519,
9086
+ "eval_samples_per_second": 1176.785,
9087
+ "eval_steps_per_second": 18.443,
9088
+ "step": 454000
9089
+ },
9090
+ {
9091
+ "epoch": 10.14,
9092
+ "learning_rate": 1.6516184637548058e-05,
9093
+ "loss": 0.257,
9094
+ "step": 454500
9095
+ },
9096
+ {
9097
+ "epoch": 10.15,
9098
+ "learning_rate": 1.6374811677518142e-05,
9099
+ "loss": 0.2555,
9100
+ "step": 455000
9101
+ },
9102
+ {
9103
+ "epoch": 10.15,
9104
+ "eval_loss": 0.2387952208518982,
9105
+ "eval_runtime": 2.0231,
9106
+ "eval_samples_per_second": 1135.362,
9107
+ "eval_steps_per_second": 17.794,
9108
+ "step": 455000
9109
+ },
9110
+ {
9111
+ "epoch": 10.16,
9112
+ "learning_rate": 1.6234954700338025e-05,
9113
+ "loss": 0.2555,
9114
+ "step": 455500
9115
+ },
9116
+ {
9117
+ "epoch": 10.17,
9118
+ "learning_rate": 1.6096615235461148e-05,
9119
+ "loss": 0.2549,
9120
+ "step": 456000
9121
+ },
9122
+ {
9123
+ "epoch": 10.17,
9124
+ "eval_loss": 0.23763985931873322,
9125
+ "eval_runtime": 2.0435,
9126
+ "eval_samples_per_second": 1124.037,
9127
+ "eval_steps_per_second": 17.617,
9128
+ "step": 456000
9129
+ },
9130
+ {
9131
+ "epoch": 10.18,
9132
+ "learning_rate": 1.59597947957458e-05,
9133
+ "loss": 0.2557,
9134
+ "step": 456500
9135
+ },
9136
+ {
9137
+ "epoch": 10.19,
9138
+ "learning_rate": 1.5824494877438344e-05,
9139
+ "loss": 0.2551,
9140
+ "step": 457000
9141
+ },
9142
+ {
9143
+ "epoch": 10.19,
9144
+ "eval_loss": 0.23833227157592773,
9145
+ "eval_runtime": 2.01,
9146
+ "eval_samples_per_second": 1142.798,
9147
+ "eval_steps_per_second": 17.911,
9148
+ "step": 457000
9149
+ },
9150
+ {
9151
+ "epoch": 10.2,
9152
+ "learning_rate": 1.569071696015702e-05,
9153
+ "loss": 0.2549,
9154
+ "step": 457500
9155
+ },
9156
+ {
9157
+ "epoch": 10.22,
9158
+ "learning_rate": 1.555846250687569e-05,
9159
+ "loss": 0.2551,
9160
+ "step": 458000
9161
+ },
9162
+ {
9163
+ "epoch": 10.22,
9164
+ "eval_loss": 0.23883755505084991,
9165
+ "eval_runtime": 1.9432,
9166
+ "eval_samples_per_second": 1182.062,
9167
+ "eval_steps_per_second": 18.526,
9168
+ "step": 458000
9169
+ },
9170
+ {
9171
+ "epoch": 10.23,
9172
+ "learning_rate": 1.542773296390789e-05,
9173
+ "loss": 0.2555,
9174
+ "step": 458500
9175
+ },
9176
+ {
9177
+ "epoch": 10.24,
9178
+ "learning_rate": 1.5298529760890945e-05,
9179
+ "loss": 0.2559,
9180
+ "step": 459000
9181
+ },
9182
+ {
9183
+ "epoch": 10.24,
9184
+ "eval_loss": 0.23782269656658173,
9185
+ "eval_runtime": 1.9879,
9186
+ "eval_samples_per_second": 1155.497,
9187
+ "eval_steps_per_second": 18.11,
9188
+ "step": 459000
9189
+ },
9190
+ {
9191
+ "epoch": 10.25,
9192
+ "learning_rate": 1.5170854310770376e-05,
9193
+ "loss": 0.2557,
9194
+ "step": 459500
9195
+ },
9196
+ {
9197
+ "epoch": 10.26,
9198
+ "learning_rate": 1.5044708009784457e-05,
9199
+ "loss": 0.2555,
9200
+ "step": 460000
9201
+ },
9202
+ {
9203
+ "epoch": 10.26,
9204
+ "eval_loss": 0.23906496167182922,
9205
+ "eval_runtime": 1.9573,
9206
+ "eval_samples_per_second": 1173.539,
9207
+ "eval_steps_per_second": 18.392,
9208
+ "step": 460000
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 12,
9213
+ "total_flos": 1.4696123841012847e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96730bdf9c613274e5002868e7a4d31f7cf6da025343ac9c04b48c36b22d877f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9908cf9403ac4306b20a0d3eb86bc03da4da8a781371917c67d6447e4350b51
3
  size 102501541