jflotz commited on
Commit
b58a356
·
1 Parent(s): 72c9bd7

Training in progress, step 960000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:555f1620da14174bf24cf8a2c50966c673718f13caf9ff9216cb282d58986be2
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f217d4f2435c53b27b3ffb23b807fa09f40f06e34bfe7a070589d6890dd66f
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76d771b6aa86b6db5c1d3a18a5ba01d5f7ff8a339c98c29586734738700dc44c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351338e637aa543d98ac6400f2e05e86270a6a5900e20a3e790dbfa3cb26dbef
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.238091520471098,
5
- "global_step": 950000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -19006,11 +19006,211 @@
19006
  "eval_samples_per_second": 878.965,
19007
  "eval_steps_per_second": 13.776,
19008
  "step": 950000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19009
  }
19010
  ],
19011
  "max_steps": 1000000,
19012
  "num_train_epochs": 12,
19013
- "total_flos": 6.6594700334078225e+22,
19014
  "trial_name": null,
19015
  "trial_params": null
19016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.349620244694021,
5
+ "global_step": 960000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
19006
  "eval_samples_per_second": 878.965,
19007
  "eval_steps_per_second": 13.776,
19008
  "step": 950000
19009
+ },
19010
+ {
19011
+ "epoch": 10.24,
19012
+ "learning_rate": 1.0935752500982175e-05,
19013
+ "loss": 0.1805,
19014
+ "step": 950500
19015
+ },
19016
+ {
19017
+ "epoch": 10.25,
19018
+ "learning_rate": 1.091698505917036e-05,
19019
+ "loss": 0.1804,
19020
+ "step": 951000
19021
+ },
19022
+ {
19023
+ "epoch": 10.25,
19024
+ "eval_loss": 0.1698637306690216,
19025
+ "eval_runtime": 2.5965,
19026
+ "eval_samples_per_second": 884.656,
19027
+ "eval_steps_per_second": 13.865,
19028
+ "step": 951000
19029
+ },
19030
+ {
19031
+ "epoch": 10.25,
19032
+ "learning_rate": 1.0898406487683472e-05,
19033
+ "loss": 0.1805,
19034
+ "step": 951500
19035
+ },
19036
+ {
19037
+ "epoch": 10.26,
19038
+ "learning_rate": 1.0880016837314599e-05,
19039
+ "loss": 0.1803,
19040
+ "step": 952000
19041
+ },
19042
+ {
19043
+ "epoch": 10.26,
19044
+ "eval_loss": 0.17085076868534088,
19045
+ "eval_runtime": 2.596,
19046
+ "eval_samples_per_second": 884.806,
19047
+ "eval_steps_per_second": 13.867,
19048
+ "step": 952000
19049
+ },
19050
+ {
19051
+ "epoch": 10.27,
19052
+ "learning_rate": 1.0861816158340365e-05,
19053
+ "loss": 0.1807,
19054
+ "step": 952500
19055
+ },
19056
+ {
19057
+ "epoch": 10.27,
19058
+ "learning_rate": 1.084380450052071e-05,
19059
+ "loss": 0.1803,
19060
+ "step": 953000
19061
+ },
19062
+ {
19063
+ "epoch": 10.27,
19064
+ "eval_loss": 0.17190536856651306,
19065
+ "eval_runtime": 2.595,
19066
+ "eval_samples_per_second": 885.153,
19067
+ "eval_steps_per_second": 13.873,
19068
+ "step": 953000
19069
+ },
19070
+ {
19071
+ "epoch": 10.28,
19072
+ "learning_rate": 1.0825981913098828e-05,
19073
+ "loss": 0.1799,
19074
+ "step": 953500
19075
+ },
19076
+ {
19077
+ "epoch": 10.28,
19078
+ "learning_rate": 1.0808348444801e-05,
19079
+ "loss": 0.1802,
19080
+ "step": 954000
19081
+ },
19082
+ {
19083
+ "epoch": 10.28,
19084
+ "eval_loss": 0.16949187219142914,
19085
+ "eval_runtime": 2.6166,
19086
+ "eval_samples_per_second": 877.869,
19087
+ "eval_steps_per_second": 13.758,
19088
+ "step": 954000
19089
+ },
19090
+ {
19091
+ "epoch": 10.29,
19092
+ "learning_rate": 1.0790904143836438e-05,
19093
+ "loss": 0.1804,
19094
+ "step": 954500
19095
+ },
19096
+ {
19097
+ "epoch": 10.29,
19098
+ "learning_rate": 1.0773649057897206e-05,
19099
+ "loss": 0.1802,
19100
+ "step": 955000
19101
+ },
19102
+ {
19103
+ "epoch": 10.29,
19104
+ "eval_loss": 0.16995471715927124,
19105
+ "eval_runtime": 2.6165,
19106
+ "eval_samples_per_second": 877.886,
19107
+ "eval_steps_per_second": 13.759,
19108
+ "step": 955000
19109
+ },
19110
+ {
19111
+ "epoch": 10.3,
19112
+ "learning_rate": 1.0756583234158057e-05,
19113
+ "loss": 0.1799,
19114
+ "step": 955500
19115
+ },
19116
+ {
19117
+ "epoch": 10.31,
19118
+ "learning_rate": 1.073970671927628e-05,
19119
+ "loss": 0.1802,
19120
+ "step": 956000
19121
+ },
19122
+ {
19123
+ "epoch": 10.31,
19124
+ "eval_loss": 0.17191793024539948,
19125
+ "eval_runtime": 2.6164,
19126
+ "eval_samples_per_second": 877.931,
19127
+ "eval_steps_per_second": 13.759,
19128
+ "step": 956000
19129
+ },
19130
+ {
19131
+ "epoch": 10.31,
19132
+ "learning_rate": 1.0723019559391643e-05,
19133
+ "loss": 0.1804,
19134
+ "step": 956500
19135
+ },
19136
+ {
19137
+ "epoch": 10.32,
19138
+ "learning_rate": 1.0706521800126198e-05,
19139
+ "loss": 0.18,
19140
+ "step": 957000
19141
+ },
19142
+ {
19143
+ "epoch": 10.32,
19144
+ "eval_loss": 0.17065568268299103,
19145
+ "eval_runtime": 2.6326,
19146
+ "eval_samples_per_second": 872.521,
19147
+ "eval_steps_per_second": 13.675,
19148
+ "step": 957000
19149
+ },
19150
+ {
19151
+ "epoch": 10.32,
19152
+ "learning_rate": 1.0690213486584175e-05,
19153
+ "loss": 0.18,
19154
+ "step": 957500
19155
+ },
19156
+ {
19157
+ "epoch": 10.33,
19158
+ "learning_rate": 1.0674094663351906e-05,
19159
+ "loss": 0.18,
19160
+ "step": 958000
19161
+ },
19162
+ {
19163
+ "epoch": 10.33,
19164
+ "eval_loss": 0.1698225736618042,
19165
+ "eval_runtime": 2.6744,
19166
+ "eval_samples_per_second": 858.898,
19167
+ "eval_steps_per_second": 13.461,
19168
+ "step": 958000
19169
+ },
19170
+ {
19171
+ "epoch": 10.33,
19172
+ "learning_rate": 1.0658165374497611e-05,
19173
+ "loss": 0.1804,
19174
+ "step": 958500
19175
+ },
19176
+ {
19177
+ "epoch": 10.34,
19178
+ "learning_rate": 1.0642425663571383e-05,
19179
+ "loss": 0.1802,
19180
+ "step": 959000
19181
+ },
19182
+ {
19183
+ "epoch": 10.34,
19184
+ "eval_loss": 0.17188780009746552,
19185
+ "eval_runtime": 2.6352,
19186
+ "eval_samples_per_second": 871.663,
19187
+ "eval_steps_per_second": 13.661,
19188
+ "step": 959000
19189
+ },
19190
+ {
19191
+ "epoch": 10.34,
19192
+ "learning_rate": 1.062687557360497e-05,
19193
+ "loss": 0.1802,
19194
+ "step": 959500
19195
+ },
19196
+ {
19197
+ "epoch": 10.35,
19198
+ "learning_rate": 1.0611515147111736e-05,
19199
+ "loss": 0.1802,
19200
+ "step": 960000
19201
+ },
19202
+ {
19203
+ "epoch": 10.35,
19204
+ "eval_loss": 0.16846837103366852,
19205
+ "eval_runtime": 2.7425,
19206
+ "eval_samples_per_second": 837.549,
19207
+ "eval_steps_per_second": 13.127,
19208
+ "step": 960000
19209
  }
19210
  ],
19211
  "max_steps": 1000000,
19212
  "num_train_epochs": 12,
19213
+ "total_flos": 6.72957029443817e+22,
19214
  "trial_name": null,
19215
  "trial_params": null
19216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
3
  size 449471589