jflotz commited on
Commit
f00b519
·
1 Parent(s): 2ce75ec

Training in progress, step 560000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2ed2d2ebacd7c7bc94a9d5b359875612bcd82c11229aa6d78716aa5ee16a33b
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b1cc1f37f7674d19f50d0ac065ef69529e5d29f5bb20b814471f7b204857988
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b156aacca311d8af63ef36e1ded9f1bccccc04c94ee9871c381f73fe7e5ad577
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69156129ac8d15ff24024e58191195a53f5f104d191324494328fe5de76a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8de7df7ff53e44669a043f69e39b55baa82d81ac1777f09e2f6159ffcb51dd66
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9a25b33d29c3fd157c79676adec6abbe35f3978d907c7efc857fe0437c64ac
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.134079832260799,
5
- "global_step": 550000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11006,11 +11006,211 @@
11006
  "eval_samples_per_second": 898.971,
11007
  "eval_steps_per_second": 14.089,
11008
  "step": 550000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11009
  }
11010
  ],
11011
  "max_steps": 1000000,
11012
  "num_train_epochs": 12,
11013
- "total_flos": 3.855481497983999e+22,
11014
  "trial_name": null,
11015
  "trial_params": null
11016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.245608556483722,
5
+ "global_step": 560000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11006
  "eval_samples_per_second": 898.971,
11007
  "eval_steps_per_second": 14.089,
11008
  "step": 550000
11009
+ },
11010
+ {
11011
+ "epoch": 6.14,
11012
+ "learning_rate": 7.410410620480651e-05,
11013
+ "loss": 0.2065,
11014
+ "step": 550500
11015
+ },
11016
+ {
11017
+ "epoch": 6.15,
11018
+ "learning_rate": 7.39887827118248e-05,
11019
+ "loss": 0.2063,
11020
+ "step": 551000
11021
+ },
11022
+ {
11023
+ "epoch": 6.15,
11024
+ "eval_loss": 0.1966421753168106,
11025
+ "eval_runtime": 2.4465,
11026
+ "eval_samples_per_second": 938.899,
11027
+ "eval_steps_per_second": 14.715,
11028
+ "step": 551000
11029
+ },
11030
+ {
11031
+ "epoch": 6.15,
11032
+ "learning_rate": 7.38734756532816e-05,
11033
+ "loss": 0.2062,
11034
+ "step": 551500
11035
+ },
11036
+ {
11037
+ "epoch": 6.16,
11038
+ "learning_rate": 7.375818534442207e-05,
11039
+ "loss": 0.2063,
11040
+ "step": 552000
11041
+ },
11042
+ {
11043
+ "epoch": 6.16,
11044
+ "eval_loss": 0.19533967971801758,
11045
+ "eval_runtime": 2.4902,
11046
+ "eval_samples_per_second": 922.429,
11047
+ "eval_steps_per_second": 14.457,
11048
+ "step": 552000
11049
+ },
11050
+ {
11051
+ "epoch": 6.16,
11052
+ "learning_rate": 7.364291210044542e-05,
11053
+ "loss": 0.2058,
11054
+ "step": 552500
11055
+ },
11056
+ {
11057
+ "epoch": 6.17,
11058
+ "learning_rate": 7.352765623650435e-05,
11059
+ "loss": 0.2061,
11060
+ "step": 553000
11061
+ },
11062
+ {
11063
+ "epoch": 6.17,
11064
+ "eval_loss": 0.1968429684638977,
11065
+ "eval_runtime": 2.5279,
11066
+ "eval_samples_per_second": 908.659,
11067
+ "eval_steps_per_second": 14.241,
11068
+ "step": 553000
11069
+ },
11070
+ {
11071
+ "epoch": 6.17,
11072
+ "learning_rate": 7.341241806770399e-05,
11073
+ "loss": 0.2064,
11074
+ "step": 553500
11075
+ },
11076
+ {
11077
+ "epoch": 6.18,
11078
+ "learning_rate": 7.329719790910108e-05,
11079
+ "loss": 0.2056,
11080
+ "step": 554000
11081
+ },
11082
+ {
11083
+ "epoch": 6.18,
11084
+ "eval_loss": 0.19725964963436127,
11085
+ "eval_runtime": 2.5134,
11086
+ "eval_samples_per_second": 913.905,
11087
+ "eval_steps_per_second": 14.323,
11088
+ "step": 554000
11089
+ },
11090
+ {
11091
+ "epoch": 6.18,
11092
+ "learning_rate": 7.318199607570318e-05,
11093
+ "loss": 0.2057,
11094
+ "step": 554500
11095
+ },
11096
+ {
11097
+ "epoch": 6.19,
11098
+ "learning_rate": 7.30668128824676e-05,
11099
+ "loss": 0.2061,
11100
+ "step": 555000
11101
+ },
11102
+ {
11103
+ "epoch": 6.19,
11104
+ "eval_loss": 0.19744634628295898,
11105
+ "eval_runtime": 2.5342,
11106
+ "eval_samples_per_second": 906.409,
11107
+ "eval_steps_per_second": 14.206,
11108
+ "step": 555000
11109
+ },
11110
+ {
11111
+ "epoch": 6.2,
11112
+ "learning_rate": 7.295164864430088e-05,
11113
+ "loss": 0.2056,
11114
+ "step": 555500
11115
+ },
11116
+ {
11117
+ "epoch": 6.2,
11118
+ "learning_rate": 7.283650367605764e-05,
11119
+ "loss": 0.2062,
11120
+ "step": 556000
11121
+ },
11122
+ {
11123
+ "epoch": 6.2,
11124
+ "eval_loss": 0.19634658098220825,
11125
+ "eval_runtime": 2.4766,
11126
+ "eval_samples_per_second": 927.495,
11127
+ "eval_steps_per_second": 14.536,
11128
+ "step": 556000
11129
+ },
11130
+ {
11131
+ "epoch": 6.21,
11132
+ "learning_rate": 7.272137829253983e-05,
11133
+ "loss": 0.2059,
11134
+ "step": 556500
11135
+ },
11136
+ {
11137
+ "epoch": 6.21,
11138
+ "learning_rate": 7.260627280849581e-05,
11139
+ "loss": 0.2061,
11140
+ "step": 557000
11141
+ },
11142
+ {
11143
+ "epoch": 6.21,
11144
+ "eval_loss": 0.19559474289417267,
11145
+ "eval_runtime": 2.4504,
11146
+ "eval_samples_per_second": 937.415,
11147
+ "eval_steps_per_second": 14.692,
11148
+ "step": 557000
11149
+ },
11150
+ {
11151
+ "epoch": 6.22,
11152
+ "learning_rate": 7.249118753861958e-05,
11153
+ "loss": 0.2056,
11154
+ "step": 557500
11155
+ },
11156
+ {
11157
+ "epoch": 6.22,
11158
+ "learning_rate": 7.23761227975499e-05,
11159
+ "loss": 0.2055,
11160
+ "step": 558000
11161
+ },
11162
+ {
11163
+ "epoch": 6.22,
11164
+ "eval_loss": 0.1950286328792572,
11165
+ "eval_runtime": 2.5356,
11166
+ "eval_samples_per_second": 905.898,
11167
+ "eval_steps_per_second": 14.198,
11168
+ "step": 558000
11169
+ },
11170
+ {
11171
+ "epoch": 6.23,
11172
+ "learning_rate": 7.22610788998694e-05,
11173
+ "loss": 0.2056,
11174
+ "step": 558500
11175
+ },
11176
+ {
11177
+ "epoch": 6.23,
11178
+ "learning_rate": 7.21460561601037e-05,
11179
+ "loss": 0.2055,
11180
+ "step": 559000
11181
+ },
11182
+ {
11183
+ "epoch": 6.23,
11184
+ "eval_loss": 0.19409525394439697,
11185
+ "eval_runtime": 2.5975,
11186
+ "eval_samples_per_second": 884.304,
11187
+ "eval_steps_per_second": 13.859,
11188
+ "step": 559000
11189
+ },
11190
+ {
11191
+ "epoch": 6.24,
11192
+ "learning_rate": 7.203105489272053e-05,
11193
+ "loss": 0.2055,
11194
+ "step": 559500
11195
+ },
11196
+ {
11197
+ "epoch": 6.25,
11198
+ "learning_rate": 7.191607541212897e-05,
11199
+ "loss": 0.2057,
11200
+ "step": 560000
11201
+ },
11202
+ {
11203
+ "epoch": 6.25,
11204
+ "eval_loss": 0.19587305188179016,
11205
+ "eval_runtime": 2.6038,
11206
+ "eval_samples_per_second": 882.169,
11207
+ "eval_steps_per_second": 13.826,
11208
+ "step": 560000
11209
  }
11210
  ],
11211
  "max_steps": 1000000,
11212
  "num_train_epochs": 12,
11213
+ "total_flos": 3.925581759014346e+22,
11214
  "trial_name": null,
11215
  "trial_params": null
11216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b156aacca311d8af63ef36e1ded9f1bccccc04c94ee9871c381f73fe7e5ad577
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
3
  size 449471589