jflotz commited on
Commit
fcbebbe
·
1 Parent(s): 3fd55f4

Training in progress, step 810000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05cce4c1769cad62058c494e3844f2366f5491f558c117a5601783f8f4aca538
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20271fe04ea74ef6c1f5d2a4d1320cc4e3cdd71160c77649afb9825f61cfb447
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f6d411bd0a38f2cff0679d6b8d24e5bbf393f3e64bddfcb6100439be517e6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57cc3662a36c24e948440a9c5383a944373362c15a98a0fb5317ec1e024dd4c4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ff99717fc66d7e2670093ac4b787c0d4e68c8bb6b50d5d8a0a59479daaf2a3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.565160657127244,
5
- "global_step": 800000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16006,11 +16006,211 @@
16006
  "eval_samples_per_second": 853.408,
16007
  "eval_steps_per_second": 13.375,
16008
  "step": 800000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16009
  }
16010
  ],
16011
  "max_steps": 1000000,
16012
  "num_train_epochs": 12,
16013
- "total_flos": 5.607977070847647e+22,
16014
  "trial_name": null,
16015
  "trial_params": null
16016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.676689381350167,
5
+ "global_step": 810000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16006
  "eval_samples_per_second": 853.408,
16007
  "eval_steps_per_second": 13.375,
16008
  "step": 800000
16009
+ },
16010
+ {
16011
+ "epoch": 8.57,
16012
+ "learning_rate": 2.4689149133701672e-05,
16013
+ "loss": 0.1867,
16014
+ "step": 800500
16015
+ },
16016
+ {
16017
+ "epoch": 8.58,
16018
+ "learning_rate": 2.461828514290513e-05,
16019
+ "loss": 0.1869,
16020
+ "step": 801000
16021
+ },
16022
+ {
16023
+ "epoch": 8.58,
16024
+ "eval_loss": 0.17704518139362335,
16025
+ "eval_runtime": 2.7376,
16026
+ "eval_samples_per_second": 839.048,
16027
+ "eval_steps_per_second": 13.15,
16028
+ "step": 801000
16029
+ },
16030
+ {
16031
+ "epoch": 8.58,
16032
+ "learning_rate": 2.4547572563602267e-05,
16033
+ "loss": 0.1872,
16034
+ "step": 801500
16035
+ },
16036
+ {
16037
+ "epoch": 8.59,
16038
+ "learning_rate": 2.447701158911855e-05,
16039
+ "loss": 0.1868,
16040
+ "step": 802000
16041
+ },
16042
+ {
16043
+ "epoch": 8.59,
16044
+ "eval_loss": 0.17794357240200043,
16045
+ "eval_runtime": 2.6487,
16046
+ "eval_samples_per_second": 867.219,
16047
+ "eval_steps_per_second": 13.592,
16048
+ "step": 802000
16049
+ },
16050
+ {
16051
+ "epoch": 8.59,
16052
+ "learning_rate": 2.4406602412365027e-05,
16053
+ "loss": 0.187,
16054
+ "step": 802500
16055
+ },
16056
+ {
16057
+ "epoch": 8.6,
16058
+ "learning_rate": 2.4336345225837658e-05,
16059
+ "loss": 0.1872,
16060
+ "step": 803000
16061
+ },
16062
+ {
16063
+ "epoch": 8.6,
16064
+ "eval_loss": 0.1776154637336731,
16065
+ "eval_runtime": 2.7206,
16066
+ "eval_samples_per_second": 844.313,
16067
+ "eval_steps_per_second": 13.233,
16068
+ "step": 803000
16069
+ },
16070
+ {
16071
+ "epoch": 8.6,
16072
+ "learning_rate": 2.4266240221616956e-05,
16073
+ "loss": 0.1873,
16074
+ "step": 803500
16075
+ },
16076
+ {
16077
+ "epoch": 8.61,
16078
+ "learning_rate": 2.4196287591367296e-05,
16079
+ "loss": 0.1868,
16080
+ "step": 804000
16081
+ },
16082
+ {
16083
+ "epoch": 8.61,
16084
+ "eval_loss": 0.176628977060318,
16085
+ "eval_runtime": 2.6337,
16086
+ "eval_samples_per_second": 872.144,
16087
+ "eval_steps_per_second": 13.669,
16088
+ "step": 804000
16089
+ },
16090
+ {
16091
+ "epoch": 8.62,
16092
+ "learning_rate": 2.412648752633649e-05,
16093
+ "loss": 0.1869,
16094
+ "step": 804500
16095
+ },
16096
+ {
16097
+ "epoch": 8.62,
16098
+ "learning_rate": 2.405684021735527e-05,
16099
+ "loss": 0.1866,
16100
+ "step": 805000
16101
+ },
16102
+ {
16103
+ "epoch": 8.62,
16104
+ "eval_loss": 0.1774420291185379,
16105
+ "eval_runtime": 2.6036,
16106
+ "eval_samples_per_second": 882.223,
16107
+ "eval_steps_per_second": 13.827,
16108
+ "step": 805000
16109
+ },
16110
+ {
16111
+ "epoch": 8.63,
16112
+ "learning_rate": 2.39873458548367e-05,
16113
+ "loss": 0.1871,
16114
+ "step": 805500
16115
+ },
16116
+ {
16117
+ "epoch": 8.63,
16118
+ "learning_rate": 2.3918004628775736e-05,
16119
+ "loss": 0.1871,
16120
+ "step": 806000
16121
+ },
16122
+ {
16123
+ "epoch": 8.63,
16124
+ "eval_loss": 0.1766408383846283,
16125
+ "eval_runtime": 2.6553,
16126
+ "eval_samples_per_second": 865.055,
16127
+ "eval_steps_per_second": 13.558,
16128
+ "step": 806000
16129
+ },
16130
+ {
16131
+ "epoch": 8.64,
16132
+ "learning_rate": 2.3848816728748643e-05,
16133
+ "loss": 0.187,
16134
+ "step": 806500
16135
+ },
16136
+ {
16137
+ "epoch": 8.64,
16138
+ "learning_rate": 2.3779782343912463e-05,
16139
+ "loss": 0.1871,
16140
+ "step": 807000
16141
+ },
16142
+ {
16143
+ "epoch": 8.64,
16144
+ "eval_loss": 0.17578239738941193,
16145
+ "eval_runtime": 2.6261,
16146
+ "eval_samples_per_second": 874.674,
16147
+ "eval_steps_per_second": 13.708,
16148
+ "step": 807000
16149
+ },
16150
+ {
16151
+ "epoch": 8.65,
16152
+ "learning_rate": 2.3710901663004604e-05,
16153
+ "loss": 0.1867,
16154
+ "step": 807500
16155
+ },
16156
+ {
16157
+ "epoch": 8.65,
16158
+ "learning_rate": 2.364217487434221e-05,
16159
+ "loss": 0.1867,
16160
+ "step": 808000
16161
+ },
16162
+ {
16163
+ "epoch": 8.65,
16164
+ "eval_loss": 0.17686133086681366,
16165
+ "eval_runtime": 2.5799,
16166
+ "eval_samples_per_second": 890.356,
16167
+ "eval_steps_per_second": 13.954,
16168
+ "step": 808000
16169
+ },
16170
+ {
16171
+ "epoch": 8.66,
16172
+ "learning_rate": 2.3573602165821668e-05,
16173
+ "loss": 0.187,
16174
+ "step": 808500
16175
+ },
16176
+ {
16177
+ "epoch": 8.67,
16178
+ "learning_rate": 2.3505183724918196e-05,
16179
+ "loss": 0.1867,
16180
+ "step": 809000
16181
+ },
16182
+ {
16183
+ "epoch": 8.67,
16184
+ "eval_loss": 0.1787070780992508,
16185
+ "eval_runtime": 2.6677,
16186
+ "eval_samples_per_second": 861.032,
16187
+ "eval_steps_per_second": 13.495,
16188
+ "step": 809000
16189
+ },
16190
+ {
16191
+ "epoch": 8.67,
16192
+ "learning_rate": 2.3436919738685132e-05,
16193
+ "loss": 0.1864,
16194
+ "step": 809500
16195
+ },
16196
+ {
16197
+ "epoch": 8.68,
16198
+ "learning_rate": 2.3368810393753687e-05,
16199
+ "loss": 0.1866,
16200
+ "step": 810000
16201
+ },
16202
+ {
16203
+ "epoch": 8.68,
16204
+ "eval_loss": 0.17782823741436005,
16205
+ "eval_runtime": 2.6203,
16206
+ "eval_samples_per_second": 876.614,
16207
+ "eval_steps_per_second": 13.739,
16208
+ "step": 810000
16209
  }
16210
  ],
16211
  "max_steps": 1000000,
16212
  "num_train_epochs": 12,
16213
+ "total_flos": 5.678077331877994e+22,
16214
  "trial_name": null,
16215
  "trial_params": null
16216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f6d411bd0a38f2cff0679d6b8d24e5bbf393f3e64bddfcb6100439be517e6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
3
  size 449471589