Nadav commited on
Commit
6a219ab
·
1 Parent(s): 1173dbd

Training in progress, step 90000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb0ca75676c80a3c5fd59d0ac1080da68135ad1e20368aa04168b08561f3f84c
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0946b4af042d0003be776279dd8dec310d85422ca12c683daea7163fdc14f58f
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5335ac4e09dbab38db400a30c96e423b02cd136b1548c7b0e7c7dba23f440f1
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886c14bc146e35ddb8b43369943fdd5ae148eef1a89bed83206fbfc7c2a327ef
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211fb59b20f9f9f47cec52d841a7a8969719bf6f46a4a3c547f8fd9c6b81c01d
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ab37f55295b05256b0aeab0c9118d1e91676146a55f40c6459b8bea2a5ccfb
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c9fe17fd2766bc47b158d8b9c3151501d7d3c0213ab927e2871c909d3a891a
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd1d30ec6a85b35de8f6aedadaea7484a7f1c0d94891107a63ad388e3781007
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6997ee26ce92dcc6722ecbaf41fc375e207a59eb322cd715ff89e49540d1928e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61de41cc564c25462ca0290993ffba1d92f72b28f091680e90a006d4be7a958
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.966820517286424,
5
- "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1094,11 +1094,147 @@
1094
  "eval_samples_per_second": 52.447,
1095
  "eval_steps_per_second": 0.829,
1096
  "step": 80000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1097
  }
1098
  ],
1099
  "max_steps": 100000,
1100
  "num_train_epochs": 9,
1101
- "total_flos": 3.767680420025334e+21,
1102
  "trial_name": null,
1103
  "trial_params": null
1104
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.8376730819472264,
5
+ "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1094
  "eval_samples_per_second": 52.447,
1095
  "eval_steps_per_second": 0.829,
1096
  "step": 80000
1097
+ },
1098
+ {
1099
+ "epoch": 7.01,
1100
+ "learning_rate": 1.827645001383699e-05,
1101
+ "loss": 0.3851,
1102
+ "step": 80500
1103
+ },
1104
+ {
1105
+ "epoch": 7.05,
1106
+ "learning_rate": 1.787137801463301e-05,
1107
+ "loss": 0.3838,
1108
+ "step": 81000
1109
+ },
1110
+ {
1111
+ "epoch": 7.1,
1112
+ "learning_rate": 1.7474742369683822e-05,
1113
+ "loss": 0.3832,
1114
+ "step": 81500
1115
+ },
1116
+ {
1117
+ "epoch": 7.14,
1118
+ "learning_rate": 1.7087461901426146e-05,
1119
+ "loss": 0.3832,
1120
+ "step": 82000
1121
+ },
1122
+ {
1123
+ "epoch": 7.18,
1124
+ "learning_rate": 1.670963216552051e-05,
1125
+ "loss": 0.384,
1126
+ "step": 82500
1127
+ },
1128
+ {
1129
+ "epoch": 7.23,
1130
+ "learning_rate": 1.6342073371699782e-05,
1131
+ "loss": 0.3847,
1132
+ "step": 83000
1133
+ },
1134
+ {
1135
+ "epoch": 7.27,
1136
+ "learning_rate": 1.5983403058409845e-05,
1137
+ "loss": 0.3844,
1138
+ "step": 83500
1139
+ },
1140
+ {
1141
+ "epoch": 7.32,
1142
+ "learning_rate": 1.5634455887471077e-05,
1143
+ "loss": 0.384,
1144
+ "step": 84000
1145
+ },
1146
+ {
1147
+ "epoch": 7.36,
1148
+ "learning_rate": 1.52953179563765e-05,
1149
+ "loss": 0.3841,
1150
+ "step": 84500
1151
+ },
1152
+ {
1153
+ "epoch": 7.4,
1154
+ "learning_rate": 1.496607294233597e-05,
1155
+ "loss": 0.382,
1156
+ "step": 85000
1157
+ },
1158
+ {
1159
+ "epoch": 7.4,
1160
+ "eval_loss": 0.3640451431274414,
1161
+ "eval_runtime": 57.0081,
1162
+ "eval_samples_per_second": 87.707,
1163
+ "eval_steps_per_second": 1.386,
1164
+ "step": 85000
1165
+ },
1166
+ {
1167
+ "epoch": 7.45,
1168
+ "learning_rate": 1.4646802081630117e-05,
1169
+ "loss": 0.3827,
1170
+ "step": 85500
1171
+ },
1172
+ {
1173
+ "epoch": 7.49,
1174
+ "learning_rate": 1.4337584149566472e-05,
1175
+ "loss": 0.3821,
1176
+ "step": 86000
1177
+ },
1178
+ {
1179
+ "epoch": 7.53,
1180
+ "learning_rate": 1.4039083460027203e-05,
1181
+ "loss": 0.3829,
1182
+ "step": 86500
1183
+ },
1184
+ {
1185
+ "epoch": 7.58,
1186
+ "learning_rate": 1.3750177292690856e-05,
1187
+ "loss": 0.3827,
1188
+ "step": 87000
1189
+ },
1190
+ {
1191
+ "epoch": 7.62,
1192
+ "learning_rate": 1.3472092247039123e-05,
1193
+ "loss": 0.383,
1194
+ "step": 87500
1195
+ },
1196
+ {
1197
+ "epoch": 7.66,
1198
+ "learning_rate": 1.3203782390038048e-05,
1199
+ "loss": 0.3823,
1200
+ "step": 88000
1201
+ },
1202
+ {
1203
+ "epoch": 7.71,
1204
+ "learning_rate": 1.2945881505195709e-05,
1205
+ "loss": 0.3816,
1206
+ "step": 88500
1207
+ },
1208
+ {
1209
+ "epoch": 7.75,
1210
+ "learning_rate": 1.2698453225696373e-05,
1211
+ "loss": 0.381,
1212
+ "step": 89000
1213
+ },
1214
+ {
1215
+ "epoch": 7.79,
1216
+ "learning_rate": 1.2461558600765676e-05,
1217
+ "loss": 0.3809,
1218
+ "step": 89500
1219
+ },
1220
+ {
1221
+ "epoch": 7.84,
1222
+ "learning_rate": 1.2235256080607583e-05,
1223
+ "loss": 0.3806,
1224
+ "step": 90000
1225
+ },
1226
+ {
1227
+ "epoch": 7.84,
1228
+ "eval_loss": 0.36302754282951355,
1229
+ "eval_runtime": 77.3446,
1230
+ "eval_samples_per_second": 64.646,
1231
+ "eval_steps_per_second": 1.021,
1232
+ "step": 90000
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
+ "total_flos": 4.238631778453049e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5335ac4e09dbab38db400a30c96e423b02cd136b1548c7b0e7c7dba23f440f1
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886c14bc146e35ddb8b43369943fdd5ae148eef1a89bed83206fbfc7c2a327ef
3
  size 449471589