Nadav committed on
Commit
9629aa1
·
1 Parent(s): 56d71dc

Training in progress, step 60000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498d7b9e3f0d37a83926f59b2cb165bdd46bd0c0742c54170b76d952e02a893c
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf4891b4d7d549b998893b34a4d7fda74fe4d926dd8829b0fd1f3ebe9e05331
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f33e96cd5ae96ee1eef3ce16c2a488ea23ed489c465e68987d362841b961f8c8
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a2bbc1e1a7ffff89dca088c920d657df7faf45dedf18ccd7ee5e4d0208668c
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d4c51ae1618ba1804c8e334fa9d680770c5a033bbae09ba0b6215394af3efe5
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6674133d74344ac7df609ac13da3436a0f3992a815e6582851a68fd5a7ad5e18
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79bb4f1c3cf2bd9538fe3d60ad5d10def453ecc1e22bfde183478867d351d65b
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:337ac944c1300b5fad5b6313f7b0d4a29d6bf7750594a32866e96847ec6ec635
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c4a8546e1f655ebc7fedb20c5c49b5e1c8e9f57c8fc9fd75e9a0ca2f699d21
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19d585522f5a4f4e47c69e3dfd034779da400358487b840d5af1cf4b0f318c1e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.678462061925825,
5
- "global_step": 55000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1106,11 +1106,111 @@
1106
  "eval_samples_per_second": 30.017,
1107
  "eval_steps_per_second": 0.961,
1108
  "step": 55000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1109
  }
1110
  ],
1111
  "max_steps": 1000000,
1112
  "num_train_epochs": 86,
1113
- "total_flos": 2.530116704751743e+21,
1114
  "trial_name": null,
1115
  "trial_params": null
1116
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.103776794828173,
5
+ "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1106
  "eval_samples_per_second": 30.017,
1107
  "eval_steps_per_second": 0.961,
1108
  "step": 55000
1109
+ },
1110
+ {
1111
+ "epoch": 4.72,
1112
+ "learning_rate": 9.999999999999999e-06,
1113
+ "loss": 0.411,
1114
+ "step": 55500
1115
+ },
1116
+ {
1117
+ "epoch": 4.76,
1118
+ "learning_rate": 9.999999999999999e-06,
1119
+ "loss": 0.4108,
1120
+ "step": 56000
1121
+ },
1122
+ {
1123
+ "epoch": 4.76,
1124
+ "eval_loss": 0.38048413395881653,
1125
+ "eval_runtime": 27.2288,
1126
+ "eval_samples_per_second": 18.363,
1127
+ "eval_steps_per_second": 0.588,
1128
+ "step": 56000
1129
+ },
1130
+ {
1131
+ "epoch": 4.81,
1132
+ "learning_rate": 9.999999999999999e-06,
1133
+ "loss": 0.4112,
1134
+ "step": 56500
1135
+ },
1136
+ {
1137
+ "epoch": 4.85,
1138
+ "learning_rate": 9.999999999999999e-06,
1139
+ "loss": 0.4098,
1140
+ "step": 57000
1141
+ },
1142
+ {
1143
+ "epoch": 4.85,
1144
+ "eval_loss": 0.3806820809841156,
1145
+ "eval_runtime": 28.3585,
1146
+ "eval_samples_per_second": 17.631,
1147
+ "eval_steps_per_second": 0.564,
1148
+ "step": 57000
1149
+ },
1150
+ {
1151
+ "epoch": 4.89,
1152
+ "learning_rate": 9.999999999999999e-06,
1153
+ "loss": 0.4083,
1154
+ "step": 57500
1155
+ },
1156
+ {
1157
+ "epoch": 4.93,
1158
+ "learning_rate": 9.999999999999999e-06,
1159
+ "loss": 0.4097,
1160
+ "step": 58000
1161
+ },
1162
+ {
1163
+ "epoch": 4.93,
1164
+ "eval_loss": 0.38133466243743896,
1165
+ "eval_runtime": 31.8927,
1166
+ "eval_samples_per_second": 15.678,
1167
+ "eval_steps_per_second": 0.502,
1168
+ "step": 58000
1169
+ },
1170
+ {
1171
+ "epoch": 4.98,
1172
+ "learning_rate": 9.999999999999999e-06,
1173
+ "loss": 0.41,
1174
+ "step": 58500
1175
+ },
1176
+ {
1177
+ "epoch": 5.02,
1178
+ "learning_rate": 9.999999999999999e-06,
1179
+ "loss": 0.4098,
1180
+ "step": 59000
1181
+ },
1182
+ {
1183
+ "epoch": 5.02,
1184
+ "eval_loss": 0.380397766828537,
1185
+ "eval_runtime": 29.3164,
1186
+ "eval_samples_per_second": 17.055,
1187
+ "eval_steps_per_second": 0.546,
1188
+ "step": 59000
1189
+ },
1190
+ {
1191
+ "epoch": 5.06,
1192
+ "learning_rate": 9.999999999999999e-06,
1193
+ "loss": 0.4094,
1194
+ "step": 59500
1195
+ },
1196
+ {
1197
+ "epoch": 5.1,
1198
+ "learning_rate": 9.999999999999999e-06,
1199
+ "loss": 0.4092,
1200
+ "step": 60000
1201
+ },
1202
+ {
1203
+ "epoch": 5.1,
1204
+ "eval_loss": 0.38139721751213074,
1205
+ "eval_runtime": 19.7764,
1206
+ "eval_samples_per_second": 25.283,
1207
+ "eval_steps_per_second": 0.809,
1208
+ "step": 60000
1209
  }
1210
  ],
1211
  "max_steps": 1000000,
1212
  "num_train_epochs": 86,
1213
+ "total_flos": 2.7601170332347956e+21,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f33e96cd5ae96ee1eef3ce16c2a488ea23ed489c465e68987d362841b961f8c8
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a2bbc1e1a7ffff89dca088c920d657df7faf45dedf18ccd7ee5e4d0208668c
3
  size 449471589