irishprancer committed on
Commit
fc23094
·
verified ·
1 Parent(s): 40accf8

Training in progress, step 1650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aae9277305b2d9df700cc34a8dc037a2298180d61d92eaa796ca7ab56c96e70
3
  size 774409936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28336134bd8ddf70906cb764611ebfea3bf5a3afbbb671af041067ff337967be
3
  size 774409936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d01c11ea6c82e90ff3fdc15ad57126864459f35e1e55a3ddd0317f43edbf73a
3
  size 1523152378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa156338fd8ccab5e360df667036f1d13c9e2b05480fa56217dae09e08607ec1
3
  size 1523152378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b61492d4d4ac91c8cb36a91bca94c844935c290540b245b381a00010d48d1faa
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c21eb9bef4a8e7d6925d09fa075fb777907e1fbbb9ec2100dea9bbc025fe2a
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:073f2a20599562712b5ae80345a1a15f5aa5830fa2fbb7867b2023b24249dfcf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c5d4556cbcaf5ad023a1f296eb2ae31f296155e6ce44fd3bf46ac6d504e69b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.00036135767004452646,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
- "epoch": 1.5151515151515151,
5
  "eval_steps": 150,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1137,6 +1137,119 @@
1137
  "eval_samples_per_second": 11.254,
1138
  "eval_steps_per_second": 11.254,
1139
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1140
  }
1141
  ],
1142
  "logging_steps": 10,
@@ -1156,7 +1269,7 @@
1156
  "attributes": {}
1157
  }
1158
  },
1159
- "total_flos": 1.4154040493133005e+17,
1160
  "train_batch_size": 16,
1161
  "trial_name": null,
1162
  "trial_params": null
 
1
  {
2
  "best_metric": 0.00036135767004452646,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
+ "epoch": 1.6666666666666665,
5
  "eval_steps": 150,
6
+ "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1137
  "eval_samples_per_second": 11.254,
1138
  "eval_steps_per_second": 11.254,
1139
  "step": 1500
1140
+ },
1141
+ {
1142
+ "epoch": 1.5252525252525253,
1143
+ "grad_norm": 0.060537345707416534,
1144
+ "learning_rate": 4.046160941514079e-06,
1145
+ "loss": 0.0001,
1146
+ "step": 1510
1147
+ },
1148
+ {
1149
+ "epoch": 1.5353535353535355,
1150
+ "grad_norm": 0.011248580180108547,
1151
+ "learning_rate": 4.033533821760917e-06,
1152
+ "loss": 0.0001,
1153
+ "step": 1520
1154
+ },
1155
+ {
1156
+ "epoch": 1.5454545454545454,
1157
+ "grad_norm": 0.005074686370790005,
1158
+ "learning_rate": 4.020843664438783e-06,
1159
+ "loss": 0.0001,
1160
+ "step": 1530
1161
+ },
1162
+ {
1163
+ "epoch": 1.5555555555555556,
1164
+ "grad_norm": 0.003374485531821847,
1165
+ "learning_rate": 4.008090991190341e-06,
1166
+ "loss": 0.0001,
1167
+ "step": 1540
1168
+ },
1169
+ {
1170
+ "epoch": 1.5656565656565657,
1171
+ "grad_norm": 0.008233348838984966,
1172
+ "learning_rate": 3.99527632622804e-06,
1173
+ "loss": 0.0003,
1174
+ "step": 1550
1175
+ },
1176
+ {
1177
+ "epoch": 1.5757575757575757,
1178
+ "grad_norm": 0.8828936219215393,
1179
+ "learning_rate": 3.982400196312565e-06,
1180
+ "loss": 0.0002,
1181
+ "step": 1560
1182
+ },
1183
+ {
1184
+ "epoch": 1.5858585858585859,
1185
+ "grad_norm": 0.07183331996202469,
1186
+ "learning_rate": 3.969463130731183e-06,
1187
+ "loss": 0.0006,
1188
+ "step": 1570
1189
+ },
1190
+ {
1191
+ "epoch": 1.595959595959596,
1192
+ "grad_norm": 0.022035278379917145,
1193
+ "learning_rate": 3.9564656612759904e-06,
1194
+ "loss": 0.0001,
1195
+ "step": 1580
1196
+ },
1197
+ {
1198
+ "epoch": 1.606060606060606,
1199
+ "grad_norm": 0.012215990573167801,
1200
+ "learning_rate": 3.943408322222049e-06,
1201
+ "loss": 0.0001,
1202
+ "step": 1590
1203
+ },
1204
+ {
1205
+ "epoch": 1.6161616161616161,
1206
+ "grad_norm": 0.006953661795705557,
1207
+ "learning_rate": 3.930291650305424e-06,
1208
+ "loss": 0.0006,
1209
+ "step": 1600
1210
+ },
1211
+ {
1212
+ "epoch": 1.6262626262626263,
1213
+ "grad_norm": 0.01029939018189907,
1214
+ "learning_rate": 3.917116184701125e-06,
1215
+ "loss": 0.0001,
1216
+ "step": 1610
1217
+ },
1218
+ {
1219
+ "epoch": 1.6363636363636362,
1220
+ "grad_norm": 0.007781410124152899,
1221
+ "learning_rate": 3.903882467000938e-06,
1222
+ "loss": 0.0005,
1223
+ "step": 1620
1224
+ },
1225
+ {
1226
+ "epoch": 1.6464646464646466,
1227
+ "grad_norm": 0.2221326380968094,
1228
+ "learning_rate": 3.890591041191162e-06,
1229
+ "loss": 0.0001,
1230
+ "step": 1630
1231
+ },
1232
+ {
1233
+ "epoch": 1.6565656565656566,
1234
+ "grad_norm": 0.024798329919576645,
1235
+ "learning_rate": 3.8772424536302565e-06,
1236
+ "loss": 0.0001,
1237
+ "step": 1640
1238
+ },
1239
+ {
1240
+ "epoch": 1.6666666666666665,
1241
+ "grad_norm": 0.013180619105696678,
1242
+ "learning_rate": 3.863837253026372e-06,
1243
+ "loss": 0.0001,
1244
+ "step": 1650
1245
+ },
1246
+ {
1247
+ "epoch": 1.6666666666666665,
1248
+ "eval_loss": 0.0007805961649864912,
1249
+ "eval_runtime": 44.2223,
1250
+ "eval_samples_per_second": 11.307,
1251
+ "eval_steps_per_second": 11.307,
1252
+ "step": 1650
1253
  }
1254
  ],
1255
  "logging_steps": 10,
 
1269
  "attributes": {}
1270
  }
1271
  },
1272
+ "total_flos": 1.5561419544251597e+17,
1273
  "train_batch_size": 16,
1274
  "trial_name": null,
1275
  "trial_params": null