ErrorAI commited on
Commit
8873cc7
·
verified ·
1 Parent(s): 683380c

Training in progress, step 204, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:740b9f616f65c988c61dc95217c9c0fdc273f6f55f09725e222b88496c72c59d
3
  size 9823216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41af54ef48385f1e9240a01ed0e4e9778b1bbdcbbeaa3976f0744a97cee781a
3
  size 9823216
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fea744051c34f8a0de02d059c8345493f41f146deef35aa4b2cb0c6a8429f8b
3
  size 5962860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d30bc5cfe87f818a6b3f472e279714bdad1dca3906fc14f6922585bbcdc9e13
3
  size 5962860
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bd6748756db41b0863adfb7f8ef25e8a4b1b0052368551cdc549a64c7a648cc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d1251a5e12d0ab0e0cece07fc1165a0b3630a00ab4bccd575f3d646ed3d1cd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c7581b7df531d3dbae8c2657d36090edae47306e1ded92753ec3676d20d1b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e783401ecc99df9e68aa18c0fa2df70c08a04fbed0cb3e6ae60e028c1074e54a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.75,
5
  "eval_steps": 500,
6
- "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1078,6 +1078,363 @@
1078
  "learning_rate": 1.5349188304533413e-05,
1079
  "loss": 1.9898,
1080
  "step": 153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1081
  }
1082
  ],
1083
  "logging_steps": 1,
@@ -1092,12 +1449,12 @@
1092
  "should_evaluate": false,
1093
  "should_log": false,
1094
  "should_save": true,
1095
- "should_training_stop": false
1096
  },
1097
  "attributes": {}
1098
  }
1099
  },
1100
- "total_flos": 2023918081671168.0,
1101
  "train_batch_size": 4,
1102
  "trial_name": null,
1103
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 204,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1078
  "learning_rate": 1.5349188304533413e-05,
1079
  "loss": 1.9898,
1080
  "step": 153
1081
+ },
1082
+ {
1083
+ "epoch": 0.7549019607843137,
1084
+ "grad_norm": 0.6448200941085815,
1085
+ "learning_rate": 1.4784473717366387e-05,
1086
+ "loss": 2.0231,
1087
+ "step": 154
1088
+ },
1089
+ {
1090
+ "epoch": 0.7598039215686274,
1091
+ "grad_norm": 0.6143677234649658,
1092
+ "learning_rate": 1.4228535577631442e-05,
1093
+ "loss": 1.8083,
1094
+ "step": 155
1095
+ },
1096
+ {
1097
+ "epoch": 0.7647058823529411,
1098
+ "grad_norm": 0.6792232990264893,
1099
+ "learning_rate": 1.3681512436768045e-05,
1100
+ "loss": 2.2576,
1101
+ "step": 156
1102
+ },
1103
+ {
1104
+ "epoch": 0.7696078431372549,
1105
+ "grad_norm": 0.6397396922111511,
1106
+ "learning_rate": 1.314354062441106e-05,
1107
+ "loss": 1.9228,
1108
+ "step": 157
1109
+ },
1110
+ {
1111
+ "epoch": 0.7745098039215687,
1112
+ "grad_norm": 0.6527466773986816,
1113
+ "learning_rate": 1.2614754214414548e-05,
1114
+ "loss": 1.9549,
1115
+ "step": 158
1116
+ },
1117
+ {
1118
+ "epoch": 0.7794117647058824,
1119
+ "grad_norm": 0.558052659034729,
1120
+ "learning_rate": 1.2095284991437733e-05,
1121
+ "loss": 1.6198,
1122
+ "step": 159
1123
+ },
1124
+ {
1125
+ "epoch": 0.7843137254901961,
1126
+ "grad_norm": 0.5108990669250488,
1127
+ "learning_rate": 1.1585262418101467e-05,
1128
+ "loss": 1.7669,
1129
+ "step": 160
1130
+ },
1131
+ {
1132
+ "epoch": 0.7892156862745098,
1133
+ "grad_norm": 0.5814594030380249,
1134
+ "learning_rate": 1.1084813602723515e-05,
1135
+ "loss": 1.8603,
1136
+ "step": 161
1137
+ },
1138
+ {
1139
+ "epoch": 0.7941176470588235,
1140
+ "grad_norm": 0.5403014421463013,
1141
+ "learning_rate": 1.0594063267640386e-05,
1142
+ "loss": 1.6909,
1143
+ "step": 162
1144
+ },
1145
+ {
1146
+ "epoch": 0.7990196078431373,
1147
+ "grad_norm": 0.5353929400444031,
1148
+ "learning_rate": 1.0113133718124035e-05,
1149
+ "loss": 1.8579,
1150
+ "step": 163
1151
+ },
1152
+ {
1153
+ "epoch": 0.803921568627451,
1154
+ "grad_norm": 0.6234535574913025,
1155
+ "learning_rate": 9.642144811900739e-06,
1156
+ "loss": 2.0002,
1157
+ "step": 164
1158
+ },
1159
+ {
1160
+ "epoch": 0.8088235294117647,
1161
+ "grad_norm": 0.5333160161972046,
1162
+ "learning_rate": 9.181213929280046e-06,
1163
+ "loss": 1.7953,
1164
+ "step": 165
1165
+ },
1166
+ {
1167
+ "epoch": 0.8137254901960784,
1168
+ "grad_norm": 0.5747147798538208,
1169
+ "learning_rate": 8.7304559439012e-06,
1170
+ "loss": 2.2429,
1171
+ "step": 166
1172
+ },
1173
+ {
1174
+ "epoch": 0.8186274509803921,
1175
+ "grad_norm": 0.5337631702423096,
1176
+ "learning_rate": 8.28998319410413e-06,
1177
+ "loss": 1.7715,
1178
+ "step": 167
1179
+ },
1180
+ {
1181
+ "epoch": 0.8235294117647058,
1182
+ "grad_norm": 0.6177130341529846,
1183
+ "learning_rate": 7.859905454932471e-06,
1184
+ "loss": 1.684,
1185
+ "step": 168
1186
+ },
1187
+ {
1188
+ "epoch": 0.8284313725490197,
1189
+ "grad_norm": 0.5750278830528259,
1190
+ "learning_rate": 7.440329910775273e-06,
1191
+ "loss": 2.0278,
1192
+ "step": 169
1193
+ },
1194
+ {
1195
+ "epoch": 0.8333333333333334,
1196
+ "grad_norm": 0.5838117599487305,
1197
+ "learning_rate": 7.031361128654401e-06,
1198
+ "loss": 1.9058,
1199
+ "step": 170
1200
+ },
1201
+ {
1202
+ "epoch": 0.8382352941176471,
1203
+ "grad_norm": 0.573087215423584,
1204
+ "learning_rate": 6.633101032164274e-06,
1205
+ "loss": 2.153,
1206
+ "step": 171
1207
+ },
1208
+ {
1209
+ "epoch": 0.8431372549019608,
1210
+ "grad_norm": 0.5336370468139648,
1211
+ "learning_rate": 6.2456488760703205e-06,
1212
+ "loss": 1.773,
1213
+ "step": 172
1214
+ },
1215
+ {
1216
+ "epoch": 0.8480392156862745,
1217
+ "grad_norm": 0.628755509853363,
1218
+ "learning_rate": 5.869101221572654e-06,
1219
+ "loss": 1.949,
1220
+ "step": 173
1221
+ },
1222
+ {
1223
+ "epoch": 0.8529411764705882,
1224
+ "grad_norm": 0.5883850455284119,
1225
+ "learning_rate": 5.5035519122409895e-06,
1226
+ "loss": 1.8569,
1227
+ "step": 174
1228
+ },
1229
+ {
1230
+ "epoch": 0.8578431372549019,
1231
+ "grad_norm": 0.5533519983291626,
1232
+ "learning_rate": 5.149092050626825e-06,
1233
+ "loss": 1.9951,
1234
+ "step": 175
1235
+ },
1236
+ {
1237
+ "epoch": 0.8627450980392157,
1238
+ "grad_norm": 0.5445258021354675,
1239
+ "learning_rate": 4.805809975558828e-06,
1240
+ "loss": 1.8357,
1241
+ "step": 176
1242
+ },
1243
+ {
1244
+ "epoch": 0.8676470588235294,
1245
+ "grad_norm": 0.7583712339401245,
1246
+ "learning_rate": 4.47379124012689e-06,
1247
+ "loss": 1.9937,
1248
+ "step": 177
1249
+ },
1250
+ {
1251
+ "epoch": 0.8725490196078431,
1252
+ "grad_norm": 0.550308883190155,
1253
+ "learning_rate": 4.153118590360561e-06,
1254
+ "loss": 1.7592,
1255
+ "step": 178
1256
+ },
1257
+ {
1258
+ "epoch": 0.8774509803921569,
1259
+ "grad_norm": 0.5869921445846558,
1260
+ "learning_rate": 3.843871944606969e-06,
1261
+ "loss": 1.8233,
1262
+ "step": 179
1263
+ },
1264
+ {
1265
+ "epoch": 0.8823529411764706,
1266
+ "grad_norm": 0.624453067779541,
1267
+ "learning_rate": 3.5461283736134722e-06,
1268
+ "loss": 1.7236,
1269
+ "step": 180
1270
+ },
1271
+ {
1272
+ "epoch": 0.8872549019607843,
1273
+ "grad_norm": 0.5934841632843018,
1274
+ "learning_rate": 3.2599620813200837e-06,
1275
+ "loss": 2.2794,
1276
+ "step": 181
1277
+ },
1278
+ {
1279
+ "epoch": 0.8921568627450981,
1280
+ "grad_norm": 0.6147695183753967,
1281
+ "learning_rate": 2.9854443863662262e-06,
1282
+ "loss": 2.0399,
1283
+ "step": 182
1284
+ },
1285
+ {
1286
+ "epoch": 0.8970588235294118,
1287
+ "grad_norm": 0.6072641611099243,
1288
+ "learning_rate": 2.722643704316652e-06,
1289
+ "loss": 2.0926,
1290
+ "step": 183
1291
+ },
1292
+ {
1293
+ "epoch": 0.9019607843137255,
1294
+ "grad_norm": 0.6701170206069946,
1295
+ "learning_rate": 2.4716255306108605e-06,
1296
+ "loss": 2.0332,
1297
+ "step": 184
1298
+ },
1299
+ {
1300
+ "epoch": 0.9068627450980392,
1301
+ "grad_norm": 0.6028889417648315,
1302
+ "learning_rate": 2.2324524242402613e-06,
1303
+ "loss": 2.066,
1304
+ "step": 185
1305
+ },
1306
+ {
1307
+ "epoch": 0.9117647058823529,
1308
+ "grad_norm": 0.5733740329742432,
1309
+ "learning_rate": 2.0051839921571448e-06,
1310
+ "loss": 2.119,
1311
+ "step": 186
1312
+ },
1313
+ {
1314
+ "epoch": 0.9166666666666666,
1315
+ "grad_norm": 0.6689417958259583,
1316
+ "learning_rate": 1.7898768744194162e-06,
1317
+ "loss": 2.1784,
1318
+ "step": 187
1319
+ },
1320
+ {
1321
+ "epoch": 0.9215686274509803,
1322
+ "grad_norm": 0.6812567114830017,
1323
+ "learning_rate": 1.5865847300746417e-06,
1324
+ "loss": 1.7479,
1325
+ "step": 188
1326
+ },
1327
+ {
1328
+ "epoch": 0.9264705882352942,
1329
+ "grad_norm": 0.6046686172485352,
1330
+ "learning_rate": 1.3953582237871521e-06,
1331
+ "loss": 1.942,
1332
+ "step": 189
1333
+ },
1334
+ {
1335
+ "epoch": 0.9313725490196079,
1336
+ "grad_norm": 0.6390747427940369,
1337
+ "learning_rate": 1.2162450132113201e-06,
1338
+ "loss": 2.1545,
1339
+ "step": 190
1340
+ },
1341
+ {
1342
+ "epoch": 0.9362745098039216,
1343
+ "grad_norm": 0.6433112621307373,
1344
+ "learning_rate": 1.049289737114273e-06,
1345
+ "loss": 1.9614,
1346
+ "step": 191
1347
+ },
1348
+ {
1349
+ "epoch": 0.9411764705882353,
1350
+ "grad_norm": 0.6422625780105591,
1351
+ "learning_rate": 8.945340042509797e-07,
1352
+ "loss": 2.0157,
1353
+ "step": 192
1354
+ },
1355
+ {
1356
+ "epoch": 0.946078431372549,
1357
+ "grad_norm": 0.696194052696228,
1358
+ "learning_rate": 7.520163829944804e-07,
1359
+ "loss": 2.3783,
1360
+ "step": 193
1361
+ },
1362
+ {
1363
+ "epoch": 0.9509803921568627,
1364
+ "grad_norm": 0.6186797618865967,
1365
+ "learning_rate": 6.217723917238128e-07,
1366
+ "loss": 2.0641,
1367
+ "step": 194
1368
+ },
1369
+ {
1370
+ "epoch": 0.9558823529411765,
1371
+ "grad_norm": 0.7474207878112793,
1372
+ "learning_rate": 5.038344899721436e-07,
1373
+ "loss": 2.3323,
1374
+ "step": 195
1375
+ },
1376
+ {
1377
+ "epoch": 0.9607843137254902,
1378
+ "grad_norm": 0.6354583501815796,
1379
+ "learning_rate": 3.9823207033710676e-07,
1380
+ "loss": 2.0189,
1381
+ "step": 196
1382
+ },
1383
+ {
1384
+ "epoch": 0.9656862745098039,
1385
+ "grad_norm": 0.6712233424186707,
1386
+ "learning_rate": 3.0499145115561176e-07,
1387
+ "loss": 2.1329,
1388
+ "step": 197
1389
+ },
1390
+ {
1391
+ "epoch": 0.9705882352941176,
1392
+ "grad_norm": 0.6209347248077393,
1393
+ "learning_rate": 2.2413586994470825e-07,
1394
+ "loss": 2.3842,
1395
+ "step": 198
1396
+ },
1397
+ {
1398
+ "epoch": 0.9754901960784313,
1399
+ "grad_norm": 0.6732835173606873,
1400
+ "learning_rate": 1.5568547761034004e-07,
1401
+ "loss": 2.4314,
1402
+ "step": 199
1403
+ },
1404
+ {
1405
+ "epoch": 0.9803921568627451,
1406
+ "grad_norm": 0.8073440194129944,
1407
+ "learning_rate": 9.965733342532924e-08,
1408
+ "loss": 2.5627,
1409
+ "step": 200
1410
+ },
1411
+ {
1412
+ "epoch": 0.9852941176470589,
1413
+ "grad_norm": 0.6623374223709106,
1414
+ "learning_rate": 5.606540077782163e-08,
1415
+ "loss": 1.9204,
1416
+ "step": 201
1417
+ },
1418
+ {
1419
+ "epoch": 0.9901960784313726,
1420
+ "grad_norm": 0.5475188493728638,
1421
+ "learning_rate": 2.4920543691309138e-08,
1422
+ "loss": 1.7823,
1423
+ "step": 202
1424
+ },
1425
+ {
1426
+ "epoch": 0.9950980392156863,
1427
+ "grad_norm": 0.5792528390884399,
1428
+ "learning_rate": 6.2305241171345395e-09,
1429
+ "loss": 1.8127,
1430
+ "step": 203
1431
+ },
1432
+ {
1433
+ "epoch": 1.0,
1434
+ "grad_norm": 0.741917610168457,
1435
+ "learning_rate": 0.0,
1436
+ "loss": 2.1431,
1437
+ "step": 204
1438
  }
1439
  ],
1440
  "logging_steps": 1,
 
1449
  "should_evaluate": false,
1450
  "should_log": false,
1451
  "should_save": true,
1452
+ "should_training_stop": true
1453
  },
1454
  "attributes": {}
1455
  }
1456
  },
1457
+ "total_flos": 2652053607677952.0,
1458
  "train_batch_size": 4,
1459
  "trial_name": null,
1460
  "trial_params": null