Training in progress, step 33800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1187,156 +1187,13 @@ You can finetune this model on your own dataset.
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
| 1190 |
-
<details><summary>Click to expand</summary>
|
| 1191 |
-
|
| 1192 |
| Epoch | Step | Training Loss |
|
| 1193 |
|:------:|:-----:|:-------------:|
|
| 1194 |
-
| 0.
|
| 1195 |
-
| 0.
|
| 1196 |
-
| 0.
|
| 1197 |
-
| 0.
|
| 1198 |
-
| 0.4709 | 26650 | 0.276 |
|
| 1199 |
-
| 0.4718 | 26700 | 0.2572 |
|
| 1200 |
-
| 0.4727 | 26750 | 0.2821 |
|
| 1201 |
-
| 0.4736 | 26800 | 0.2753 |
|
| 1202 |
-
| 0.4745 | 26850 | 0.2789 |
|
| 1203 |
-
| 0.4753 | 26900 | 0.5175 |
|
| 1204 |
-
| 0.4762 | 26950 | 0.3398 |
|
| 1205 |
-
| 0.4771 | 27000 | 0.3683 |
|
| 1206 |
-
| 0.4780 | 27050 | 0.341 |
|
| 1207 |
-
| 0.4789 | 27100 | 0.2753 |
|
| 1208 |
-
| 0.4798 | 27150 | 0.35 |
|
| 1209 |
-
| 0.4806 | 27200 | 0.3143 |
|
| 1210 |
-
| 0.4815 | 27250 | 0.3968 |
|
| 1211 |
-
| 0.4824 | 27300 | 0.3246 |
|
| 1212 |
-
| 0.4833 | 27350 | 0.3131 |
|
| 1213 |
-
| 0.4842 | 27400 | 0.3078 |
|
| 1214 |
-
| 0.4851 | 27450 | 0.419 |
|
| 1215 |
-
| 0.4859 | 27500 | 0.3959 |
|
| 1216 |
-
| 0.4868 | 27550 | 0.3754 |
|
| 1217 |
-
| 0.4877 | 27600 | 0.3163 |
|
| 1218 |
-
| 0.4886 | 27650 | 0.35 |
|
| 1219 |
-
| 0.4895 | 27700 | 0.3397 |
|
| 1220 |
-
| 0.4904 | 27750 | 0.3853 |
|
| 1221 |
-
| 0.4912 | 27800 | 0.2568 |
|
| 1222 |
-
| 0.4921 | 27850 | 0.3108 |
|
| 1223 |
-
| 0.4930 | 27900 | 0.4098 |
|
| 1224 |
-
| 0.4939 | 27950 | 0.3364 |
|
| 1225 |
-
| 0.4948 | 28000 | 0.3354 |
|
| 1226 |
-
| 0.4957 | 28050 | 0.2879 |
|
| 1227 |
-
| 0.4965 | 28100 | 0.3604 |
|
| 1228 |
-
| 0.4974 | 28150 | 0.2612 |
|
| 1229 |
-
| 0.4983 | 28200 | 0.3593 |
|
| 1230 |
-
| 0.4992 | 28250 | 0.2961 |
|
| 1231 |
-
| 0.5001 | 28300 | 0.3338 |
|
| 1232 |
-
| 0.5010 | 28350 | 0.3109 |
|
| 1233 |
-
| 0.5018 | 28400 | 0.3234 |
|
| 1234 |
-
| 0.5027 | 28450 | 0.3257 |
|
| 1235 |
-
| 0.5036 | 28500 | 0.4675 |
|
| 1236 |
-
| 0.5045 | 28550 | 0.4318 |
|
| 1237 |
-
| 0.5054 | 28600 | 0.3594 |
|
| 1238 |
-
| 0.5063 | 28650 | 0.3214 |
|
| 1239 |
-
| 0.5071 | 28700 | 0.2856 |
|
| 1240 |
-
| 0.5080 | 28750 | 0.3094 |
|
| 1241 |
-
| 0.5089 | 28800 | 0.3933 |
|
| 1242 |
-
| 0.5098 | 28850 | 0.3432 |
|
| 1243 |
-
| 0.5107 | 28900 | 0.3766 |
|
| 1244 |
-
| 0.5116 | 28950 | 0.3308 |
|
| 1245 |
-
| 0.5124 | 29000 | 0.3453 |
|
| 1246 |
-
| 0.5133 | 29050 | 0.2904 |
|
| 1247 |
-
| 0.5142 | 29100 | 0.2647 |
|
| 1248 |
-
| 0.5151 | 29150 | 0.4395 |
|
| 1249 |
-
| 0.5160 | 29200 | 0.295 |
|
| 1250 |
-
| 0.5169 | 29250 | 0.3927 |
|
| 1251 |
-
| 0.5178 | 29300 | 0.3492 |
|
| 1252 |
-
| 0.5186 | 29350 | 0.3304 |
|
| 1253 |
-
| 0.5195 | 29400 | 0.3557 |
|
| 1254 |
-
| 0.5204 | 29450 | 0.3389 |
|
| 1255 |
-
| 0.5213 | 29500 | 0.3322 |
|
| 1256 |
-
| 0.5222 | 29550 | 0.3053 |
|
| 1257 |
-
| 0.5231 | 29600 | 0.2486 |
|
| 1258 |
-
| 0.5239 | 29650 | 0.282 |
|
| 1259 |
-
| 0.5248 | 29700 | 0.3791 |
|
| 1260 |
-
| 0.5257 | 29750 | 0.3346 |
|
| 1261 |
-
| 0.5266 | 29800 | 0.2743 |
|
| 1262 |
-
| 0.5275 | 29850 | 0.2927 |
|
| 1263 |
-
| 0.5284 | 29900 | 0.3775 |
|
| 1264 |
-
| 0.5292 | 29950 | 0.3114 |
|
| 1265 |
-
| 0.5301 | 30000 | 0.2383 |
|
| 1266 |
-
| 0.5310 | 30050 | 0.3798 |
|
| 1267 |
-
| 0.5319 | 30100 | 0.3204 |
|
| 1268 |
-
| 0.5328 | 30150 | 0.2496 |
|
| 1269 |
-
| 0.5337 | 30200 | 0.4147 |
|
| 1270 |
-
| 0.5345 | 30250 | 0.3021 |
|
| 1271 |
-
| 0.5354 | 30300 | 0.2758 |
|
| 1272 |
-
| 0.5363 | 30350 | 0.3166 |
|
| 1273 |
-
| 0.5372 | 30400 | 0.35 |
|
| 1274 |
-
| 0.5381 | 30450 | 0.3391 |
|
| 1275 |
-
| 0.5390 | 30500 | 0.3576 |
|
| 1276 |
-
| 0.5398 | 30550 | 0.295 |
|
| 1277 |
-
| 0.5407 | 30600 | 0.3449 |
|
| 1278 |
-
| 0.5416 | 30650 | 0.3274 |
|
| 1279 |
-
| 0.5425 | 30700 | 0.3094 |
|
| 1280 |
-
| 0.5434 | 30750 | 0.3077 |
|
| 1281 |
-
| 0.5443 | 30800 | 0.3505 |
|
| 1282 |
-
| 0.5451 | 30850 | 0.3485 |
|
| 1283 |
-
| 0.5460 | 30900 | 0.331 |
|
| 1284 |
-
| 0.5469 | 30950 | 0.2846 |
|
| 1285 |
-
| 0.5478 | 31000 | 0.3647 |
|
| 1286 |
-
| 0.5487 | 31050 | 0.3475 |
|
| 1287 |
-
| 0.5496 | 31100 | 0.2833 |
|
| 1288 |
-
| 0.5504 | 31150 | 0.3 |
|
| 1289 |
-
| 0.5513 | 31200 | 0.3568 |
|
| 1290 |
-
| 0.5522 | 31250 | 0.3268 |
|
| 1291 |
-
| 0.5531 | 31300 | 0.4005 |
|
| 1292 |
-
| 0.5540 | 31350 | 0.2993 |
|
| 1293 |
-
| 0.5549 | 31400 | 0.3463 |
|
| 1294 |
-
| 0.5557 | 31450 | 0.3654 |
|
| 1295 |
-
| 0.5566 | 31500 | 0.3329 |
|
| 1296 |
-
| 0.5575 | 31550 | 0.2794 |
|
| 1297 |
-
| 0.5584 | 31600 | 0.4189 |
|
| 1298 |
-
| 0.5593 | 31650 | 0.3643 |
|
| 1299 |
-
| 0.5602 | 31700 | 0.3578 |
|
| 1300 |
-
| 0.5610 | 31750 | 0.3193 |
|
| 1301 |
-
| 0.5619 | 31800 | 0.327 |
|
| 1302 |
-
| 0.5628 | 31850 | 0.3429 |
|
| 1303 |
-
| 0.5637 | 31900 | 0.2994 |
|
| 1304 |
-
| 0.5646 | 31950 | 0.3219 |
|
| 1305 |
-
| 0.5655 | 32000 | 0.2902 |
|
| 1306 |
-
| 0.5663 | 32050 | 0.3896 |
|
| 1307 |
-
| 0.5672 | 32100 | 0.2491 |
|
| 1308 |
-
| 0.5681 | 32150 | 0.2663 |
|
| 1309 |
-
| 0.5690 | 32200 | 0.3433 |
|
| 1310 |
-
| 0.5699 | 32250 | 0.3375 |
|
| 1311 |
-
| 0.5708 | 32300 | 0.2891 |
|
| 1312 |
-
| 0.5716 | 32350 | 0.296 |
|
| 1313 |
-
| 0.5725 | 32400 | 0.2478 |
|
| 1314 |
-
| 0.5734 | 32450 | 0.3514 |
|
| 1315 |
-
| 0.5743 | 32500 | 0.2741 |
|
| 1316 |
-
| 0.5752 | 32550 | 0.3546 |
|
| 1317 |
-
| 0.5761 | 32600 | 0.3927 |
|
| 1318 |
-
| 0.5769 | 32650 | 0.2725 |
|
| 1319 |
-
| 0.5778 | 32700 | 0.3167 |
|
| 1320 |
-
| 0.5787 | 32750 | 0.3249 |
|
| 1321 |
-
| 0.5796 | 32800 | 0.2443 |
|
| 1322 |
-
| 0.5805 | 32850 | 0.4113 |
|
| 1323 |
-
| 0.5814 | 32900 | 0.3106 |
|
| 1324 |
-
| 0.5822 | 32950 | 0.2841 |
|
| 1325 |
-
| 0.5831 | 33000 | 0.2786 |
|
| 1326 |
-
| 0.5840 | 33050 | 0.3576 |
|
| 1327 |
-
| 0.5849 | 33100 | 0.2475 |
|
| 1328 |
-
| 0.5858 | 33150 | 0.348 |
|
| 1329 |
-
| 0.5867 | 33200 | 0.2779 |
|
| 1330 |
-
| 0.5875 | 33250 | 0.3166 |
|
| 1331 |
-
| 0.5884 | 33300 | 0.3448 |
|
| 1332 |
-
| 0.5893 | 33350 | 0.2409 |
|
| 1333 |
-
| 0.5902 | 33400 | 0.3313 |
|
| 1334 |
-
| 0.5911 | 33450 | 0.2981 |
|
| 1335 |
-
| 0.5920 | 33500 | 0.269 |
|
| 1336 |
-
| 0.5929 | 33550 | 0.4098 |
|
| 1337 |
-
| 0.5937 | 33600 | 0.2924 |
|
| 1338 |
|
| 1339 |
-
</details>
|
| 1340 |
|
| 1341 |
### Framework Versions
|
| 1342 |
- Python: 3.11.13
|
|
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
|
|
|
|
|
|
| 1190 |
| Epoch | Step | Training Loss |
|
| 1191 |
|:------:|:-----:|:-------------:|
|
| 1192 |
+
| 0.5946 | 33650 | 0.2952 |
|
| 1193 |
+
| 0.5955 | 33700 | 0.2754 |
|
| 1194 |
+
| 0.5964 | 33750 | 0.3434 |
|
| 1195 |
+
| 0.5973 | 33800 | 0.2541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
|
|
|
|
| 1197 |
|
| 1198 |
### Framework Versions
|
| 1199 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f44c393193a7debcaa9fe116b2e33229c62b31c7228329c28ad0491e8701e1a
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:620d6a0f0e8b3c0b610d1a9d8d426a0e427c22a7ec7ed38356be652403968d4e
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ba676fcdff1c56d78801dd51dede93231c1d56645e7877743bcba848aec097a
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b205c3d2c6273622b727175547a26240710159f4256bcd6246156ce73b10ee3f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ff383a89d52f883cafae387eb5146463fb8074271a1417f846316599e03e648
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4712,6 +4712,34 @@
|
|
| 4712 |
"learning_rate": 2.258251359682708e-05,
|
| 4713 |
"loss": 0.2924,
|
| 4714 |
"step": 33600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4715 |
}
|
| 4716 |
],
|
| 4717 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5972681168383666,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 33800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4712 |
"learning_rate": 2.258251359682708e-05,
|
| 4713 |
"loss": 0.2924,
|
| 4714 |
"step": 33600
|
| 4715 |
+
},
|
| 4716 |
+
{
|
| 4717 |
+
"epoch": 0.594617518686717,
|
| 4718 |
+
"grad_norm": 2.0076584815979004,
|
| 4719 |
+
"learning_rate": 2.2533427578488545e-05,
|
| 4720 |
+
"loss": 0.2952,
|
| 4721 |
+
"step": 33650
|
| 4722 |
+
},
|
| 4723 |
+
{
|
| 4724 |
+
"epoch": 0.5955010514039335,
|
| 4725 |
+
"grad_norm": 1.203574299812317,
|
| 4726 |
+
"learning_rate": 2.2484341560150006e-05,
|
| 4727 |
+
"loss": 0.2754,
|
| 4728 |
+
"step": 33700
|
| 4729 |
+
},
|
| 4730 |
+
{
|
| 4731 |
+
"epoch": 0.59638458412115,
|
| 4732 |
+
"grad_norm": 2.815420150756836,
|
| 4733 |
+
"learning_rate": 2.243525554181147e-05,
|
| 4734 |
+
"loss": 0.3434,
|
| 4735 |
+
"step": 33750
|
| 4736 |
+
},
|
| 4737 |
+
{
|
| 4738 |
+
"epoch": 0.5972681168383666,
|
| 4739 |
+
"grad_norm": 1.487236499786377,
|
| 4740 |
+
"learning_rate": 2.2386169523472935e-05,
|
| 4741 |
+
"loss": 0.2541,
|
| 4742 |
+
"step": 33800
|
| 4743 |
}
|
| 4744 |
],
|
| 4745 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5560
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a21a534e4993b863994e64d84a120efcce8aac5f212cbacbdb8f1e5edfbb2e
|
| 3 |
size 5560
|