Upload folder using huggingface_hub
Browse files
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -53
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 161515608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05168bcf41d42782e54f2110629a07f6dfe491410f4a0c12e5c2ab3d90836271
|
| 3 |
size 161515608
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 323181259
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd295a28962bd0b73c87cd25e49b90c33ee4984b4d89fb83894543530a2dc2f7
|
| 3 |
size 323181259
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ef3bc6f4d35776f47009c7dd438a2c3372feb65dbb821c4678645626f6d1776
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 4.398467432950191,
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step": 145,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1408,56 +1408,6 @@
|
|
| 1408 |
"mean_token_accuracy": 0.9439713805913925,
|
| 1409 |
"num_tokens": 251133.0,
|
| 1410 |
"step": 140
|
| 1411 |
-
},
|
| 1412 |
-
{
|
| 1413 |
-
"entropy": 0.7664961665868759,
|
| 1414 |
-
"epoch": 4.275862068965517,
|
| 1415 |
-
"grad_norm": 4.1875,
|
| 1416 |
-
"learning_rate": 2.0096189432334194e-06,
|
| 1417 |
-
"loss": 0.251,
|
| 1418 |
-
"mean_token_accuracy": 0.9318085461854935,
|
| 1419 |
-
"num_tokens": 253030.0,
|
| 1420 |
-
"step": 141
|
| 1421 |
-
},
|
| 1422 |
-
{
|
| 1423 |
-
"entropy": 0.7621353343129158,
|
| 1424 |
-
"epoch": 4.306513409961686,
|
| 1425 |
-
"grad_norm": 4.59375,
|
| 1426 |
-
"learning_rate": 1.8553997993420495e-06,
|
| 1427 |
-
"loss": 0.2019,
|
| 1428 |
-
"mean_token_accuracy": 0.9506975933909416,
|
| 1429 |
-
"num_tokens": 254683.0,
|
| 1430 |
-
"step": 142
|
| 1431 |
-
},
|
| 1432 |
-
{
|
| 1433 |
-
"entropy": 0.7942825853824615,
|
| 1434 |
-
"epoch": 4.337164750957855,
|
| 1435 |
-
"grad_norm": 7.5625,
|
| 1436 |
-
"learning_rate": 1.706946311531779e-06,
|
| 1437 |
-
"loss": 0.3202,
|
| 1438 |
-
"mean_token_accuracy": 0.9199245423078537,
|
| 1439 |
-
"num_tokens": 256339.0,
|
| 1440 |
-
"step": 143
|
| 1441 |
-
},
|
| 1442 |
-
{
|
| 1443 |
-
"entropy": 0.7943987771868706,
|
| 1444 |
-
"epoch": 4.3678160919540225,
|
| 1445 |
-
"grad_norm": 5.15625,
|
| 1446 |
-
"learning_rate": 1.5643235964088065e-06,
|
| 1447 |
-
"loss": 0.2463,
|
| 1448 |
-
"mean_token_accuracy": 0.9323071017861366,
|
| 1449 |
-
"num_tokens": 257877.0,
|
| 1450 |
-
"step": 144
|
| 1451 |
-
},
|
| 1452 |
-
{
|
| 1453 |
-
"entropy": 0.7164058461785316,
|
| 1454 |
-
"epoch": 4.398467432950191,
|
| 1455 |
-
"grad_norm": 13.4375,
|
| 1456 |
-
"learning_rate": 1.4275942130097097e-06,
|
| 1457 |
-
"loss": 0.1929,
|
| 1458 |
-
"mean_token_accuracy": 0.9492844417691231,
|
| 1459 |
-
"num_tokens": 259301.0,
|
| 1460 |
-
"step": 145
|
| 1461 |
}
|
| 1462 |
],
|
| 1463 |
"logging_steps": 1,
|
|
@@ -1477,7 +1427,7 @@
|
|
| 1477 |
"attributes": {}
|
| 1478 |
}
|
| 1479 |
},
|
| 1480 |
-
"total_flos":
|
| 1481 |
"train_batch_size": 2,
|
| 1482 |
"trial_name": null,
|
| 1483 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 4.245210727969349,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 140,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1408 |
"mean_token_accuracy": 0.9439713805913925,
|
| 1409 |
"num_tokens": 251133.0,
|
| 1410 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
}
|
| 1412 |
],
|
| 1413 |
"logging_steps": 1,
|
|
|
|
| 1427 |
"attributes": {}
|
| 1428 |
}
|
| 1429 |
},
|
| 1430 |
+
"total_flos": 6665368235175936.0,
|
| 1431 |
"train_batch_size": 2,
|
| 1432 |
"trial_name": null,
|
| 1433 |
"trial_params": null
|