Training in progress, step 1250, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1d76b1ba4f1499da4a8ed5a8300e6dc366438ac57bf8279ee969fd0b2ca4728b
 size 1279323952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb84ed3f14cc528de2ab5492455762837c66a3b6250725b8ebe7d7239de5d41b
 size 2558803194
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c2e7a6cc60a56e9e41bc4d01760bda59a0858d99962b8232c0c24c34810dcfef
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0aaab3ece7f21f6134c7946ccfd6c2682813ad47096098a1e3a156edc7ad945a
 size 1064
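All four files above are stored with Git LFS, so each diff touches only the three-line pointer file: the LFS spec version, the sha256 object ID (oid) of the blob, and its byte size (identical on both sides here; the old oids are truncated in this view). As a minimal sketch of what a pointer encodes, not part of this repo, the following assumes the pointer text and a locally fetched blob:

import hashlib

def parse_lfs_pointer(text: str) -> dict:
    # Each pointer line is "key value", e.g. "oid sha256:<hex>".
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer: dict, blob_path: str) -> bool:
    # Compare the blob's sha256 digest and byte size against the pointer.
    expected_oid = pointer["oid"].removeprefix("sha256:")
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == int(pointer["size"])

# Example with the scheduler.pt pointer from this commit:
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:0aaab3ece7f21f6134c7946ccfd6c2682813ad47096098a1e3a156edc7ad945a\n"
    "size 1064"
)
# verify_blob(pointer, "last-checkpoint/scheduler.pt")  # requires the resolved blob locally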
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.9303643703460693,
   "best_model_checkpoint": "miner_id_24/checkpoint-1200",
-  "epoch": 0.
+  "epoch": 0.6558237145855194,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 1250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8607,6 +8607,364 @@
       "eval_samples_per_second": 5.662,
       "eval_steps_per_second": 1.891,
       "step": 1200
+    },
+    {
+      "epoch": 0.630115424973767,
+      "grad_norm": 0.6785674095153809,
+      "learning_rate": 0.0001792738673354654,
+      "loss": 8.3901,
+      "step": 1201
+    },
+    {
+      "epoch": 0.6306400839454355,
+      "grad_norm": 0.6648474931716919,
+      "learning_rate": 0.00017924030602439216,
+      "loss": 7.8052,
+      "step": 1202
+    },
+    {
+      "epoch": 0.6311647429171039,
+      "grad_norm": 0.6322881579399109,
+      "learning_rate": 0.00017920672070963228,
+      "loss": 8.0898,
+      "step": 1203
+    },
+    {
+      "epoch": 0.6316894018887723,
+      "grad_norm": 0.6528663039207458,
+      "learning_rate": 0.0001791731114013594,
+      "loss": 8.0178,
+      "step": 1204
+    },
+    {
+      "epoch": 0.6322140608604407,
+      "grad_norm": 0.7760594487190247,
+      "learning_rate": 0.00017913947810975462,
+      "loss": 7.8516,
+      "step": 1205
+    },
+    {
+      "epoch": 0.6327387198321092,
+      "grad_norm": 0.7572141289710999,
+      "learning_rate": 0.0001791058208450062,
+      "loss": 7.5367,
+      "step": 1206
+    },
+    {
+      "epoch": 0.6332633788037776,
+      "grad_norm": 0.952572762966156,
+      "learning_rate": 0.00017907213961730972,
+      "loss": 8.8389,
+      "step": 1207
+    },
+    {
+      "epoch": 0.633788037775446,
+      "grad_norm": 0.8288649320602417,
+      "learning_rate": 0.00017903843443686795,
+      "loss": 8.2295,
+      "step": 1208
+    },
+    {
+      "epoch": 0.6343126967471143,
+      "grad_norm": 0.9002505540847778,
+      "learning_rate": 0.00017900470531389095,
+      "loss": 8.2813,
+      "step": 1209
+    },
+    {
+      "epoch": 0.6348373557187827,
+      "grad_norm": 0.8269932866096497,
+      "learning_rate": 0.00017897095225859606,
+      "loss": 8.9073,
+      "step": 1210
+    },
+    {
+      "epoch": 0.6353620146904512,
+      "grad_norm": 0.8590414524078369,
+      "learning_rate": 0.00017893717528120784,
+      "loss": 7.6909,
+      "step": 1211
+    },
+    {
+      "epoch": 0.6358866736621196,
+      "grad_norm": 1.1503770351409912,
+      "learning_rate": 0.00017890337439195811,
+      "loss": 8.3473,
+      "step": 1212
+    },
+    {
+      "epoch": 0.636411332633788,
+      "grad_norm": 0.7925499081611633,
+      "learning_rate": 0.00017886954960108587,
+      "loss": 7.5837,
+      "step": 1213
+    },
+    {
+      "epoch": 0.6369359916054564,
+      "grad_norm": 1.0551660060882568,
+      "learning_rate": 0.0001788357009188375,
+      "loss": 8.6293,
+      "step": 1214
+    },
+    {
+      "epoch": 0.6374606505771249,
+      "grad_norm": 1.1928114891052246,
+      "learning_rate": 0.0001788018283554665,
+      "loss": 8.7884,
+      "step": 1215
+    },
+    {
+      "epoch": 0.6379853095487933,
+      "grad_norm": 0.8815275430679321,
+      "learning_rate": 0.00017876793192123365,
+      "loss": 7.4191,
+      "step": 1216
+    },
+    {
+      "epoch": 0.6385099685204617,
+      "grad_norm": 1.006833553314209,
+      "learning_rate": 0.0001787340116264069,
+      "loss": 7.6687,
+      "step": 1217
+    },
+    {
+      "epoch": 0.6390346274921301,
+      "grad_norm": 0.9964754581451416,
+      "learning_rate": 0.00017870006748126153,
+      "loss": 7.8293,
+      "step": 1218
+    },
+    {
+      "epoch": 0.6395592864637986,
+      "grad_norm": 1.082804799079895,
+      "learning_rate": 0.00017866609949607995,
+      "loss": 8.009,
+      "step": 1219
+    },
+    {
+      "epoch": 0.640083945435467,
+      "grad_norm": 1.3665459156036377,
+      "learning_rate": 0.0001786321076811519,
+      "loss": 8.2669,
+      "step": 1220
+    },
+    {
+      "epoch": 0.6406086044071354,
+      "grad_norm": 1.0634828805923462,
+      "learning_rate": 0.0001785980920467742,
+      "loss": 8.3186,
+      "step": 1221
+    },
+    {
+      "epoch": 0.6411332633788038,
+      "grad_norm": 1.1793396472930908,
+      "learning_rate": 0.000178564052603251,
+      "loss": 8.2618,
+      "step": 1222
+    },
+    {
+      "epoch": 0.6416579223504721,
+      "grad_norm": 1.1798900365829468,
+      "learning_rate": 0.0001785299893608936,
+      "loss": 8.0329,
+      "step": 1223
+    },
+    {
+      "epoch": 0.6421825813221406,
+      "grad_norm": 1.1078875064849854,
+      "learning_rate": 0.00017849590233002052,
+      "loss": 7.5323,
+      "step": 1224
+    },
+    {
+      "epoch": 0.642707240293809,
+      "grad_norm": 1.2947778701782227,
+      "learning_rate": 0.0001784617915209575,
+      "loss": 8.3191,
+      "step": 1225
+    },
+    {
+      "epoch": 0.6432318992654774,
+      "grad_norm": 1.0576255321502686,
+      "learning_rate": 0.00017842765694403752,
+      "loss": 8.1193,
+      "step": 1226
+    },
+    {
+      "epoch": 0.6437565582371458,
+      "grad_norm": 1.1417510509490967,
+      "learning_rate": 0.00017839349860960068,
+      "loss": 8.0367,
+      "step": 1227
+    },
+    {
+      "epoch": 0.6442812172088143,
+      "grad_norm": 1.2275340557098389,
+      "learning_rate": 0.0001783593165279943,
+      "loss": 8.2202,
+      "step": 1228
+    },
+    {
+      "epoch": 0.6448058761804827,
+      "grad_norm": 1.2881832122802734,
+      "learning_rate": 0.00017832511070957295,
+      "loss": 8.2294,
+      "step": 1229
+    },
+    {
+      "epoch": 0.6453305351521511,
+      "grad_norm": 1.375793695449829,
+      "learning_rate": 0.00017829088116469834,
+      "loss": 7.1402,
+      "step": 1230
+    },
+    {
+      "epoch": 0.6458551941238195,
+      "grad_norm": 1.2434371709823608,
+      "learning_rate": 0.00017825662790373935,
+      "loss": 8.8262,
+      "step": 1231
+    },
+    {
+      "epoch": 0.646379853095488,
+      "grad_norm": 1.3222373723983765,
+      "learning_rate": 0.00017822235093707206,
+      "loss": 7.7723,
+      "step": 1232
+    },
+    {
+      "epoch": 0.6469045120671564,
+      "grad_norm": 1.4988670349121094,
+      "learning_rate": 0.0001781880502750798,
+      "loss": 7.4396,
+      "step": 1233
+    },
+    {
+      "epoch": 0.6474291710388248,
+      "grad_norm": 1.1858347654342651,
+      "learning_rate": 0.00017815372592815297,
+      "loss": 8.38,
+      "step": 1234
+    },
+    {
+      "epoch": 0.6479538300104932,
+      "grad_norm": 1.3001600503921509,
+      "learning_rate": 0.0001781193779066892,
+      "loss": 7.4005,
+      "step": 1235
+    },
+    {
+      "epoch": 0.6484784889821616,
+      "grad_norm": 1.302140712738037,
+      "learning_rate": 0.0001780850062210933,
+      "loss": 7.0935,
+      "step": 1236
+    },
+    {
+      "epoch": 0.64900314795383,
+      "grad_norm": 1.4957777261734009,
+      "learning_rate": 0.0001780506108817772,
+      "loss": 8.0204,
+      "step": 1237
+    },
+    {
+      "epoch": 0.6495278069254984,
+      "grad_norm": 1.4060813188552856,
+      "learning_rate": 0.00017801619189916008,
+      "loss": 7.5112,
+      "step": 1238
+    },
+    {
+      "epoch": 0.6500524658971668,
+      "grad_norm": 1.3509002923965454,
+      "learning_rate": 0.0001779817492836682,
+      "loss": 7.5706,
+      "step": 1239
+    },
+    {
+      "epoch": 0.6505771248688352,
+      "grad_norm": 1.6165887117385864,
+      "learning_rate": 0.000177947283045735,
+      "loss": 7.5412,
+      "step": 1240
+    },
+    {
+      "epoch": 0.6511017838405037,
+      "grad_norm": 1.7650121450424194,
+      "learning_rate": 0.00017791279319580113,
+      "loss": 7.8366,
+      "step": 1241
+    },
+    {
+      "epoch": 0.6516264428121721,
+      "grad_norm": 1.5572558641433716,
+      "learning_rate": 0.00017787827974431427,
+      "loss": 6.7198,
+      "step": 1242
+    },
+    {
+      "epoch": 0.6521511017838405,
+      "grad_norm": 1.6787631511688232,
+      "learning_rate": 0.00017784374270172942,
+      "loss": 8.5223,
+      "step": 1243
+    },
+    {
+      "epoch": 0.6526757607555089,
+      "grad_norm": 1.975051999092102,
+      "learning_rate": 0.00017780918207850857,
+      "loss": 8.3379,
+      "step": 1244
+    },
+    {
+      "epoch": 0.6532004197271774,
+      "grad_norm": 1.9721925258636475,
+      "learning_rate": 0.00017777459788512095,
+      "loss": 8.3842,
+      "step": 1245
+    },
+    {
+      "epoch": 0.6537250786988458,
+      "grad_norm": 2.151292324066162,
+      "learning_rate": 0.00017773999013204284,
+      "loss": 6.9369,
+      "step": 1246
+    },
+    {
+      "epoch": 0.6542497376705142,
+      "grad_norm": 1.9202648401260376,
+      "learning_rate": 0.00017770535882975783,
+      "loss": 7.0776,
+      "step": 1247
+    },
+    {
+      "epoch": 0.6547743966421826,
+      "grad_norm": 2.297982931137085,
+      "learning_rate": 0.0001776707039887564,
+      "loss": 7.2104,
+      "step": 1248
+    },
+    {
+      "epoch": 0.655299055613851,
+      "grad_norm": 2.4272403717041016,
+      "learning_rate": 0.00017763602561953636,
+      "loss": 7.0545,
+      "step": 1249
+    },
+    {
+      "epoch": 0.6558237145855194,
+      "grad_norm": 5.506582736968994,
+      "learning_rate": 0.00017760132373260254,
+      "loss": 7.7152,
+      "step": 1250
+    },
+    {
+      "epoch": 0.6558237145855194,
+      "eval_loss": 1.9324959516525269,
+      "eval_runtime": 82.4867,
+      "eval_samples_per_second": 5.662,
+      "eval_steps_per_second": 1.891,
+      "step": 1250
     }
   ],
   "logging_steps": 1,
@@ -8621,7 +8979,7 @@
       "early_stopping_threshold": 0.0
     },
     "attributes": {
-      "early_stopping_patience_counter":
+      "early_stopping_patience_counter": 1
     }
   },
   "TrainerControl": {
@@ -8635,7 +8993,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.336984962782986e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null
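The trainer_state.json update appends the per-step training logs for steps 1201-1250 plus the step-1250 eval record. Note that the new eval_loss (1.9324959516525269) did not improve on best_metric (1.9303643703460693, set at checkpoint-1200), which is why early_stopping_patience_counter ticks up to 1. A minimal sketch for reading the appended metrics back out of the checkpoint, assuming the standard transformers trainer_state.json schema shown above:

import json

# Load the trainer state saved alongside the checkpoint files.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# log_history mixes per-step training entries (with "loss")
# and periodic eval entries (with "eval_loss").
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"global_step={state['global_step']}  best_metric={state['best_metric']}")
print(f"last train loss {train_logs[-1]['loss']} at step {train_logs[-1]['step']}")
print(f"last eval loss  {eval_logs[-1]['eval_loss']} at step {eval_logs[-1]['step']}")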