Training in progress, step 18000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e87cb14acd4e6fe6bcad897a8f042f31acae6fb2e34f61ed68b17e469e4f393
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e64bb265a77f08e4da4240e30c68e0a0ae7eb01df9530a76fcba45020acfd6b
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea1c5d0777adfd9f21aec03650885c94d8b0325360d164ecc81c3a16d777cc03
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bc3c023f1bacc02e7db4990bce2636fd592e49ea544612a30431a586cc170fc
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.0821109265089035,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-17500",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2737,6 +2737,84 @@
|
|
| 2737 |
"eval_samples_per_second": 22.71,
|
| 2738 |
"eval_steps_per_second": 5.677,
|
| 2739 |
"step": 17500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2740 |
}
|
| 2741 |
],
|
| 2742 |
"logging_steps": 50,
|
|
@@ -2756,7 +2834,7 @@
|
|
| 2756 |
"attributes": {}
|
| 2757 |
}
|
| 2758 |
},
|
| 2759 |
-
"total_flos": 4.
|
| 2760 |
"train_batch_size": 4,
|
| 2761 |
"trial_name": null,
|
| 2762 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.0821109265089035,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-17500",
|
| 4 |
+
"epoch": 1.44,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 18000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2737 |
"eval_samples_per_second": 22.71,
|
| 2738 |
"eval_steps_per_second": 5.677,
|
| 2739 |
"step": 17500
|
| 2740 |
+
},
|
| 2741 |
+
{
|
| 2742 |
+
"epoch": 1.404,
|
| 2743 |
+
"grad_norm": 0.1634778082370758,
|
| 2744 |
+
"learning_rate": 8.9448e-06,
|
| 2745 |
+
"loss": 0.0532,
|
| 2746 |
+
"step": 17550
|
| 2747 |
+
},
|
| 2748 |
+
{
|
| 2749 |
+
"epoch": 1.408,
|
| 2750 |
+
"grad_norm": 0.12161219865083694,
|
| 2751 |
+
"learning_rate": 8.8848e-06,
|
| 2752 |
+
"loss": 0.0541,
|
| 2753 |
+
"step": 17600
|
| 2754 |
+
},
|
| 2755 |
+
{
|
| 2756 |
+
"epoch": 1.412,
|
| 2757 |
+
"grad_norm": 0.15347328782081604,
|
| 2758 |
+
"learning_rate": 8.824799999999999e-06,
|
| 2759 |
+
"loss": 0.0535,
|
| 2760 |
+
"step": 17650
|
| 2761 |
+
},
|
| 2762 |
+
{
|
| 2763 |
+
"epoch": 1.416,
|
| 2764 |
+
"grad_norm": 0.07917541265487671,
|
| 2765 |
+
"learning_rate": 8.7648e-06,
|
| 2766 |
+
"loss": 0.0552,
|
| 2767 |
+
"step": 17700
|
| 2768 |
+
},
|
| 2769 |
+
{
|
| 2770 |
+
"epoch": 1.42,
|
| 2771 |
+
"grad_norm": 0.15406325459480286,
|
| 2772 |
+
"learning_rate": 8.7048e-06,
|
| 2773 |
+
"loss": 0.0584,
|
| 2774 |
+
"step": 17750
|
| 2775 |
+
},
|
| 2776 |
+
{
|
| 2777 |
+
"epoch": 1.424,
|
| 2778 |
+
"grad_norm": 0.21300417184829712,
|
| 2779 |
+
"learning_rate": 8.6448e-06,
|
| 2780 |
+
"loss": 0.0565,
|
| 2781 |
+
"step": 17800
|
| 2782 |
+
},
|
| 2783 |
+
{
|
| 2784 |
+
"epoch": 1.428,
|
| 2785 |
+
"grad_norm": 0.20691223442554474,
|
| 2786 |
+
"learning_rate": 8.584800000000001e-06,
|
| 2787 |
+
"loss": 0.0504,
|
| 2788 |
+
"step": 17850
|
| 2789 |
+
},
|
| 2790 |
+
{
|
| 2791 |
+
"epoch": 1.432,
|
| 2792 |
+
"grad_norm": 0.1366143375635147,
|
| 2793 |
+
"learning_rate": 8.5248e-06,
|
| 2794 |
+
"loss": 0.0573,
|
| 2795 |
+
"step": 17900
|
| 2796 |
+
},
|
| 2797 |
+
{
|
| 2798 |
+
"epoch": 1.436,
|
| 2799 |
+
"grad_norm": 0.11994505673646927,
|
| 2800 |
+
"learning_rate": 8.4648e-06,
|
| 2801 |
+
"loss": 0.0576,
|
| 2802 |
+
"step": 17950
|
| 2803 |
+
},
|
| 2804 |
+
{
|
| 2805 |
+
"epoch": 1.44,
|
| 2806 |
+
"grad_norm": 0.06461376696825027,
|
| 2807 |
+
"learning_rate": 8.404800000000001e-06,
|
| 2808 |
+
"loss": 0.0595,
|
| 2809 |
+
"step": 18000
|
| 2810 |
+
},
|
| 2811 |
+
{
|
| 2812 |
+
"epoch": 1.44,
|
| 2813 |
+
"eval_loss": 0.08217783272266388,
|
| 2814 |
+
"eval_runtime": 88.0706,
|
| 2815 |
+
"eval_samples_per_second": 22.709,
|
| 2816 |
+
"eval_steps_per_second": 5.677,
|
| 2817 |
+
"step": 18000
|
| 2818 |
}
|
| 2819 |
],
|
| 2820 |
"logging_steps": 50,
|
|
|
|
| 2834 |
"attributes": {}
|
| 2835 |
}
|
| 2836 |
},
|
| 2837 |
+
"total_flos": 4.384496812032e+16,
|
| 2838 |
"train_batch_size": 4,
|
| 2839 |
"trial_name": null,
|
| 2840 |
"trial_params": null
|