Training in progress, step 3750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264070024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39266e7c64916445e7e1ffff7265128efcb14a748bf4d12222a6942b9efa8b29
|
| 3 |
size 264070024
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 510816186
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f0e7d5ff1fdaf9d83fb8c2078034a345f95639cc95b7cc4e41a9b5157c9280f
|
| 3 |
size 510816186
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c28553b4def846f33c1be5403cbde93da158c64099cf7041a0cc043a46e7afc1
|
| 3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fee260ff98ca59616802fa3adc6624edb5c906343c2fcd860cb04b54c36a948
|
| 3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.084647297859192,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-2850",
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2719,6 +2719,119 @@
|
|
| 2719 |
"eval_samples_per_second": 14.759,
|
| 2720 |
"eval_steps_per_second": 14.759,
|
| 2721 |
"step": 3600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2722 |
}
|
| 2723 |
],
|
| 2724 |
"logging_steps": 10,
|
|
@@ -2738,7 +2851,7 @@
|
|
| 2738 |
"attributes": {}
|
| 2739 |
}
|
| 2740 |
},
|
| 2741 |
-
"total_flos": 4.
|
| 2742 |
"train_batch_size": 16,
|
| 2743 |
"trial_name": null,
|
| 2744 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.084647297859192,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-2850",
|
| 4 |
+
"epoch": 2.624212736179146,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3750,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2719 |
"eval_samples_per_second": 14.759,
|
| 2720 |
"eval_steps_per_second": 14.759,
|
| 2721 |
"step": 3600
|
| 2722 |
+
},
|
| 2723 |
+
{
|
| 2724 |
+
"epoch": 2.526242127361791,
|
| 2725 |
+
"grad_norm": 5.060881614685059,
|
| 2726 |
+
"learning_rate": 1.8575520105593817e-05,
|
| 2727 |
+
"loss": 0.3923,
|
| 2728 |
+
"step": 3610
|
| 2729 |
+
},
|
| 2730 |
+
{
|
| 2731 |
+
"epoch": 2.5332400279916025,
|
| 2732 |
+
"grad_norm": 6.2594709396362305,
|
| 2733 |
+
"learning_rate": 1.8326822041411524e-05,
|
| 2734 |
+
"loss": 0.4288,
|
| 2735 |
+
"step": 3620
|
| 2736 |
+
},
|
| 2737 |
+
{
|
| 2738 |
+
"epoch": 2.540237928621414,
|
| 2739 |
+
"grad_norm": 3.7940304279327393,
|
| 2740 |
+
"learning_rate": 1.807942593751973e-05,
|
| 2741 |
+
"loss": 0.3647,
|
| 2742 |
+
"step": 3630
|
| 2743 |
+
},
|
| 2744 |
+
{
|
| 2745 |
+
"epoch": 2.5472358292512247,
|
| 2746 |
+
"grad_norm": 5.75860595703125,
|
| 2747 |
+
"learning_rate": 1.783334196340331e-05,
|
| 2748 |
+
"loss": 0.3543,
|
| 2749 |
+
"step": 3640
|
| 2750 |
+
},
|
| 2751 |
+
{
|
| 2752 |
+
"epoch": 2.5542337298810356,
|
| 2753 |
+
"grad_norm": 4.745769500732422,
|
| 2754 |
+
"learning_rate": 1.758858023461059e-05,
|
| 2755 |
+
"loss": 0.3768,
|
| 2756 |
+
"step": 3650
|
| 2757 |
+
},
|
| 2758 |
+
{
|
| 2759 |
+
"epoch": 2.561231630510847,
|
| 2760 |
+
"grad_norm": 5.631198406219482,
|
| 2761 |
+
"learning_rate": 1.7345150812337564e-05,
|
| 2762 |
+
"loss": 0.3826,
|
| 2763 |
+
"step": 3660
|
| 2764 |
+
},
|
| 2765 |
+
{
|
| 2766 |
+
"epoch": 2.5682295311406578,
|
| 2767 |
+
"grad_norm": 5.964677333831787,
|
| 2768 |
+
"learning_rate": 1.7103063703014372e-05,
|
| 2769 |
+
"loss": 0.3529,
|
| 2770 |
+
"step": 3670
|
| 2771 |
+
},
|
| 2772 |
+
{
|
| 2773 |
+
"epoch": 2.575227431770469,
|
| 2774 |
+
"grad_norm": 5.345946311950684,
|
| 2775 |
+
"learning_rate": 1.6862328857893854e-05,
|
| 2776 |
+
"loss": 0.3153,
|
| 2777 |
+
"step": 3680
|
| 2778 |
+
},
|
| 2779 |
+
{
|
| 2780 |
+
"epoch": 2.58222533240028,
|
| 2781 |
+
"grad_norm": 4.739876747131348,
|
| 2782 |
+
"learning_rate": 1.66229561726426e-05,
|
| 2783 |
+
"loss": 0.3521,
|
| 2784 |
+
"step": 3690
|
| 2785 |
+
},
|
| 2786 |
+
{
|
| 2787 |
+
"epoch": 2.589223233030091,
|
| 2788 |
+
"grad_norm": 5.451272964477539,
|
| 2789 |
+
"learning_rate": 1.6384955486934156e-05,
|
| 2790 |
+
"loss": 0.3648,
|
| 2791 |
+
"step": 3700
|
| 2792 |
+
},
|
| 2793 |
+
{
|
| 2794 |
+
"epoch": 2.596221133659902,
|
| 2795 |
+
"grad_norm": 5.133406162261963,
|
| 2796 |
+
"learning_rate": 1.614833658404454e-05,
|
| 2797 |
+
"loss": 0.4097,
|
| 2798 |
+
"step": 3710
|
| 2799 |
+
},
|
| 2800 |
+
{
|
| 2801 |
+
"epoch": 2.603219034289713,
|
| 2802 |
+
"grad_norm": 5.587733745574951,
|
| 2803 |
+
"learning_rate": 1.5913109190450032e-05,
|
| 2804 |
+
"loss": 0.3669,
|
| 2805 |
+
"step": 3720
|
| 2806 |
+
},
|
| 2807 |
+
{
|
| 2808 |
+
"epoch": 2.6102169349195243,
|
| 2809 |
+
"grad_norm": 4.743875026702881,
|
| 2810 |
+
"learning_rate": 1.567928297542749e-05,
|
| 2811 |
+
"loss": 0.3723,
|
| 2812 |
+
"step": 3730
|
| 2813 |
+
},
|
| 2814 |
+
{
|
| 2815 |
+
"epoch": 2.617214835549335,
|
| 2816 |
+
"grad_norm": 5.686123847961426,
|
| 2817 |
+
"learning_rate": 1.544686755065677e-05,
|
| 2818 |
+
"loss": 0.3921,
|
| 2819 |
+
"step": 3740
|
| 2820 |
+
},
|
| 2821 |
+
{
|
| 2822 |
+
"epoch": 2.624212736179146,
|
| 2823 |
+
"grad_norm": 6.688653469085693,
|
| 2824 |
+
"learning_rate": 1.5215872469825682e-05,
|
| 2825 |
+
"loss": 0.4218,
|
| 2826 |
+
"step": 3750
|
| 2827 |
+
},
|
| 2828 |
+
{
|
| 2829 |
+
"epoch": 2.624212736179146,
|
| 2830 |
+
"eval_loss": 1.1890102624893188,
|
| 2831 |
+
"eval_runtime": 33.4721,
|
| 2832 |
+
"eval_samples_per_second": 14.938,
|
| 2833 |
+
"eval_steps_per_second": 14.938,
|
| 2834 |
+
"step": 3750
|
| 2835 |
}
|
| 2836 |
],
|
| 2837 |
"logging_steps": 10,
|
|
|
|
| 2851 |
"attributes": {}
|
| 2852 |
}
|
| 2853 |
},
|
| 2854 |
+
"total_flos": 4.372563304182989e+17,
|
| 2855 |
"train_batch_size": 16,
|
| 2856 |
"trial_name": null,
|
| 2857 |
"trial_params": null
|