Training in progress, step 3000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5294f50c0baae12880a019af7aff602de0021ca7f152917dde18bdf22ba93937
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b7e62ea9f776c4c1909b99bd4b2be719e11c652940cf5f69aa1e349ee14b1fc
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84ddf8abb9ec9ea656e462efff79374386d297b7f7fbb9fe2e12d28f7e1de152
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8b1ff2305e39132563133ecd5a3bc22eb8aea6f062c680ce8b5a9a3f47c0580
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2762,6 +2762,151 @@
|
|
| 2762 |
"EMA_steps_per_second": 19.037,
|
| 2763 |
"epoch": 123.91304347826087,
|
| 2764 |
"step": 2850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2765 |
}
|
| 2766 |
],
|
| 2767 |
"logging_steps": 10,
|
|
@@ -2781,7 +2926,7 @@
|
|
| 2781 |
"attributes": {}
|
| 2782 |
}
|
| 2783 |
},
|
| 2784 |
-
"total_flos": 7.
|
| 2785 |
"train_batch_size": 4,
|
| 2786 |
"trial_name": null,
|
| 2787 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 130.43478260869566,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2762 |
"EMA_steps_per_second": 19.037,
|
| 2763 |
"epoch": 123.91304347826087,
|
| 2764 |
"step": 2850
|
| 2765 |
+
},
|
| 2766 |
+
{
|
| 2767 |
+
"epoch": 124.34782608695652,
|
| 2768 |
+
"grad_norm": 1.6300952434539795,
|
| 2769 |
+
"learning_rate": 3.910072953123827e-06,
|
| 2770 |
+
"loss": 0.2256,
|
| 2771 |
+
"step": 2860
|
| 2772 |
+
},
|
| 2773 |
+
{
|
| 2774 |
+
"epoch": 124.78260869565217,
|
| 2775 |
+
"grad_norm": 1.5945820808410645,
|
| 2776 |
+
"learning_rate": 3.910067916178865e-06,
|
| 2777 |
+
"loss": 0.2304,
|
| 2778 |
+
"step": 2870
|
| 2779 |
+
},
|
| 2780 |
+
{
|
| 2781 |
+
"epoch": 125.21739130434783,
|
| 2782 |
+
"grad_norm": 2.0118942260742188,
|
| 2783 |
+
"learning_rate": 3.9100621043246675e-06,
|
| 2784 |
+
"loss": 0.2693,
|
| 2785 |
+
"step": 2880
|
| 2786 |
+
},
|
| 2787 |
+
{
|
| 2788 |
+
"epoch": 125.65217391304348,
|
| 2789 |
+
"grad_norm": 2.1449036598205566,
|
| 2790 |
+
"learning_rate": 3.910055517563539e-06,
|
| 2791 |
+
"loss": 0.2454,
|
| 2792 |
+
"step": 2890
|
| 2793 |
+
},
|
| 2794 |
+
{
|
| 2795 |
+
"epoch": 126.08695652173913,
|
| 2796 |
+
"grad_norm": 2.3814568519592285,
|
| 2797 |
+
"learning_rate": 3.9100481558980905e-06,
|
| 2798 |
+
"loss": 0.2517,
|
| 2799 |
+
"step": 2900
|
| 2800 |
+
},
|
| 2801 |
+
{
|
| 2802 |
+
"epoch": 126.52173913043478,
|
| 2803 |
+
"grad_norm": 1.680646300315857,
|
| 2804 |
+
"learning_rate": 3.91004001933124e-06,
|
| 2805 |
+
"loss": 0.2023,
|
| 2806 |
+
"step": 2910
|
| 2807 |
+
},
|
| 2808 |
+
{
|
| 2809 |
+
"epoch": 126.95652173913044,
|
| 2810 |
+
"grad_norm": 1.567590355873108,
|
| 2811 |
+
"learning_rate": 3.9100311078662124e-06,
|
| 2812 |
+
"loss": 0.2903,
|
| 2813 |
+
"step": 2920
|
| 2814 |
+
},
|
| 2815 |
+
{
|
| 2816 |
+
"epoch": 127.3913043478261,
|
| 2817 |
+
"grad_norm": 2.0478575229644775,
|
| 2818 |
+
"learning_rate": 3.9100214215065405e-06,
|
| 2819 |
+
"loss": 0.2554,
|
| 2820 |
+
"step": 2930
|
| 2821 |
+
},
|
| 2822 |
+
{
|
| 2823 |
+
"epoch": 127.82608695652173,
|
| 2824 |
+
"grad_norm": 2.724403142929077,
|
| 2825 |
+
"learning_rate": 3.910010960256062e-06,
|
| 2826 |
+
"loss": 0.2195,
|
| 2827 |
+
"step": 2940
|
| 2828 |
+
},
|
| 2829 |
+
{
|
| 2830 |
+
"epoch": 128.2608695652174,
|
| 2831 |
+
"grad_norm": 2.3156094551086426,
|
| 2832 |
+
"learning_rate": 3.909999724118925e-06,
|
| 2833 |
+
"loss": 0.2952,
|
| 2834 |
+
"step": 2950
|
| 2835 |
+
},
|
| 2836 |
+
{
|
| 2837 |
+
"epoch": 128.69565217391303,
|
| 2838 |
+
"grad_norm": 1.6324609518051147,
|
| 2839 |
+
"learning_rate": 3.909987713099583e-06,
|
| 2840 |
+
"loss": 0.2409,
|
| 2841 |
+
"step": 2960
|
| 2842 |
+
},
|
| 2843 |
+
{
|
| 2844 |
+
"epoch": 129.1304347826087,
|
| 2845 |
+
"grad_norm": 1.66539466381073,
|
| 2846 |
+
"learning_rate": 3.909974927202796e-06,
|
| 2847 |
+
"loss": 0.2029,
|
| 2848 |
+
"step": 2970
|
| 2849 |
+
},
|
| 2850 |
+
{
|
| 2851 |
+
"epoch": 129.56521739130434,
|
| 2852 |
+
"grad_norm": 1.877989649772644,
|
| 2853 |
+
"learning_rate": 3.909961366433632e-06,
|
| 2854 |
+
"loss": 0.2407,
|
| 2855 |
+
"step": 2980
|
| 2856 |
+
},
|
| 2857 |
+
{
|
| 2858 |
+
"epoch": 130.0,
|
| 2859 |
+
"grad_norm": 5.461711406707764,
|
| 2860 |
+
"learning_rate": 3.909947030797467e-06,
|
| 2861 |
+
"loss": 0.2466,
|
| 2862 |
+
"step": 2990
|
| 2863 |
+
},
|
| 2864 |
+
{
|
| 2865 |
+
"epoch": 130.43478260869566,
|
| 2866 |
+
"grad_norm": 2.4120867252349854,
|
| 2867 |
+
"learning_rate": 3.909931920299982e-06,
|
| 2868 |
+
"loss": 0.2372,
|
| 2869 |
+
"step": 3000
|
| 2870 |
+
},
|
| 2871 |
+
{
|
| 2872 |
+
"epoch": 130.43478260869566,
|
| 2873 |
+
"eval_loss": 0.9418841600418091,
|
| 2874 |
+
"eval_runtime": 0.4153,
|
| 2875 |
+
"eval_samples_per_second": 24.08,
|
| 2876 |
+
"eval_steps_per_second": 24.08,
|
| 2877 |
+
"step": 3000
|
| 2878 |
+
},
|
| 2879 |
+
{
|
| 2880 |
+
"Start_State_loss": 0.861186683177948,
|
| 2881 |
+
"Start_State_runtime": 0.4056,
|
| 2882 |
+
"Start_State_samples_per_second": 24.655,
|
| 2883 |
+
"Start_State_steps_per_second": 24.655,
|
| 2884 |
+
"epoch": 130.43478260869566,
|
| 2885 |
+
"step": 3000
|
| 2886 |
+
},
|
| 2887 |
+
{
|
| 2888 |
+
"Raw_Model_loss": 0.9418841600418091,
|
| 2889 |
+
"Raw_Model_runtime": 0.4028,
|
| 2890 |
+
"Raw_Model_samples_per_second": 24.829,
|
| 2891 |
+
"Raw_Model_steps_per_second": 24.829,
|
| 2892 |
+
"epoch": 130.43478260869566,
|
| 2893 |
+
"step": 3000
|
| 2894 |
+
},
|
| 2895 |
+
{
|
| 2896 |
+
"SWA_loss": 0.7738855481147766,
|
| 2897 |
+
"SWA_runtime": 0.4063,
|
| 2898 |
+
"SWA_samples_per_second": 24.613,
|
| 2899 |
+
"SWA_steps_per_second": 24.613,
|
| 2900 |
+
"epoch": 130.43478260869566,
|
| 2901 |
+
"step": 3000
|
| 2902 |
+
},
|
| 2903 |
+
{
|
| 2904 |
+
"EMA_loss": 0.8603588938713074,
|
| 2905 |
+
"EMA_runtime": 0.4125,
|
| 2906 |
+
"EMA_samples_per_second": 24.244,
|
| 2907 |
+
"EMA_steps_per_second": 24.244,
|
| 2908 |
+
"epoch": 130.43478260869566,
|
| 2909 |
+
"step": 3000
|
| 2910 |
}
|
| 2911 |
],
|
| 2912 |
"logging_steps": 10,
|
|
|
|
| 2926 |
"attributes": {}
|
| 2927 |
}
|
| 2928 |
},
|
| 2929 |
+
"total_flos": 7.725240940312166e+16,
|
| 2930 |
"train_batch_size": 4,
|
| 2931 |
"trial_name": null,
|
| 2932 |
"trial_params": null
|