Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +7 -1057
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 92309112
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5529bbd8b02900353e5a9edb1b0cc3a12d5828ce3583f1e939e8a5cd7869147d
|
| 3 |
size 92309112
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 184765003
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abc81f39457613379a6b53d39a8e9a20485a39bc6441c72daab7e852c4611bd3
|
| 3 |
size 184765003
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96d2f22d26bc65f3aeedce5509461616d5bf62bde9362cbb9270a9fe00a8d63a
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1217dd157c01a1c43f8d1f2eafc858dc7730cb63e7c08068881fa71d637b5c4a
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 1,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2708,1056 +2708,6 @@
|
|
| 2708 |
"eval_samples_per_second": 8.823,
|
| 2709 |
"eval_steps_per_second": 1.176,
|
| 2710 |
"step": 180
|
| 2711 |
-
},
|
| 2712 |
-
{
|
| 2713 |
-
"epoch": 9.526315789473685,
|
| 2714 |
-
"grad_norm": 0.5609657764434814,
|
| 2715 |
-
"learning_rate": 0.0001017050638176612,
|
| 2716 |
-
"loss": 0.0328,
|
| 2717 |
-
"step": 181
|
| 2718 |
-
},
|
| 2719 |
-
{
|
| 2720 |
-
"epoch": 9.526315789473685,
|
| 2721 |
-
"eval_loss": 0.03608579561114311,
|
| 2722 |
-
"eval_runtime": 3.3972,
|
| 2723 |
-
"eval_samples_per_second": 8.831,
|
| 2724 |
-
"eval_steps_per_second": 1.177,
|
| 2725 |
-
"step": 181
|
| 2726 |
-
},
|
| 2727 |
-
{
|
| 2728 |
-
"epoch": 9.578947368421053,
|
| 2729 |
-
"grad_norm": 0.48078685998916626,
|
| 2730 |
-
"learning_rate": 9.902775451932386e-05,
|
| 2731 |
-
"loss": 0.0216,
|
| 2732 |
-
"step": 182
|
| 2733 |
-
},
|
| 2734 |
-
{
|
| 2735 |
-
"epoch": 9.578947368421053,
|
| 2736 |
-
"eval_loss": 0.0358748622238636,
|
| 2737 |
-
"eval_runtime": 3.3946,
|
| 2738 |
-
"eval_samples_per_second": 8.837,
|
| 2739 |
-
"eval_steps_per_second": 1.178,
|
| 2740 |
-
"step": 182
|
| 2741 |
-
},
|
| 2742 |
-
{
|
| 2743 |
-
"epoch": 9.631578947368421,
|
| 2744 |
-
"grad_norm": 0.5188214182853699,
|
| 2745 |
-
"learning_rate": 9.637742601134286e-05,
|
| 2746 |
-
"loss": 0.0438,
|
| 2747 |
-
"step": 183
|
| 2748 |
-
},
|
| 2749 |
-
{
|
| 2750 |
-
"epoch": 9.631578947368421,
|
| 2751 |
-
"eval_loss": 0.03486837074160576,
|
| 2752 |
-
"eval_runtime": 3.3974,
|
| 2753 |
-
"eval_samples_per_second": 8.83,
|
| 2754 |
-
"eval_steps_per_second": 1.177,
|
| 2755 |
-
"step": 183
|
| 2756 |
-
},
|
| 2757 |
-
{
|
| 2758 |
-
"epoch": 9.68421052631579,
|
| 2759 |
-
"grad_norm": 0.7200556993484497,
|
| 2760 |
-
"learning_rate": 9.375455194341214e-05,
|
| 2761 |
-
"loss": 0.0663,
|
| 2762 |
-
"step": 184
|
| 2763 |
-
},
|
| 2764 |
-
{
|
| 2765 |
-
"epoch": 9.68421052631579,
|
| 2766 |
-
"eval_loss": 0.03245267644524574,
|
| 2767 |
-
"eval_runtime": 3.4008,
|
| 2768 |
-
"eval_samples_per_second": 8.822,
|
| 2769 |
-
"eval_steps_per_second": 1.176,
|
| 2770 |
-
"step": 184
|
| 2771 |
-
},
|
| 2772 |
-
{
|
| 2773 |
-
"epoch": 9.736842105263158,
|
| 2774 |
-
"grad_norm": 0.6560045480728149,
|
| 2775 |
-
"learning_rate": 9.11596010587441e-05,
|
| 2776 |
-
"loss": 0.064,
|
| 2777 |
-
"step": 185
|
| 2778 |
-
},
|
| 2779 |
-
{
|
| 2780 |
-
"epoch": 9.736842105263158,
|
| 2781 |
-
"eval_loss": 0.029578620567917824,
|
| 2782 |
-
"eval_runtime": 3.4019,
|
| 2783 |
-
"eval_samples_per_second": 8.819,
|
| 2784 |
-
"eval_steps_per_second": 1.176,
|
| 2785 |
-
"step": 185
|
| 2786 |
-
},
|
| 2787 |
-
{
|
| 2788 |
-
"epoch": 9.789473684210526,
|
| 2789 |
-
"grad_norm": 0.5027221441268921,
|
| 2790 |
-
"learning_rate": 8.85930371102994e-05,
|
| 2791 |
-
"loss": 0.0416,
|
| 2792 |
-
"step": 186
|
| 2793 |
-
},
|
| 2794 |
-
{
|
| 2795 |
-
"epoch": 9.789473684210526,
|
| 2796 |
-
"eval_loss": 0.026809442788362503,
|
| 2797 |
-
"eval_runtime": 3.4005,
|
| 2798 |
-
"eval_samples_per_second": 8.822,
|
| 2799 |
-
"eval_steps_per_second": 1.176,
|
| 2800 |
-
"step": 186
|
| 2801 |
-
},
|
| 2802 |
-
{
|
| 2803 |
-
"epoch": 9.842105263157894,
|
| 2804 |
-
"grad_norm": 0.4124845564365387,
|
| 2805 |
-
"learning_rate": 8.605531877790762e-05,
|
| 2806 |
-
"loss": 0.0335,
|
| 2807 |
-
"step": 187
|
| 2808 |
-
},
|
| 2809 |
-
{
|
| 2810 |
-
"epoch": 9.842105263157894,
|
| 2811 |
-
"eval_loss": 0.02500898391008377,
|
| 2812 |
-
"eval_runtime": 3.4021,
|
| 2813 |
-
"eval_samples_per_second": 8.818,
|
| 2814 |
-
"eval_steps_per_second": 1.176,
|
| 2815 |
-
"step": 187
|
| 2816 |
-
},
|
| 2817 |
-
{
|
| 2818 |
-
"epoch": 9.894736842105264,
|
| 2819 |
-
"grad_norm": 0.5714792013168335,
|
| 2820 |
-
"learning_rate": 8.354689958629513e-05,
|
| 2821 |
-
"loss": 0.0491,
|
| 2822 |
-
"step": 188
|
| 2823 |
-
},
|
| 2824 |
-
{
|
| 2825 |
-
"epoch": 9.894736842105264,
|
| 2826 |
-
"eval_loss": 0.022844497114419937,
|
| 2827 |
-
"eval_runtime": 3.401,
|
| 2828 |
-
"eval_samples_per_second": 8.821,
|
| 2829 |
-
"eval_steps_per_second": 1.176,
|
| 2830 |
-
"step": 188
|
| 2831 |
-
},
|
| 2832 |
-
{
|
| 2833 |
-
"epoch": 9.947368421052632,
|
| 2834 |
-
"grad_norm": 0.48736098408699036,
|
| 2835 |
-
"learning_rate": 8.106822782403376e-05,
|
| 2836 |
-
"loss": 0.018,
|
| 2837 |
-
"step": 189
|
| 2838 |
-
},
|
| 2839 |
-
{
|
| 2840 |
-
"epoch": 9.947368421052632,
|
| 2841 |
-
"eval_loss": 0.021435970440506935,
|
| 2842 |
-
"eval_runtime": 3.4008,
|
| 2843 |
-
"eval_samples_per_second": 8.821,
|
| 2844 |
-
"eval_steps_per_second": 1.176,
|
| 2845 |
-
"step": 189
|
| 2846 |
-
},
|
| 2847 |
-
{
|
| 2848 |
-
"epoch": 10.0,
|
| 2849 |
-
"grad_norm": 0.5927891731262207,
|
| 2850 |
-
"learning_rate": 7.861974646342596e-05,
|
| 2851 |
-
"loss": 0.0388,
|
| 2852 |
-
"step": 190
|
| 2853 |
-
},
|
| 2854 |
-
{
|
| 2855 |
-
"epoch": 10.0,
|
| 2856 |
-
"eval_loss": 0.019742580130696297,
|
| 2857 |
-
"eval_runtime": 3.4006,
|
| 2858 |
-
"eval_samples_per_second": 8.822,
|
| 2859 |
-
"eval_steps_per_second": 1.176,
|
| 2860 |
-
"step": 190
|
| 2861 |
-
},
|
| 2862 |
-
{
|
| 2863 |
-
"epoch": 10.052631578947368,
|
| 2864 |
-
"grad_norm": 0.3376651108264923,
|
| 2865 |
-
"learning_rate": 7.620189308133943e-05,
|
| 2866 |
-
"loss": 0.0196,
|
| 2867 |
-
"step": 191
|
| 2868 |
-
},
|
| 2869 |
-
{
|
| 2870 |
-
"epoch": 10.052631578947368,
|
| 2871 |
-
"eval_loss": 0.018559806048870087,
|
| 2872 |
-
"eval_runtime": 3.388,
|
| 2873 |
-
"eval_samples_per_second": 8.855,
|
| 2874 |
-
"eval_steps_per_second": 1.181,
|
| 2875 |
-
"step": 191
|
| 2876 |
-
},
|
| 2877 |
-
{
|
| 2878 |
-
"epoch": 10.105263157894736,
|
| 2879 |
-
"grad_norm": 0.3613579273223877,
|
| 2880 |
-
"learning_rate": 7.381509978100626e-05,
|
| 2881 |
-
"loss": 0.0172,
|
| 2882 |
-
"step": 192
|
| 2883 |
-
},
|
| 2884 |
-
{
|
| 2885 |
-
"epoch": 10.105263157894736,
|
| 2886 |
-
"eval_loss": 0.017322294414043427,
|
| 2887 |
-
"eval_runtime": 3.3891,
|
| 2888 |
-
"eval_samples_per_second": 8.852,
|
| 2889 |
-
"eval_steps_per_second": 1.18,
|
| 2890 |
-
"step": 192
|
| 2891 |
-
},
|
| 2892 |
-
{
|
| 2893 |
-
"epoch": 10.157894736842104,
|
| 2894 |
-
"grad_norm": 0.2621256411075592,
|
| 2895 |
-
"learning_rate": 7.145979311479986e-05,
|
| 2896 |
-
"loss": 0.0159,
|
| 2897 |
-
"step": 193
|
| 2898 |
-
},
|
| 2899 |
-
{
|
| 2900 |
-
"epoch": 10.157894736842104,
|
| 2901 |
-
"eval_loss": 0.016333211213350296,
|
| 2902 |
-
"eval_runtime": 3.4014,
|
| 2903 |
-
"eval_samples_per_second": 8.82,
|
| 2904 |
-
"eval_steps_per_second": 1.176,
|
| 2905 |
-
"step": 193
|
| 2906 |
-
},
|
| 2907 |
-
{
|
| 2908 |
-
"epoch": 10.210526315789474,
|
| 2909 |
-
"grad_norm": 0.24995078146457672,
|
| 2910 |
-
"learning_rate": 6.913639400800489e-05,
|
| 2911 |
-
"loss": 0.0132,
|
| 2912 |
-
"step": 194
|
| 2913 |
-
},
|
| 2914 |
-
{
|
| 2915 |
-
"epoch": 10.210526315789474,
|
| 2916 |
-
"eval_loss": 0.015769897028803825,
|
| 2917 |
-
"eval_runtime": 3.401,
|
| 2918 |
-
"eval_samples_per_second": 8.821,
|
| 2919 |
-
"eval_steps_per_second": 1.176,
|
| 2920 |
-
"step": 194
|
| 2921 |
-
},
|
| 2922 |
-
{
|
| 2923 |
-
"epoch": 10.263157894736842,
|
| 2924 |
-
"grad_norm": 0.38419196009635925,
|
| 2925 |
-
"learning_rate": 6.684531768359173e-05,
|
| 2926 |
-
"loss": 0.0196,
|
| 2927 |
-
"step": 195
|
| 2928 |
-
},
|
| 2929 |
-
{
|
| 2930 |
-
"epoch": 10.263157894736842,
|
| 2931 |
-
"eval_loss": 0.015028283931314945,
|
| 2932 |
-
"eval_runtime": 3.4,
|
| 2933 |
-
"eval_samples_per_second": 8.824,
|
| 2934 |
-
"eval_steps_per_second": 1.176,
|
| 2935 |
-
"step": 195
|
| 2936 |
-
},
|
| 2937 |
-
{
|
| 2938 |
-
"epoch": 10.31578947368421,
|
| 2939 |
-
"grad_norm": 0.23766584694385529,
|
| 2940 |
-
"learning_rate": 6.458697358801061e-05,
|
| 2941 |
-
"loss": 0.009,
|
| 2942 |
-
"step": 196
|
| 2943 |
-
},
|
| 2944 |
-
{
|
| 2945 |
-
"epoch": 10.31578947368421,
|
| 2946 |
-
"eval_loss": 0.014445771463215351,
|
| 2947 |
-
"eval_runtime": 3.3979,
|
| 2948 |
-
"eval_samples_per_second": 8.829,
|
| 2949 |
-
"eval_steps_per_second": 1.177,
|
| 2950 |
-
"step": 196
|
| 2951 |
-
},
|
| 2952 |
-
{
|
| 2953 |
-
"epoch": 10.368421052631579,
|
| 2954 |
-
"grad_norm": 0.2710660398006439,
|
| 2955 |
-
"learning_rate": 6.236176531801813e-05,
|
| 2956 |
-
"loss": 0.0096,
|
| 2957 |
-
"step": 197
|
| 2958 |
-
},
|
| 2959 |
-
{
|
| 2960 |
-
"epoch": 10.368421052631579,
|
| 2961 |
-
"eval_loss": 0.01395699568092823,
|
| 2962 |
-
"eval_runtime": 3.3981,
|
| 2963 |
-
"eval_samples_per_second": 8.828,
|
| 2964 |
-
"eval_steps_per_second": 1.177,
|
| 2965 |
-
"step": 197
|
| 2966 |
-
},
|
| 2967 |
-
{
|
| 2968 |
-
"epoch": 10.421052631578947,
|
| 2969 |
-
"grad_norm": 0.20278970897197723,
|
| 2970 |
-
"learning_rate": 6.017009054854858e-05,
|
| 2971 |
-
"loss": 0.0087,
|
| 2972 |
-
"step": 198
|
| 2973 |
-
},
|
| 2974 |
-
{
|
| 2975 |
-
"epoch": 10.421052631578947,
|
| 2976 |
-
"eval_loss": 0.013656516559422016,
|
| 2977 |
-
"eval_runtime": 3.4043,
|
| 2978 |
-
"eval_samples_per_second": 8.812,
|
| 2979 |
-
"eval_steps_per_second": 1.175,
|
| 2980 |
-
"step": 198
|
| 2981 |
-
},
|
| 2982 |
-
{
|
| 2983 |
-
"epoch": 10.473684210526315,
|
| 2984 |
-
"grad_norm": 0.3319687247276306,
|
| 2985 |
-
"learning_rate": 5.801234096164468e-05,
|
| 2986 |
-
"loss": 0.016,
|
| 2987 |
-
"step": 199
|
| 2988 |
-
},
|
| 2989 |
-
{
|
| 2990 |
-
"epoch": 10.473684210526315,
|
| 2991 |
-
"eval_loss": 0.012863567098975182,
|
| 2992 |
-
"eval_runtime": 3.403,
|
| 2993 |
-
"eval_samples_per_second": 8.816,
|
| 2994 |
-
"eval_steps_per_second": 1.175,
|
| 2995 |
-
"step": 199
|
| 2996 |
-
},
|
| 2997 |
-
{
|
| 2998 |
-
"epoch": 10.526315789473685,
|
| 2999 |
-
"grad_norm": 0.25473591685295105,
|
| 3000 |
-
"learning_rate": 5.58889021764582e-05,
|
| 3001 |
-
"loss": 0.0105,
|
| 3002 |
-
"step": 200
|
| 3003 |
-
},
|
| 3004 |
-
{
|
| 3005 |
-
"epoch": 10.526315789473685,
|
| 3006 |
-
"eval_loss": 0.012198278680443764,
|
| 3007 |
-
"eval_runtime": 3.3999,
|
| 3008 |
-
"eval_samples_per_second": 8.824,
|
| 3009 |
-
"eval_steps_per_second": 1.177,
|
| 3010 |
-
"step": 200
|
| 3011 |
-
},
|
| 3012 |
-
{
|
| 3013 |
-
"epoch": 10.578947368421053,
|
| 3014 |
-
"grad_norm": 0.3705623745918274,
|
| 3015 |
-
"learning_rate": 5.3800153680334754e-05,
|
| 3016 |
-
"loss": 0.0134,
|
| 3017 |
-
"step": 201
|
| 3018 |
-
},
|
| 3019 |
-
{
|
| 3020 |
-
"epoch": 10.578947368421053,
|
| 3021 |
-
"eval_loss": 0.011488989926874638,
|
| 3022 |
-
"eval_runtime": 3.3917,
|
| 3023 |
-
"eval_samples_per_second": 8.845,
|
| 3024 |
-
"eval_steps_per_second": 1.179,
|
| 3025 |
-
"step": 201
|
| 3026 |
-
},
|
| 3027 |
-
{
|
| 3028 |
-
"epoch": 10.631578947368421,
|
| 3029 |
-
"grad_norm": 0.24455586075782776,
|
| 3030 |
-
"learning_rate": 5.17464687609942e-05,
|
| 3031 |
-
"loss": 0.0112,
|
| 3032 |
-
"step": 202
|
| 3033 |
-
},
|
| 3034 |
-
{
|
| 3035 |
-
"epoch": 10.631578947368421,
|
| 3036 |
-
"eval_loss": 0.010651330463588238,
|
| 3037 |
-
"eval_runtime": 3.3998,
|
| 3038 |
-
"eval_samples_per_second": 8.824,
|
| 3039 |
-
"eval_steps_per_second": 1.177,
|
| 3040 |
-
"step": 202
|
| 3041 |
-
},
|
| 3042 |
-
{
|
| 3043 |
-
"epoch": 10.68421052631579,
|
| 3044 |
-
"grad_norm": 0.2879987955093384,
|
| 3045 |
-
"learning_rate": 4.97282144398192e-05,
|
| 3046 |
-
"loss": 0.0108,
|
| 3047 |
-
"step": 203
|
| 3048 |
-
},
|
| 3049 |
-
{
|
| 3050 |
-
"epoch": 10.68421052631579,
|
| 3051 |
-
"eval_loss": 0.010258635506033897,
|
| 3052 |
-
"eval_runtime": 3.4041,
|
| 3053 |
-
"eval_samples_per_second": 8.813,
|
| 3054 |
-
"eval_steps_per_second": 1.175,
|
| 3055 |
-
"step": 203
|
| 3056 |
-
},
|
| 3057 |
-
{
|
| 3058 |
-
"epoch": 10.736842105263158,
|
| 3059 |
-
"grad_norm": 0.2595934569835663,
|
| 3060 |
-
"learning_rate": 4.7745751406263163e-05,
|
| 3061 |
-
"loss": 0.0116,
|
| 3062 |
-
"step": 204
|
| 3063 |
-
},
|
| 3064 |
-
{
|
| 3065 |
-
"epoch": 10.736842105263158,
|
| 3066 |
-
"eval_loss": 0.009770309552550316,
|
| 3067 |
-
"eval_runtime": 3.4083,
|
| 3068 |
-
"eval_samples_per_second": 8.802,
|
| 3069 |
-
"eval_steps_per_second": 1.174,
|
| 3070 |
-
"step": 204
|
| 3071 |
-
},
|
| 3072 |
-
{
|
| 3073 |
-
"epoch": 10.789473684210526,
|
| 3074 |
-
"grad_norm": 0.3026018738746643,
|
| 3075 |
-
"learning_rate": 4.5799433953390616e-05,
|
| 3076 |
-
"loss": 0.0116,
|
| 3077 |
-
"step": 205
|
| 3078 |
-
},
|
| 3079 |
-
{
|
| 3080 |
-
"epoch": 10.789473684210526,
|
| 3081 |
-
"eval_loss": 0.00936987716704607,
|
| 3082 |
-
"eval_runtime": 3.4054,
|
| 3083 |
-
"eval_samples_per_second": 8.81,
|
| 3084 |
-
"eval_steps_per_second": 1.175,
|
| 3085 |
-
"step": 205
|
| 3086 |
-
},
|
| 3087 |
-
{
|
| 3088 |
-
"epoch": 10.842105263157894,
|
| 3089 |
-
"grad_norm": 0.4068312644958496,
|
| 3090 |
-
"learning_rate": 4.388960991455998e-05,
|
| 3091 |
-
"loss": 0.0109,
|
| 3092 |
-
"step": 206
|
| 3093 |
-
},
|
| 3094 |
-
{
|
| 3095 |
-
"epoch": 10.842105263157894,
|
| 3096 |
-
"eval_loss": 0.008922109380364418,
|
| 3097 |
-
"eval_runtime": 3.4117,
|
| 3098 |
-
"eval_samples_per_second": 8.793,
|
| 3099 |
-
"eval_steps_per_second": 1.172,
|
| 3100 |
-
"step": 206
|
| 3101 |
-
},
|
| 3102 |
-
{
|
| 3103 |
-
"epoch": 10.894736842105264,
|
| 3104 |
-
"grad_norm": 0.3379729688167572,
|
| 3105 |
-
"learning_rate": 4.2016620601260796e-05,
|
| 3106 |
-
"loss": 0.015,
|
| 3107 |
-
"step": 207
|
| 3108 |
-
},
|
| 3109 |
-
{
|
| 3110 |
-
"epoch": 10.894736842105264,
|
| 3111 |
-
"eval_loss": 0.008320866152644157,
|
| 3112 |
-
"eval_runtime": 3.4039,
|
| 3113 |
-
"eval_samples_per_second": 8.813,
|
| 3114 |
-
"eval_steps_per_second": 1.175,
|
| 3115 |
-
"step": 207
|
| 3116 |
-
},
|
| 3117 |
-
{
|
| 3118 |
-
"epoch": 10.947368421052632,
|
| 3119 |
-
"grad_norm": 0.2505350410938263,
|
| 3120 |
-
"learning_rate": 4.0180800742117244e-05,
|
| 3121 |
-
"loss": 0.008,
|
| 3122 |
-
"step": 208
|
| 3123 |
-
},
|
| 3124 |
-
{
|
| 3125 |
-
"epoch": 10.947368421052632,
|
| 3126 |
-
"eval_loss": 0.007898358628153801,
|
| 3127 |
-
"eval_runtime": 3.3962,
|
| 3128 |
-
"eval_samples_per_second": 8.833,
|
| 3129 |
-
"eval_steps_per_second": 1.178,
|
| 3130 |
-
"step": 208
|
| 3131 |
-
},
|
| 3132 |
-
{
|
| 3133 |
-
"epoch": 11.0,
|
| 3134 |
-
"grad_norm": 0.36052215099334717,
|
| 3135 |
-
"learning_rate": 3.838247842306716e-05,
|
| 3136 |
-
"loss": 0.0133,
|
| 3137 |
-
"step": 209
|
| 3138 |
-
},
|
| 3139 |
-
{
|
| 3140 |
-
"epoch": 11.0,
|
| 3141 |
-
"eval_loss": 0.007371651474386454,
|
| 3142 |
-
"eval_runtime": 3.3973,
|
| 3143 |
-
"eval_samples_per_second": 8.831,
|
| 3144 |
-
"eval_steps_per_second": 1.177,
|
| 3145 |
-
"step": 209
|
| 3146 |
-
},
|
| 3147 |
-
{
|
| 3148 |
-
"epoch": 11.052631578947368,
|
| 3149 |
-
"grad_norm": 0.12308855354785919,
|
| 3150 |
-
"learning_rate": 3.662197502872885e-05,
|
| 3151 |
-
"loss": 0.0051,
|
| 3152 |
-
"step": 210
|
| 3153 |
-
},
|
| 3154 |
-
{
|
| 3155 |
-
"epoch": 11.052631578947368,
|
| 3156 |
-
"eval_loss": 0.006998243276029825,
|
| 3157 |
-
"eval_runtime": 3.4004,
|
| 3158 |
-
"eval_samples_per_second": 8.822,
|
| 3159 |
-
"eval_steps_per_second": 1.176,
|
| 3160 |
-
"step": 210
|
| 3161 |
-
},
|
| 3162 |
-
{
|
| 3163 |
-
"epoch": 11.105263157894736,
|
| 3164 |
-
"grad_norm": 0.12299831211566925,
|
| 3165 |
-
"learning_rate": 3.489960518496521e-05,
|
| 3166 |
-
"loss": 0.0065,
|
| 3167 |
-
"step": 211
|
| 3168 |
-
},
|
| 3169 |
-
{
|
| 3170 |
-
"epoch": 11.105263157894736,
|
| 3171 |
-
"eval_loss": 0.006782620679587126,
|
| 3172 |
-
"eval_runtime": 3.4059,
|
| 3173 |
-
"eval_samples_per_second": 8.808,
|
| 3174 |
-
"eval_steps_per_second": 1.174,
|
| 3175 |
-
"step": 211
|
| 3176 |
-
},
|
| 3177 |
-
{
|
| 3178 |
-
"epoch": 11.157894736842104,
|
| 3179 |
-
"grad_norm": 0.12273000180721283,
|
| 3180 |
-
"learning_rate": 3.321567670265568e-05,
|
| 3181 |
-
"loss": 0.0059,
|
| 3182 |
-
"step": 212
|
| 3183 |
-
},
|
| 3184 |
-
{
|
| 3185 |
-
"epoch": 11.157894736842104,
|
| 3186 |
-
"eval_loss": 0.006513877771794796,
|
| 3187 |
-
"eval_runtime": 3.3943,
|
| 3188 |
-
"eval_samples_per_second": 8.838,
|
| 3189 |
-
"eval_steps_per_second": 1.178,
|
| 3190 |
-
"step": 212
|
| 3191 |
-
},
|
| 3192 |
-
{
|
| 3193 |
-
"epoch": 11.210526315789474,
|
| 3194 |
-
"grad_norm": 0.11980213969945908,
|
| 3195 |
-
"learning_rate": 3.157049052268662e-05,
|
| 3196 |
-
"loss": 0.0051,
|
| 3197 |
-
"step": 213
|
| 3198 |
-
},
|
| 3199 |
-
{
|
| 3200 |
-
"epoch": 11.210526315789474,
|
| 3201 |
-
"eval_loss": 0.006208530627191067,
|
| 3202 |
-
"eval_runtime": 3.4058,
|
| 3203 |
-
"eval_samples_per_second": 8.809,
|
| 3204 |
-
"eval_steps_per_second": 1.174,
|
| 3205 |
-
"step": 213
|
| 3206 |
-
},
|
| 3207 |
-
{
|
| 3208 |
-
"epoch": 11.263157894736842,
|
| 3209 |
-
"grad_norm": 0.14820842444896698,
|
| 3210 |
-
"learning_rate": 2.9964340662168772e-05,
|
| 3211 |
-
"loss": 0.005,
|
| 3212 |
-
"step": 214
|
| 3213 |
-
},
|
| 3214 |
-
{
|
| 3215 |
-
"epoch": 11.263157894736842,
|
| 3216 |
-
"eval_loss": 0.006144699640572071,
|
| 3217 |
-
"eval_runtime": 3.4009,
|
| 3218 |
-
"eval_samples_per_second": 8.821,
|
| 3219 |
-
"eval_steps_per_second": 1.176,
|
| 3220 |
-
"step": 214
|
| 3221 |
-
},
|
| 3222 |
-
{
|
| 3223 |
-
"epoch": 11.31578947368421,
|
| 3224 |
-
"grad_norm": 0.09703250229358673,
|
| 3225 |
-
"learning_rate": 2.8397514161892484e-05,
|
| 3226 |
-
"loss": 0.0047,
|
| 3227 |
-
"step": 215
|
| 3228 |
-
},
|
| 3229 |
-
{
|
| 3230 |
-
"epoch": 11.31578947368421,
|
| 3231 |
-
"eval_loss": 0.00596656883135438,
|
| 3232 |
-
"eval_runtime": 3.4079,
|
| 3233 |
-
"eval_samples_per_second": 8.803,
|
| 3234 |
-
"eval_steps_per_second": 1.174,
|
| 3235 |
-
"step": 215
|
| 3236 |
-
},
|
| 3237 |
-
{
|
| 3238 |
-
"epoch": 11.368421052631579,
|
| 3239 |
-
"grad_norm": 0.1398313045501709,
|
| 3240 |
-
"learning_rate": 2.687029103502972e-05,
|
| 3241 |
-
"loss": 0.0058,
|
| 3242 |
-
"step": 216
|
| 3243 |
-
},
|
| 3244 |
-
{
|
| 3245 |
-
"epoch": 11.368421052631579,
|
| 3246 |
-
"eval_loss": 0.0058633070439100266,
|
| 3247 |
-
"eval_runtime": 3.403,
|
| 3248 |
-
"eval_samples_per_second": 8.816,
|
| 3249 |
-
"eval_steps_per_second": 1.175,
|
| 3250 |
-
"step": 216
|
| 3251 |
-
},
|
| 3252 |
-
{
|
| 3253 |
-
"epoch": 11.421052631578947,
|
| 3254 |
-
"grad_norm": 0.12219510972499847,
|
| 3255 |
-
"learning_rate": 2.5382944217091723e-05,
|
| 3256 |
-
"loss": 0.0059,
|
| 3257 |
-
"step": 217
|
| 3258 |
-
},
|
| 3259 |
-
{
|
| 3260 |
-
"epoch": 11.421052631578947,
|
| 3261 |
-
"eval_loss": 0.0056641846895217896,
|
| 3262 |
-
"eval_runtime": 3.4055,
|
| 3263 |
-
"eval_samples_per_second": 8.809,
|
| 3264 |
-
"eval_steps_per_second": 1.175,
|
| 3265 |
-
"step": 217
|
| 3266 |
-
},
|
| 3267 |
-
{
|
| 3268 |
-
"epoch": 11.473684210526315,
|
| 3269 |
-
"grad_norm": 0.10808281600475311,
|
| 3270 |
-
"learning_rate": 2.3935739517151916e-05,
|
| 3271 |
-
"loss": 0.005,
|
| 3272 |
-
"step": 218
|
| 3273 |
-
},
|
| 3274 |
-
{
|
| 3275 |
-
"epoch": 11.473684210526315,
|
| 3276 |
-
"eval_loss": 0.005585065111517906,
|
| 3277 |
-
"eval_runtime": 3.3987,
|
| 3278 |
-
"eval_samples_per_second": 8.827,
|
| 3279 |
-
"eval_steps_per_second": 1.177,
|
| 3280 |
-
"step": 218
|
| 3281 |
-
},
|
| 3282 |
-
{
|
| 3283 |
-
"epoch": 11.526315789473685,
|
| 3284 |
-
"grad_norm": 0.19032533466815948,
|
| 3285 |
-
"learning_rate": 2.2528935570342164e-05,
|
| 3286 |
-
"loss": 0.0063,
|
| 3287 |
-
"step": 219
|
| 3288 |
-
},
|
| 3289 |
-
{
|
| 3290 |
-
"epoch": 11.526315789473685,
|
| 3291 |
-
"eval_loss": 0.005458400584757328,
|
| 3292 |
-
"eval_runtime": 3.4008,
|
| 3293 |
-
"eval_samples_per_second": 8.822,
|
| 3294 |
-
"eval_steps_per_second": 1.176,
|
| 3295 |
-
"step": 219
|
| 3296 |
-
},
|
| 3297 |
-
{
|
| 3298 |
-
"epoch": 11.578947368421053,
|
| 3299 |
-
"grad_norm": 0.09316842257976532,
|
| 3300 |
-
"learning_rate": 2.1162783791631057e-05,
|
| 3301 |
-
"loss": 0.004,
|
| 3302 |
-
"step": 220
|
| 3303 |
-
},
|
| 3304 |
-
{
|
| 3305 |
-
"epoch": 11.578947368421053,
|
| 3306 |
-
"eval_loss": 0.0053214430809021,
|
| 3307 |
-
"eval_runtime": 3.3972,
|
| 3308 |
-
"eval_samples_per_second": 8.831,
|
| 3309 |
-
"eval_steps_per_second": 1.177,
|
| 3310 |
-
"step": 220
|
| 3311 |
-
},
|
| 3312 |
-
{
|
| 3313 |
-
"epoch": 11.631578947368421,
|
| 3314 |
-
"grad_norm": 0.13419128954410553,
|
| 3315 |
-
"learning_rate": 1.9837528330892778e-05,
|
| 3316 |
-
"loss": 0.0053,
|
| 3317 |
-
"step": 221
|
| 3318 |
-
},
|
| 3319 |
-
{
|
| 3320 |
-
"epoch": 11.631578947368421,
|
| 3321 |
-
"eval_loss": 0.00523610832169652,
|
| 3322 |
-
"eval_runtime": 3.3774,
|
| 3323 |
-
"eval_samples_per_second": 8.883,
|
| 3324 |
-
"eval_steps_per_second": 1.184,
|
| 3325 |
-
"step": 221
|
| 3326 |
-
},
|
| 3327 |
-
{
|
| 3328 |
-
"epoch": 11.68421052631579,
|
| 3329 |
-
"grad_norm": 0.1483260989189148,
|
| 3330 |
-
"learning_rate": 1.8553406029274188e-05,
|
| 3331 |
-
"loss": 0.0063,
|
| 3332 |
-
"step": 222
|
| 3333 |
-
},
|
| 3334 |
-
{
|
| 3335 |
-
"epoch": 11.68421052631579,
|
| 3336 |
-
"eval_loss": 0.0051864017732441425,
|
| 3337 |
-
"eval_runtime": 3.3864,
|
| 3338 |
-
"eval_samples_per_second": 8.859,
|
| 3339 |
-
"eval_steps_per_second": 1.181,
|
| 3340 |
-
"step": 222
|
| 3341 |
-
},
|
| 3342 |
-
{
|
| 3343 |
-
"epoch": 11.736842105263158,
|
| 3344 |
-
"grad_norm": 0.15016067028045654,
|
| 3345 |
-
"learning_rate": 1.7310646376867885e-05,
|
| 3346 |
-
"loss": 0.0067,
|
| 3347 |
-
"step": 223
|
| 3348 |
-
},
|
| 3349 |
-
{
|
| 3350 |
-
"epoch": 11.736842105263158,
|
| 3351 |
-
"eval_loss": 0.0051628886722028255,
|
| 3352 |
-
"eval_runtime": 3.399,
|
| 3353 |
-
"eval_samples_per_second": 8.826,
|
| 3354 |
-
"eval_steps_per_second": 1.177,
|
| 3355 |
-
"step": 223
|
| 3356 |
-
},
|
| 3357 |
-
{
|
| 3358 |
-
"epoch": 11.789473684210526,
|
| 3359 |
-
"grad_norm": 0.0965675637125969,
|
| 3360 |
-
"learning_rate": 1.6109471471699556e-05,
|
| 3361 |
-
"loss": 0.0052,
|
| 3362 |
-
"step": 224
|
| 3363 |
-
},
|
| 3364 |
-
{
|
| 3365 |
-
"epoch": 11.789473684210526,
|
| 3366 |
-
"eval_loss": 0.005002335179597139,
|
| 3367 |
-
"eval_runtime": 3.4012,
|
| 3368 |
-
"eval_samples_per_second": 8.82,
|
| 3369 |
-
"eval_steps_per_second": 1.176,
|
| 3370 |
-
"step": 224
|
| 3371 |
-
},
|
| 3372 |
-
{
|
| 3373 |
-
"epoch": 11.842105263157894,
|
| 3374 |
-
"grad_norm": 0.1401059329509735,
|
| 3375 |
-
"learning_rate": 1.4950095980035772e-05,
|
| 3376 |
-
"loss": 0.0055,
|
| 3377 |
-
"step": 225
|
| 3378 |
-
},
|
| 3379 |
-
{
|
| 3380 |
-
"epoch": 11.842105263157894,
|
| 3381 |
-
"eval_loss": 0.004974076058715582,
|
| 3382 |
-
"eval_runtime": 3.4045,
|
| 3383 |
-
"eval_samples_per_second": 8.812,
|
| 3384 |
-
"eval_steps_per_second": 1.175,
|
| 3385 |
-
"step": 225
|
| 3386 |
-
},
|
| 3387 |
-
{
|
| 3388 |
-
"epoch": 11.894736842105264,
|
| 3389 |
-
"grad_norm": 0.08175503462553024,
|
| 3390 |
-
"learning_rate": 1.3832727098020331e-05,
|
| 3391 |
-
"loss": 0.0037,
|
| 3392 |
-
"step": 226
|
| 3393 |
-
},
|
| 3394 |
-
{
|
| 3395 |
-
"epoch": 11.894736842105264,
|
| 3396 |
-
"eval_loss": 0.004897472448647022,
|
| 3397 |
-
"eval_runtime": 3.4065,
|
| 3398 |
-
"eval_samples_per_second": 8.807,
|
| 3399 |
-
"eval_steps_per_second": 1.174,
|
| 3400 |
-
"step": 226
|
| 3401 |
-
},
|
| 3402 |
-
{
|
| 3403 |
-
"epoch": 11.947368421052632,
|
| 3404 |
-
"grad_norm": 0.14667555689811707,
|
| 3405 |
-
"learning_rate": 1.2757564514645492e-05,
|
| 3406 |
-
"loss": 0.0047,
|
| 3407 |
-
"step": 227
|
| 3408 |
-
},
|
| 3409 |
-
{
|
| 3410 |
-
"epoch": 11.947368421052632,
|
| 3411 |
-
"eval_loss": 0.004857571795582771,
|
| 3412 |
-
"eval_runtime": 3.4021,
|
| 3413 |
-
"eval_samples_per_second": 8.818,
|
| 3414 |
-
"eval_steps_per_second": 1.176,
|
| 3415 |
-
"step": 227
|
| 3416 |
-
},
|
| 3417 |
-
{
|
| 3418 |
-
"epoch": 12.0,
|
| 3419 |
-
"grad_norm": 0.07701026648283005,
|
| 3420 |
-
"learning_rate": 1.1724800376064798e-05,
|
| 3421 |
-
"loss": 0.0036,
|
| 3422 |
-
"step": 228
|
| 3423 |
-
},
|
| 3424 |
-
{
|
| 3425 |
-
"epoch": 12.0,
|
| 3426 |
-
"eval_loss": 0.004770983941853046,
|
| 3427 |
-
"eval_runtime": 3.4001,
|
| 3428 |
-
"eval_samples_per_second": 8.823,
|
| 3429 |
-
"eval_steps_per_second": 1.176,
|
| 3430 |
-
"step": 228
|
| 3431 |
-
},
|
| 3432 |
-
{
|
| 3433 |
-
"epoch": 12.052631578947368,
|
| 3434 |
-
"grad_norm": 0.11114013940095901,
|
| 3435 |
-
"learning_rate": 1.0734619251253963e-05,
|
| 3436 |
-
"loss": 0.0057,
|
| 3437 |
-
"step": 229
|
| 3438 |
-
},
|
| 3439 |
-
{
|
| 3440 |
-
"epoch": 12.052631578947368,
|
| 3441 |
-
"eval_loss": 0.004740286152809858,
|
| 3442 |
-
"eval_runtime": 3.4009,
|
| 3443 |
-
"eval_samples_per_second": 8.821,
|
| 3444 |
-
"eval_steps_per_second": 1.176,
|
| 3445 |
-
"step": 229
|
| 3446 |
-
},
|
| 3447 |
-
{
|
| 3448 |
-
"epoch": 12.105263157894736,
|
| 3449 |
-
"grad_norm": 0.07092595100402832,
|
| 3450 |
-
"learning_rate": 9.78719809902598e-06,
|
| 3451 |
-
"loss": 0.0035,
|
| 3452 |
-
"step": 230
|
| 3453 |
-
},
|
| 3454 |
-
{
|
| 3455 |
-
"epoch": 12.105263157894736,
|
| 3456 |
-
"eval_loss": 0.004716214258223772,
|
| 3457 |
-
"eval_runtime": 3.4053,
|
| 3458 |
-
"eval_samples_per_second": 8.81,
|
| 3459 |
-
"eval_steps_per_second": 1.175,
|
| 3460 |
-
"step": 230
|
| 3461 |
-
},
|
| 3462 |
-
{
|
| 3463 |
-
"epoch": 12.157894736842104,
|
| 3464 |
-
"grad_norm": 0.12435787171125412,
|
| 3465 |
-
"learning_rate": 8.882706236405884e-06,
|
| 3466 |
-
"loss": 0.0054,
|
| 3467 |
-
"step": 231
|
| 3468 |
-
},
|
| 3469 |
-
{
|
| 3470 |
-
"epoch": 12.157894736842104,
|
| 3471 |
-
"eval_loss": 0.004733518231660128,
|
| 3472 |
-
"eval_runtime": 3.3993,
|
| 3473 |
-
"eval_samples_per_second": 8.825,
|
| 3474 |
-
"eval_steps_per_second": 1.177,
|
| 3475 |
-
"step": 231
|
| 3476 |
-
},
|
| 3477 |
-
{
|
| 3478 |
-
"epoch": 12.210526315789474,
|
| 3479 |
-
"grad_norm": 0.12049361318349838,
|
| 3480 |
-
"learning_rate": 8.02130530837189e-06,
|
| 3481 |
-
"loss": 0.0053,
|
| 3482 |
-
"step": 232
|
| 3483 |
-
},
|
| 3484 |
-
{
|
| 3485 |
-
"epoch": 12.210526315789474,
|
| 3486 |
-
"eval_loss": 0.004637454636394978,
|
| 3487 |
-
"eval_runtime": 3.4013,
|
| 3488 |
-
"eval_samples_per_second": 8.82,
|
| 3489 |
-
"eval_steps_per_second": 1.176,
|
| 3490 |
-
"step": 232
|
| 3491 |
-
},
|
| 3492 |
-
{
|
| 3493 |
-
"epoch": 12.263157894736842,
|
| 3494 |
-
"grad_norm": 0.06943191587924957,
|
| 3495 |
-
"learning_rate": 7.203149258967034e-06,
|
| 3496 |
-
"loss": 0.0039,
|
| 3497 |
-
"step": 233
|
| 3498 |
-
},
|
| 3499 |
-
{
|
| 3500 |
-
"epoch": 12.263157894736842,
|
| 3501 |
-
"eval_loss": 0.004599397070705891,
|
| 3502 |
-
"eval_runtime": 3.4029,
|
| 3503 |
-
"eval_samples_per_second": 8.816,
|
| 3504 |
-
"eval_steps_per_second": 1.175,
|
| 3505 |
-
"step": 233
|
| 3506 |
-
},
|
| 3507 |
-
{
|
| 3508 |
-
"epoch": 12.31578947368421,
|
| 3509 |
-
"grad_norm": 0.10378482937812805,
|
| 3510 |
-
"learning_rate": 6.428384303787282e-06,
|
| 3511 |
-
"loss": 0.0053,
|
| 3512 |
-
"step": 234
|
| 3513 |
-
},
|
| 3514 |
-
{
|
| 3515 |
-
"epoch": 12.31578947368421,
|
| 3516 |
-
"eval_loss": 0.0046176365576684475,
|
| 3517 |
-
"eval_runtime": 3.4039,
|
| 3518 |
-
"eval_samples_per_second": 8.813,
|
| 3519 |
-
"eval_steps_per_second": 1.175,
|
| 3520 |
-
"step": 234
|
| 3521 |
-
},
|
| 3522 |
-
{
|
| 3523 |
-
"epoch": 12.368421052631579,
|
| 3524 |
-
"grad_norm": 0.08170512318611145,
|
| 3525 |
-
"learning_rate": 5.697148903850868e-06,
|
| 3526 |
-
"loss": 0.0046,
|
| 3527 |
-
"step": 235
|
| 3528 |
-
},
|
| 3529 |
-
{
|
| 3530 |
-
"epoch": 12.368421052631579,
|
| 3531 |
-
"eval_loss": 0.00459822965785861,
|
| 3532 |
-
"eval_runtime": 3.404,
|
| 3533 |
-
"eval_samples_per_second": 8.813,
|
| 3534 |
-
"eval_steps_per_second": 1.175,
|
| 3535 |
-
"step": 235
|
| 3536 |
-
},
|
| 3537 |
-
{
|
| 3538 |
-
"epoch": 12.421052631578947,
|
| 3539 |
-
"grad_norm": 0.09477739036083221,
|
| 3540 |
-
"learning_rate": 5.009573740853312e-06,
|
| 3541 |
-
"loss": 0.0047,
|
| 3542 |
-
"step": 236
|
| 3543 |
-
},
|
| 3544 |
-
{
|
| 3545 |
-
"epoch": 12.421052631578947,
|
| 3546 |
-
"eval_loss": 0.004573486745357513,
|
| 3547 |
-
"eval_runtime": 3.4032,
|
| 3548 |
-
"eval_samples_per_second": 8.815,
|
| 3549 |
-
"eval_steps_per_second": 1.175,
|
| 3550 |
-
"step": 236
|
| 3551 |
-
},
|
| 3552 |
-
{
|
| 3553 |
-
"epoch": 12.473684210526315,
|
| 3554 |
-
"grad_norm": 0.0745476633310318,
|
| 3555 |
-
"learning_rate": 4.365781693813048e-06,
|
| 3556 |
-
"loss": 0.004,
|
| 3557 |
-
"step": 237
|
| 3558 |
-
},
|
| 3559 |
-
{
|
| 3560 |
-
"epoch": 12.473684210526315,
|
| 3561 |
-
"eval_loss": 0.004487224388867617,
|
| 3562 |
-
"eval_runtime": 3.4067,
|
| 3563 |
-
"eval_samples_per_second": 8.806,
|
| 3564 |
-
"eval_steps_per_second": 1.174,
|
| 3565 |
-
"step": 237
|
| 3566 |
-
},
|
| 3567 |
-
{
|
| 3568 |
-
"epoch": 12.526315789473685,
|
| 3569 |
-
"grad_norm": 0.13931944966316223,
|
| 3570 |
-
"learning_rate": 3.765887817111069e-06,
|
| 3571 |
-
"loss": 0.0065,
|
| 3572 |
-
"step": 238
|
| 3573 |
-
},
|
| 3574 |
-
{
|
| 3575 |
-
"epoch": 12.526315789473685,
|
| 3576 |
-
"eval_loss": 0.004524969030171633,
|
| 3577 |
-
"eval_runtime": 3.4058,
|
| 3578 |
-
"eval_samples_per_second": 8.808,
|
| 3579 |
-
"eval_steps_per_second": 1.174,
|
| 3580 |
-
"step": 238
|
| 3581 |
-
},
|
| 3582 |
-
{
|
| 3583 |
-
"epoch": 12.578947368421053,
|
| 3584 |
-
"grad_norm": 0.056376032531261444,
|
| 3585 |
-
"learning_rate": 3.2099993199292688e-06,
|
| 3586 |
-
"loss": 0.0026,
|
| 3587 |
-
"step": 239
|
| 3588 |
-
},
|
| 3589 |
-
{
|
| 3590 |
-
"epoch": 12.578947368421053,
|
| 3591 |
-
"eval_loss": 0.0044847470708191395,
|
| 3592 |
-
"eval_runtime": 3.3996,
|
| 3593 |
-
"eval_samples_per_second": 8.825,
|
| 3594 |
-
"eval_steps_per_second": 1.177,
|
| 3595 |
-
"step": 239
|
| 3596 |
-
},
|
| 3597 |
-
{
|
| 3598 |
-
"epoch": 12.631578947368421,
|
| 3599 |
-
"grad_norm": 0.07375714182853699,
|
| 3600 |
-
"learning_rate": 2.698215547090599e-06,
|
| 3601 |
-
"loss": 0.004,
|
| 3602 |
-
"step": 240
|
| 3603 |
-
},
|
| 3604 |
-
{
|
| 3605 |
-
"epoch": 12.631578947368421,
|
| 3606 |
-
"eval_loss": 0.004458704963326454,
|
| 3607 |
-
"eval_runtime": 3.3998,
|
| 3608 |
-
"eval_samples_per_second": 8.824,
|
| 3609 |
-
"eval_steps_per_second": 1.177,
|
| 3610 |
-
"step": 240
|
| 3611 |
-
},
|
| 3612 |
-
{
|
| 3613 |
-
"epoch": 12.68421052631579,
|
| 3614 |
-
"grad_norm": 0.06447097659111023,
|
| 3615 |
-
"learning_rate": 2.230627961304993e-06,
|
| 3616 |
-
"loss": 0.0032,
|
| 3617 |
-
"step": 241
|
| 3618 |
-
},
|
| 3619 |
-
{
|
| 3620 |
-
"epoch": 12.68421052631579,
|
| 3621 |
-
"eval_loss": 0.0044786701910197735,
|
| 3622 |
-
"eval_runtime": 3.3934,
|
| 3623 |
-
"eval_samples_per_second": 8.841,
|
| 3624 |
-
"eval_steps_per_second": 1.179,
|
| 3625 |
-
"step": 241
|
| 3626 |
-
},
|
| 3627 |
-
{
|
| 3628 |
-
"epoch": 12.736842105263158,
|
| 3629 |
-
"grad_norm": 0.1086612269282341,
|
| 3630 |
-
"learning_rate": 1.807320126823414e-06,
|
| 3631 |
-
"loss": 0.0042,
|
| 3632 |
-
"step": 242
|
| 3633 |
-
},
|
| 3634 |
-
{
|
| 3635 |
-
"epoch": 12.736842105263158,
|
| 3636 |
-
"eval_loss": 0.004519260488450527,
|
| 3637 |
-
"eval_runtime": 3.3977,
|
| 3638 |
-
"eval_samples_per_second": 8.83,
|
| 3639 |
-
"eval_steps_per_second": 1.177,
|
| 3640 |
-
"step": 242
|
| 3641 |
-
},
|
| 3642 |
-
{
|
| 3643 |
-
"epoch": 12.789473684210526,
|
| 3644 |
-
"grad_norm": 0.052398040890693665,
|
| 3645 |
-
"learning_rate": 1.4283676945041346e-06,
|
| 3646 |
-
"loss": 0.0024,
|
| 3647 |
-
"step": 243
|
| 3648 |
-
},
|
| 3649 |
-
{
|
| 3650 |
-
"epoch": 12.789473684210526,
|
| 3651 |
-
"eval_loss": 0.004430453758686781,
|
| 3652 |
-
"eval_runtime": 3.4008,
|
| 3653 |
-
"eval_samples_per_second": 8.821,
|
| 3654 |
-
"eval_steps_per_second": 1.176,
|
| 3655 |
-
"step": 243
|
| 3656 |
-
},
|
| 3657 |
-
{
|
| 3658 |
-
"epoch": 12.842105263157894,
|
| 3659 |
-
"grad_norm": 0.10231564193964005,
|
| 3660 |
-
"learning_rate": 1.0938383882926617e-06,
|
| 3661 |
-
"loss": 0.003,
|
| 3662 |
-
"step": 244
|
| 3663 |
-
},
|
| 3664 |
-
{
|
| 3665 |
-
"epoch": 12.842105263157894,
|
| 3666 |
-
"eval_loss": 0.0044572907499969006,
|
| 3667 |
-
"eval_runtime": 3.4036,
|
| 3668 |
-
"eval_samples_per_second": 8.814,
|
| 3669 |
-
"eval_steps_per_second": 1.175,
|
| 3670 |
-
"step": 244
|
| 3671 |
-
},
|
| 3672 |
-
{
|
| 3673 |
-
"epoch": 12.894736842105264,
|
| 3674 |
-
"grad_norm": 0.1136302798986435,
|
| 3675 |
-
"learning_rate": 8.037919931187243e-07,
|
| 3676 |
-
"loss": 0.0028,
|
| 3677 |
-
"step": 245
|
| 3678 |
-
},
|
| 3679 |
-
{
|
| 3680 |
-
"epoch": 12.894736842105264,
|
| 3681 |
-
"eval_loss": 0.0044529978185892105,
|
| 3682 |
-
"eval_runtime": 3.4025,
|
| 3683 |
-
"eval_samples_per_second": 8.817,
|
| 3684 |
-
"eval_steps_per_second": 1.176,
|
| 3685 |
-
"step": 245
|
| 3686 |
-
},
|
| 3687 |
-
{
|
| 3688 |
-
"epoch": 12.947368421052632,
|
| 3689 |
-
"grad_norm": 0.08841534703969955,
|
| 3690 |
-
"learning_rate": 5.582803442117091e-07,
|
| 3691 |
-
"loss": 0.0034,
|
| 3692 |
-
"step": 246
|
| 3693 |
-
},
|
| 3694 |
-
{
|
| 3695 |
-
"epoch": 12.947368421052632,
|
| 3696 |
-
"eval_loss": 0.004437682218849659,
|
| 3697 |
-
"eval_runtime": 3.3982,
|
| 3698 |
-
"eval_samples_per_second": 8.828,
|
| 3699 |
-
"eval_steps_per_second": 1.177,
|
| 3700 |
-
"step": 246
|
| 3701 |
-
},
|
| 3702 |
-
{
|
| 3703 |
-
"epoch": 13.0,
|
| 3704 |
-
"grad_norm": 0.09434516727924347,
|
| 3705 |
-
"learning_rate": 3.5734731783715333e-07,
|
| 3706 |
-
"loss": 0.0051,
|
| 3707 |
-
"step": 247
|
| 3708 |
-
},
|
| 3709 |
-
{
|
| 3710 |
-
"epoch": 13.0,
|
| 3711 |
-
"eval_loss": 0.0044091795571148396,
|
| 3712 |
-
"eval_runtime": 3.4027,
|
| 3713 |
-
"eval_samples_per_second": 8.817,
|
| 3714 |
-
"eval_steps_per_second": 1.176,
|
| 3715 |
-
"step": 247
|
| 3716 |
-
},
|
| 3717 |
-
{
|
| 3718 |
-
"epoch": 13.052631578947368,
|
| 3719 |
-
"grad_norm": 0.11519359052181244,
|
| 3720 |
-
"learning_rate": 2.0102882345540696e-07,
|
| 3721 |
-
"loss": 0.0041,
|
| 3722 |
-
"step": 248
|
| 3723 |
-
},
|
| 3724 |
-
{
|
| 3725 |
-
"epoch": 13.052631578947368,
|
| 3726 |
-
"eval_loss": 0.004471189342439175,
|
| 3727 |
-
"eval_runtime": 3.3961,
|
| 3728 |
-
"eval_samples_per_second": 8.834,
|
| 3729 |
-
"eval_steps_per_second": 1.178,
|
| 3730 |
-
"step": 248
|
| 3731 |
-
},
|
| 3732 |
-
{
|
| 3733 |
-
"epoch": 13.105263157894736,
|
| 3734 |
-
"grad_norm": 0.054617173969745636,
|
| 3735 |
-
"learning_rate": 8.935279730407086e-08,
|
| 3736 |
-
"loss": 0.0026,
|
| 3737 |
-
"step": 249
|
| 3738 |
-
},
|
| 3739 |
-
{
|
| 3740 |
-
"epoch": 13.105263157894736,
|
| 3741 |
-
"eval_loss": 0.004416502080857754,
|
| 3742 |
-
"eval_runtime": 3.4007,
|
| 3743 |
-
"eval_samples_per_second": 8.822,
|
| 3744 |
-
"eval_steps_per_second": 1.176,
|
| 3745 |
-
"step": 249
|
| 3746 |
-
},
|
| 3747 |
-
{
|
| 3748 |
-
"epoch": 13.157894736842104,
|
| 3749 |
-
"grad_norm": 0.0668402761220932,
|
| 3750 |
-
"learning_rate": 2.2339197405490953e-08,
|
| 3751 |
-
"loss": 0.0035,
|
| 3752 |
-
"step": 250
|
| 3753 |
-
},
|
| 3754 |
-
{
|
| 3755 |
-
"epoch": 13.157894736842104,
|
| 3756 |
-
"eval_loss": 0.004414246417582035,
|
| 3757 |
-
"eval_runtime": 3.3991,
|
| 3758 |
-
"eval_samples_per_second": 8.826,
|
| 3759 |
-
"eval_steps_per_second": 1.177,
|
| 3760 |
-
"step": 250
|
| 3761 |
}
|
| 3762 |
],
|
| 3763 |
"logging_steps": 1,
|
|
@@ -3772,12 +2722,12 @@
|
|
| 3772 |
"should_evaluate": false,
|
| 3773 |
"should_log": false,
|
| 3774 |
"should_save": true,
|
| 3775 |
-
"should_training_stop":
|
| 3776 |
},
|
| 3777 |
"attributes": {}
|
| 3778 |
}
|
| 3779 |
},
|
| 3780 |
-
"total_flos":
|
| 3781 |
"train_batch_size": 1,
|
| 3782 |
"trial_name": null,
|
| 3783 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 180,
|
| 3 |
+
"best_metric": 0.037015657871961594,
|
| 4 |
+
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-180",
|
| 5 |
+
"epoch": 9.473684210526315,
|
| 6 |
"eval_steps": 1,
|
| 7 |
+
"global_step": 180,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2708 |
"eval_samples_per_second": 8.823,
|
| 2709 |
"eval_steps_per_second": 1.176,
|
| 2710 |
"step": 180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2711 |
}
|
| 2712 |
],
|
| 2713 |
"logging_steps": 1,
|
|
|
|
| 2722 |
"should_evaluate": false,
|
| 2723 |
"should_log": false,
|
| 2724 |
"should_save": true,
|
| 2725 |
+
"should_training_stop": false
|
| 2726 |
},
|
| 2727 |
"attributes": {}
|
| 2728 |
}
|
| 2729 |
},
|
| 2730 |
+
"total_flos": 7311440876433408.0,
|
| 2731 |
"train_batch_size": 1,
|
| 2732 |
"trial_name": null,
|
| 2733 |
"trial_params": null
|