Training in progress, step 400, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4969539560
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67708b17aa8ae31fb85a04177716cbfbb8b7425f052006d954bae0522c6f8bee
|
| 3 |
size 4969539560
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1912795688
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee66416d8bfe4367c4aeb4cf6600df9a1ca0e261e23a542692948c4fb197ec85
|
| 3 |
size 1912795688
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 16,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2888,6 +2888,126 @@
|
|
| 2888 |
"eval_samples_per_second": 17.094,
|
| 2889 |
"eval_steps_per_second": 17.094,
|
| 2890 |
"step": 384
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2891 |
}
|
| 2892 |
],
|
| 2893 |
"logging_steps": 1,
|
|
@@ -2902,12 +3022,12 @@
|
|
| 2902 |
"should_evaluate": false,
|
| 2903 |
"should_log": false,
|
| 2904 |
"should_save": true,
|
| 2905 |
-
"should_training_stop":
|
| 2906 |
},
|
| 2907 |
"attributes": {}
|
| 2908 |
}
|
| 2909 |
},
|
| 2910 |
-
"total_flos": 1.
|
| 2911 |
"train_batch_size": 1,
|
| 2912 |
"trial_name": null,
|
| 2913 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 100.0,
|
| 6 |
"eval_steps": 16,
|
| 7 |
+
"global_step": 400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2888 |
"eval_samples_per_second": 17.094,
|
| 2889 |
"eval_steps_per_second": 17.094,
|
| 2890 |
"step": 384
|
| 2891 |
+
},
|
| 2892 |
+
{
|
| 2893 |
+
"epoch": 96.2909090909091,
|
| 2894 |
+
"grad_norm": 3.1493375301361084,
|
| 2895 |
+
"learning_rate": 1.0437936906629333e-08,
|
| 2896 |
+
"loss": 0.5571,
|
| 2897 |
+
"step": 385
|
| 2898 |
+
},
|
| 2899 |
+
{
|
| 2900 |
+
"epoch": 96.58181818181818,
|
| 2901 |
+
"grad_norm": 3.1544456481933594,
|
| 2902 |
+
"learning_rate": 1.0384981238178533e-08,
|
| 2903 |
+
"loss": 0.7043,
|
| 2904 |
+
"step": 386
|
| 2905 |
+
},
|
| 2906 |
+
{
|
| 2907 |
+
"epoch": 96.87272727272727,
|
| 2908 |
+
"grad_norm": 3.5196638107299805,
|
| 2909 |
+
"learning_rate": 1.033542317614051e-08,
|
| 2910 |
+
"loss": 0.6956,
|
| 2911 |
+
"step": 387
|
| 2912 |
+
},
|
| 2913 |
+
{
|
| 2914 |
+
"epoch": 97.0,
|
| 2915 |
+
"grad_norm": 2.829664707183838,
|
| 2916 |
+
"learning_rate": 1.0289266494553564e-08,
|
| 2917 |
+
"loss": 0.5839,
|
| 2918 |
+
"step": 388
|
| 2919 |
+
},
|
| 2920 |
+
{
|
| 2921 |
+
"epoch": 97.2909090909091,
|
| 2922 |
+
"grad_norm": 3.240220308303833,
|
| 2923 |
+
"learning_rate": 1.0246514708427701e-08,
|
| 2924 |
+
"loss": 0.629,
|
| 2925 |
+
"step": 389
|
| 2926 |
+
},
|
| 2927 |
+
{
|
| 2928 |
+
"epoch": 97.58181818181818,
|
| 2929 |
+
"grad_norm": 3.419234275817871,
|
| 2930 |
+
"learning_rate": 1.0207171073476952e-08,
|
| 2931 |
+
"loss": 0.7125,
|
| 2932 |
+
"step": 390
|
| 2933 |
+
},
|
| 2934 |
+
{
|
| 2935 |
+
"epoch": 97.87272727272727,
|
| 2936 |
+
"grad_norm": 3.266242742538452,
|
| 2937 |
+
"learning_rate": 1.017123858587145e-08,
|
| 2938 |
+
"loss": 0.7004,
|
| 2939 |
+
"step": 391
|
| 2940 |
+
},
|
| 2941 |
+
{
|
| 2942 |
+
"epoch": 98.0,
|
| 2943 |
+
"grad_norm": 2.8885867595672607,
|
| 2944 |
+
"learning_rate": 1.0138719982009241e-08,
|
| 2945 |
+
"loss": 0.4986,
|
| 2946 |
+
"step": 392
|
| 2947 |
+
},
|
| 2948 |
+
{
|
| 2949 |
+
"epoch": 98.2909090909091,
|
| 2950 |
+
"grad_norm": 3.4574053287506104,
|
| 2951 |
+
"learning_rate": 1.0109617738307912e-08,
|
| 2952 |
+
"loss": 0.7095,
|
| 2953 |
+
"step": 393
|
| 2954 |
+
},
|
| 2955 |
+
{
|
| 2956 |
+
"epoch": 98.58181818181818,
|
| 2957 |
+
"grad_norm": 3.2674267292022705,
|
| 2958 |
+
"learning_rate": 1.0083934071015988e-08,
|
| 2959 |
+
"loss": 0.5806,
|
| 2960 |
+
"step": 394
|
| 2961 |
+
},
|
| 2962 |
+
{
|
| 2963 |
+
"epoch": 98.87272727272727,
|
| 2964 |
+
"grad_norm": 2.897749423980713,
|
| 2965 |
+
"learning_rate": 1.0061670936044179e-08,
|
| 2966 |
+
"loss": 0.6434,
|
| 2967 |
+
"step": 395
|
| 2968 |
+
},
|
| 2969 |
+
{
|
| 2970 |
+
"epoch": 99.0,
|
| 2971 |
+
"grad_norm": 3.9228150844573975,
|
| 2972 |
+
"learning_rate": 1.0042830028816398e-08,
|
| 2973 |
+
"loss": 0.7094,
|
| 2974 |
+
"step": 396
|
| 2975 |
+
},
|
| 2976 |
+
{
|
| 2977 |
+
"epoch": 99.2909090909091,
|
| 2978 |
+
"grad_norm": 2.946876287460327,
|
| 2979 |
+
"learning_rate": 1.002741278414069e-08,
|
| 2980 |
+
"loss": 0.5678,
|
| 2981 |
+
"step": 397
|
| 2982 |
+
},
|
| 2983 |
+
{
|
| 2984 |
+
"epoch": 99.58181818181818,
|
| 2985 |
+
"grad_norm": 2.9825222492218018,
|
| 2986 |
+
"learning_rate": 1.0015420376099922e-08,
|
| 2987 |
+
"loss": 0.6347,
|
| 2988 |
+
"step": 398
|
| 2989 |
+
},
|
| 2990 |
+
{
|
| 2991 |
+
"epoch": 99.87272727272727,
|
| 2992 |
+
"grad_norm": 3.46803879737854,
|
| 2993 |
+
"learning_rate": 1.0006853717962394e-08,
|
| 2994 |
+
"loss": 0.7428,
|
| 2995 |
+
"step": 399
|
| 2996 |
+
},
|
| 2997 |
+
{
|
| 2998 |
+
"epoch": 100.0,
|
| 2999 |
+
"grad_norm": 4.149415969848633,
|
| 3000 |
+
"learning_rate": 1.0001713462112291e-08,
|
| 3001 |
+
"loss": 0.6465,
|
| 3002 |
+
"step": 400
|
| 3003 |
+
},
|
| 3004 |
+
{
|
| 3005 |
+
"epoch": 100.0,
|
| 3006 |
+
"eval_loss": 0.6363555192947388,
|
| 3007 |
+
"eval_runtime": 0.7511,
|
| 3008 |
+
"eval_samples_per_second": 17.308,
|
| 3009 |
+
"eval_steps_per_second": 17.308,
|
| 3010 |
+
"step": 400
|
| 3011 |
}
|
| 3012 |
],
|
| 3013 |
"logging_steps": 1,
|
|
|
|
| 3022 |
"should_evaluate": false,
|
| 3023 |
"should_log": false,
|
| 3024 |
"should_save": true,
|
| 3025 |
+
"should_training_stop": true
|
| 3026 |
},
|
| 3027 |
"attributes": {}
|
| 3028 |
}
|
| 3029 |
},
|
| 3030 |
+
"total_flos": 1.34153286008832e+17,
|
| 3031 |
"train_batch_size": 1,
|
| 3032 |
"trial_name": null,
|
| 3033 |
"trial_params": null
|