Training in progress, step 37000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82e580b76e08cdfb8448ca6d1803c64c049f1cef861419e8036adf1677312206
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51fc0f82c59c2b71d10f095d4baf2da47474fd2fe1bbcbc826cc6c39a81ee2ec
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f328fe058de8a228ddebf585a5f24f49207d162996c245c7975352b2646f7ee
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29c8852b8972fe5d4076d17cae40f3bcca1d0af379559b08bc3be9a35e8e8830
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5779eae6320dd873f29bcf99b0bddfe558494d35dfd606a3b158d47caa27415
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
-
"epoch": 21.
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2852,6 +2852,85 @@
|
|
| 2852 |
"eval_steps_per_second": 0.427,
|
| 2853 |
"eval_wer": 0.18818155025051578,
|
| 2854 |
"step": 36000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2855 |
}
|
| 2856 |
],
|
| 2857 |
"logging_steps": 100,
|
|
@@ -2871,7 +2950,7 @@
|
|
| 2871 |
"attributes": {}
|
| 2872 |
}
|
| 2873 |
},
|
| 2874 |
-
"total_flos": 4.
|
| 2875 |
"train_batch_size": 8,
|
| 2876 |
"trial_name": null,
|
| 2877 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
+
"epoch": 21.675454012888107,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 37000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2852 |
"eval_steps_per_second": 0.427,
|
| 2853 |
"eval_wer": 0.18818155025051578,
|
| 2854 |
"step": 36000
|
| 2855 |
+
},
|
| 2856 |
+
{
|
| 2857 |
+
"epoch": 21.148213239601642,
|
| 2858 |
+
"grad_norm": 6.068084239959717,
|
| 2859 |
+
"learning_rate": 3.972580558104388e-06,
|
| 2860 |
+
"loss": 0.7155,
|
| 2861 |
+
"step": 36100
|
| 2862 |
+
},
|
| 2863 |
+
{
|
| 2864 |
+
"epoch": 21.20679554774458,
|
| 2865 |
+
"grad_norm": 4.509976863861084,
|
| 2866 |
+
"learning_rate": 3.702704161494036e-06,
|
| 2867 |
+
"loss": 0.6615,
|
| 2868 |
+
"step": 36200
|
| 2869 |
+
},
|
| 2870 |
+
{
|
| 2871 |
+
"epoch": 21.26537785588752,
|
| 2872 |
+
"grad_norm": 6.346072673797607,
|
| 2873 |
+
"learning_rate": 3.4328277648836835e-06,
|
| 2874 |
+
"loss": 0.6682,
|
| 2875 |
+
"step": 36300
|
| 2876 |
+
},
|
| 2877 |
+
{
|
| 2878 |
+
"epoch": 21.323960164030463,
|
| 2879 |
+
"grad_norm": 6.095388889312744,
|
| 2880 |
+
"learning_rate": 3.1629513682733306e-06,
|
| 2881 |
+
"loss": 0.6588,
|
| 2882 |
+
"step": 36400
|
| 2883 |
+
},
|
| 2884 |
+
{
|
| 2885 |
+
"epoch": 21.382542472173405,
|
| 2886 |
+
"grad_norm": 5.079260349273682,
|
| 2887 |
+
"learning_rate": 2.8930749716629784e-06,
|
| 2888 |
+
"loss": 0.67,
|
| 2889 |
+
"step": 36500
|
| 2890 |
+
},
|
| 2891 |
+
{
|
| 2892 |
+
"epoch": 21.441124780316343,
|
| 2893 |
+
"grad_norm": 4.323718547821045,
|
| 2894 |
+
"learning_rate": 2.623198575052626e-06,
|
| 2895 |
+
"loss": 0.6606,
|
| 2896 |
+
"step": 36600
|
| 2897 |
+
},
|
| 2898 |
+
{
|
| 2899 |
+
"epoch": 21.499707088459285,
|
| 2900 |
+
"grad_norm": 5.659894943237305,
|
| 2901 |
+
"learning_rate": 2.3533221784422738e-06,
|
| 2902 |
+
"loss": 0.69,
|
| 2903 |
+
"step": 36700
|
| 2904 |
+
},
|
| 2905 |
+
{
|
| 2906 |
+
"epoch": 21.558289396602227,
|
| 2907 |
+
"grad_norm": 7.08272647857666,
|
| 2908 |
+
"learning_rate": 2.083445781831921e-06,
|
| 2909 |
+
"loss": 0.6506,
|
| 2910 |
+
"step": 36800
|
| 2911 |
+
},
|
| 2912 |
+
{
|
| 2913 |
+
"epoch": 21.61687170474517,
|
| 2914 |
+
"grad_norm": 4.9979119300842285,
|
| 2915 |
+
"learning_rate": 1.8135693852215685e-06,
|
| 2916 |
+
"loss": 0.7215,
|
| 2917 |
+
"step": 36900
|
| 2918 |
+
},
|
| 2919 |
+
{
|
| 2920 |
+
"epoch": 21.675454012888107,
|
| 2921 |
+
"grad_norm": 6.872857093811035,
|
| 2922 |
+
"learning_rate": 1.5436929886112161e-06,
|
| 2923 |
+
"loss": 0.7014,
|
| 2924 |
+
"step": 37000
|
| 2925 |
+
},
|
| 2926 |
+
{
|
| 2927 |
+
"epoch": 21.675454012888107,
|
| 2928 |
+
"eval_loss": 0.08349551260471344,
|
| 2929 |
+
"eval_runtime": 147.5946,
|
| 2930 |
+
"eval_samples_per_second": 3.388,
|
| 2931 |
+
"eval_steps_per_second": 0.427,
|
| 2932 |
+
"eval_wer": 0.1861184792219275,
|
| 2933 |
+
"step": 37000
|
| 2934 |
}
|
| 2935 |
],
|
| 2936 |
"logging_steps": 100,
|
|
|
|
| 2950 |
"attributes": {}
|
| 2951 |
}
|
| 2952 |
},
|
| 2953 |
+
"total_flos": 4.812697871843328e+19,
|
| 2954 |
"train_batch_size": 8,
|
| 2955 |
"trial_name": null,
|
| 2956 |
"trial_params": null
|