Training in progress, epoch 49, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46f26b7c7b98aea584520e7cb43e0c742a2fab82d01bbc510e14658ecd7060c0
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9a9b2d74d1927f905921e170cea4cd074caeff6c9fa3e6c6457198a1efed480
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb264940fbf03f94d8e47f1772b5a686e13ccf938b92dfbcbf596e8b0d470855
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:130318fbfb45bf27f44198c9d3acab280dc9bda99dd669b2a413ca9672551f5e
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7aba72fb2d8e27204901b5d6a919d20063e1ff32ba82923c6be3270dacccacab
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 17738,
|
| 3 |
"best_metric": 0.2120542292956086,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-17738",
|
| 5 |
-
"epoch": 49.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1882,6 +1882,43 @@
|
|
| 1882 |
"eval_steps_per_second": 0.421,
|
| 1883 |
"eval_wer": 0.2163277335691129,
|
| 1884 |
"step": 19758
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1885 |
}
|
| 1886 |
],
|
| 1887 |
"logging_steps": 100,
|
|
@@ -1896,12 +1933,12 @@
|
|
| 1896 |
"should_evaluate": false,
|
| 1897 |
"should_log": false,
|
| 1898 |
"should_save": true,
|
| 1899 |
-
"should_training_stop":
|
| 1900 |
},
|
| 1901 |
"attributes": {}
|
| 1902 |
}
|
| 1903 |
},
|
| 1904 |
-
"total_flos": 2.
|
| 1905 |
"train_batch_size": 8,
|
| 1906 |
"trial_name": null,
|
| 1907 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 17738,
|
| 3 |
"best_metric": 0.2120542292956086,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-17738",
|
| 5 |
+
"epoch": 49.972101673899566,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 20150,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1882 |
"eval_steps_per_second": 0.421,
|
| 1883 |
"eval_wer": 0.2163277335691129,
|
| 1884 |
"step": 19758
|
| 1885 |
+
},
|
| 1886 |
+
{
|
| 1887 |
+
"epoch": 49.104153750774955,
|
| 1888 |
+
"grad_norm": 19.84515380859375,
|
| 1889 |
+
"learning_rate": 1.8575063613231552e-06,
|
| 1890 |
+
"loss": 1.4737,
|
| 1891 |
+
"step": 19800
|
| 1892 |
+
},
|
| 1893 |
+
{
|
| 1894 |
+
"epoch": 49.3521388716677,
|
| 1895 |
+
"grad_norm": 14.313343048095703,
|
| 1896 |
+
"learning_rate": 1.3486005089058526e-06,
|
| 1897 |
+
"loss": 1.5536,
|
| 1898 |
+
"step": 19900
|
| 1899 |
+
},
|
| 1900 |
+
{
|
| 1901 |
+
"epoch": 49.60012399256045,
|
| 1902 |
+
"grad_norm": 13.140170097351074,
|
| 1903 |
+
"learning_rate": 8.396946564885497e-07,
|
| 1904 |
+
"loss": 1.4557,
|
| 1905 |
+
"step": 20000
|
| 1906 |
+
},
|
| 1907 |
+
{
|
| 1908 |
+
"epoch": 49.84810911345319,
|
| 1909 |
+
"grad_norm": 17.50598907470703,
|
| 1910 |
+
"learning_rate": 3.3078880407124687e-07,
|
| 1911 |
+
"loss": 1.5171,
|
| 1912 |
+
"step": 20100
|
| 1913 |
+
},
|
| 1914 |
+
{
|
| 1915 |
+
"epoch": 49.972101673899566,
|
| 1916 |
+
"eval_loss": 0.10252001881599426,
|
| 1917 |
+
"eval_runtime": 149.0596,
|
| 1918 |
+
"eval_samples_per_second": 3.354,
|
| 1919 |
+
"eval_steps_per_second": 0.423,
|
| 1920 |
+
"eval_wer": 0.2120542292956086,
|
| 1921 |
+
"step": 20150
|
| 1922 |
}
|
| 1923 |
],
|
| 1924 |
"logging_steps": 100,
|
|
|
|
| 1933 |
"should_evaluate": false,
|
| 1934 |
"should_log": false,
|
| 1935 |
"should_save": true,
|
| 1936 |
+
"should_training_stop": true
|
| 1937 |
},
|
| 1938 |
"attributes": {}
|
| 1939 |
}
|
| 1940 |
},
|
| 1941 |
+
"total_flos": 2.61896007057408e+19,
|
| 1942 |
"train_batch_size": 8,
|
| 1943 |
"trial_name": null,
|
| 1944 |
"trial_params": null
|