Training in progress, step 36000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2cb8dabf9a7be0aaf98fdca21d8221dc07d657cdc9101214ce22d983671a048
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e6212c976ed04ebbd236c6446302224a7aefbce6d3836bd327b9b5c728fad7f
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3de3a67d18a339a861c6ed23882a029410556f39281d29ad2ad4c3bc7f88fa73
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3dc7dd0f418853d90ca435bb584a7c3ec5f7940e05d8ede71e5502af2d6925a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8feb46fcf2857c50ea4a63a9573ecc3d64407b5246f4262683176b26b9e57b06
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2773,6 +2773,85 @@
|
|
| 2773 |
"eval_steps_per_second": 0.427,
|
| 2774 |
"eval_wer": 0.1843501326259947,
|
| 2775 |
"step": 35000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2776 |
}
|
| 2777 |
],
|
| 2778 |
"logging_steps": 100,
|
|
@@ -2792,7 +2871,7 @@
|
|
| 2792 |
"attributes": {}
|
| 2793 |
}
|
| 2794 |
},
|
| 2795 |
-
"total_flos": 4.
|
| 2796 |
"train_batch_size": 8,
|
| 2797 |
"trial_name": null,
|
| 2798 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
+
"epoch": 21.0896309314587,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 36000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2773 |
"eval_steps_per_second": 0.427,
|
| 2774 |
"eval_wer": 0.1843501326259947,
|
| 2775 |
"step": 35000
|
| 2776 |
+
},
|
| 2777 |
+
{
|
| 2778 |
+
"epoch": 20.56239015817223,
|
| 2779 |
+
"grad_norm": 6.862576007843018,
|
| 2780 |
+
"learning_rate": 6.668645760241809e-06,
|
| 2781 |
+
"loss": 0.6942,
|
| 2782 |
+
"step": 35100
|
| 2783 |
+
},
|
| 2784 |
+
{
|
| 2785 |
+
"epoch": 20.620972466315173,
|
| 2786 |
+
"grad_norm": 10.506976127624512,
|
| 2787 |
+
"learning_rate": 6.3987693636314575e-06,
|
| 2788 |
+
"loss": 0.6863,
|
| 2789 |
+
"step": 35200
|
| 2790 |
+
},
|
| 2791 |
+
{
|
| 2792 |
+
"epoch": 20.679554774458115,
|
| 2793 |
+
"grad_norm": 6.648481845855713,
|
| 2794 |
+
"learning_rate": 6.128892967021105e-06,
|
| 2795 |
+
"loss": 0.6987,
|
| 2796 |
+
"step": 35300
|
| 2797 |
+
},
|
| 2798 |
+
{
|
| 2799 |
+
"epoch": 20.738137082601053,
|
| 2800 |
+
"grad_norm": 7.607233047485352,
|
| 2801 |
+
"learning_rate": 5.8590165704107524e-06,
|
| 2802 |
+
"loss": 0.689,
|
| 2803 |
+
"step": 35400
|
| 2804 |
+
},
|
| 2805 |
+
{
|
| 2806 |
+
"epoch": 20.796719390743995,
|
| 2807 |
+
"grad_norm": 6.110599040985107,
|
| 2808 |
+
"learning_rate": 5.589140173800399e-06,
|
| 2809 |
+
"loss": 0.7004,
|
| 2810 |
+
"step": 35500
|
| 2811 |
+
},
|
| 2812 |
+
{
|
| 2813 |
+
"epoch": 20.855301698886937,
|
| 2814 |
+
"grad_norm": 5.632733345031738,
|
| 2815 |
+
"learning_rate": 5.319263777190047e-06,
|
| 2816 |
+
"loss": 0.7299,
|
| 2817 |
+
"step": 35600
|
| 2818 |
+
},
|
| 2819 |
+
{
|
| 2820 |
+
"epoch": 20.91388400702988,
|
| 2821 |
+
"grad_norm": 5.644909381866455,
|
| 2822 |
+
"learning_rate": 5.052086144545798e-06,
|
| 2823 |
+
"loss": 0.6749,
|
| 2824 |
+
"step": 35700
|
| 2825 |
+
},
|
| 2826 |
+
{
|
| 2827 |
+
"epoch": 20.972466315172817,
|
| 2828 |
+
"grad_norm": 5.2816386222839355,
|
| 2829 |
+
"learning_rate": 4.782209747935446e-06,
|
| 2830 |
+
"loss": 0.671,
|
| 2831 |
+
"step": 35800
|
| 2832 |
+
},
|
| 2833 |
+
{
|
| 2834 |
+
"epoch": 21.03104862331576,
|
| 2835 |
+
"grad_norm": 4.735965728759766,
|
| 2836 |
+
"learning_rate": 4.512333351325093e-06,
|
| 2837 |
+
"loss": 0.7137,
|
| 2838 |
+
"step": 35900
|
| 2839 |
+
},
|
| 2840 |
+
{
|
| 2841 |
+
"epoch": 21.0896309314587,
|
| 2842 |
+
"grad_norm": 6.491783618927002,
|
| 2843 |
+
"learning_rate": 4.242456954714741e-06,
|
| 2844 |
+
"loss": 0.6927,
|
| 2845 |
+
"step": 36000
|
| 2846 |
+
},
|
| 2847 |
+
{
|
| 2848 |
+
"epoch": 21.0896309314587,
|
| 2849 |
+
"eval_loss": 0.08344998210668564,
|
| 2850 |
+
"eval_runtime": 147.4098,
|
| 2851 |
+
"eval_samples_per_second": 3.392,
|
| 2852 |
+
"eval_steps_per_second": 0.427,
|
| 2853 |
+
"eval_wer": 0.18818155025051578,
|
| 2854 |
+
"step": 36000
|
| 2855 |
}
|
| 2856 |
],
|
| 2857 |
"logging_steps": 100,
|
|
|
|
| 2871 |
"attributes": {}
|
| 2872 |
}
|
| 2873 |
},
|
| 2874 |
+
"total_flos": 4.682615727587328e+19,
|
| 2875 |
"train_batch_size": 8,
|
| 2876 |
"trial_name": null,
|
| 2877 |
"trial_params": null
|