Commit ·
5804643
1
Parent(s): 46accd2
Upload whisper_python_am_et.ipynb
Browse files- whisper_python_am_et.ipynb +11 -10
whisper_python_am_et.ipynb
CHANGED
|
@@ -628,12 +628,13 @@
|
|
| 628 |
},
|
| 629 |
{
|
| 630 |
"cell_type": "code",
|
| 631 |
-
"execution_count":
|
| 632 |
"metadata": {
|
| 633 |
"id": "iN2MgL5gYgmB"
|
| 634 |
},
|
| 635 |
"outputs": [],
|
| 636 |
"source": [
|
|
|
|
| 637 |
"!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
|
| 638 |
" --model_name_or_path=\"openai/whisper-small\" \\\n",
|
| 639 |
" --dataset_name=\"google/fleurs\" \\\n",
|
|
@@ -642,7 +643,7 @@
|
|
| 642 |
" --train_split_name=\"train+validation\" \\\n",
|
| 643 |
" --eval_split_name=\"test\" \\\n",
|
| 644 |
" --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
|
| 645 |
-
" --max_steps=\"
|
| 646 |
" --output_dir=\"./whisper-small-amet\" \\\n",
|
| 647 |
" --per_device_train_batch_size=\"64\" \\\n",
|
| 648 |
" --per_device_eval_batch_size=\"32\" \\\n",
|
|
@@ -651,9 +652,9 @@
|
|
| 651 |
" --learning_rate=\"1e-5\" \\\n",
|
| 652 |
" --warmup_steps=\"500\" \\\n",
|
| 653 |
" --evaluation_strategy=\"steps\" \\\n",
|
| 654 |
-
" --eval_steps=\"
|
| 655 |
" --save_strategy=\"steps\" \\\n",
|
| 656 |
-
" --save_steps=\"
|
| 657 |
" --generation_max_length=\"225\" \\\n",
|
| 658 |
" --length_column_name=\"input_length\" \\\n",
|
| 659 |
" --max_duration_in_seconds=\"30\" \\\n",
|
|
@@ -662,22 +663,22 @@
|
|
| 662 |
" --report_to=\"tensorboard\" \\\n",
|
| 663 |
" --metric_for_best_model=\"wer\" \\\n",
|
| 664 |
" --greater_is_better=\"False\" \\\n",
|
| 665 |
-
" --load_best_model_at_end \\\n",
|
| 666 |
" --gradient_checkpointing \\\n",
|
| 667 |
" --fp16 \\\n",
|
| 668 |
" --overwrite_output_dir \\\n",
|
| 669 |
" --do_train \\\n",
|
| 670 |
-
" --do_eval=False \\\n",
|
| 671 |
" --predict_with_generate \\\n",
|
| 672 |
-
" --do_normalize_eval \\\n",
|
| 673 |
" --use_auth_token \\\n",
|
| 674 |
" --no_streaming \\\n",
|
| 675 |
-
" --push_to_hub=True' >> run.sh"
|
| 676 |
]
|
| 677 |
},
|
| 678 |
{
|
| 679 |
"cell_type": "code",
|
| 680 |
-
"execution_count":
|
| 681 |
"metadata": {
|
| 682 |
"colab": {
|
| 683 |
"base_uri": "https://localhost:8080/"
|
|
@@ -709,7 +710,7 @@
|
|
| 709 |
"metadata": {},
|
| 710 |
"outputs": [],
|
| 711 |
"source": [
|
| 712 |
-
"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --device=0 --language=\"am\""
|
| 713 |
]
|
| 714 |
},
|
| 715 |
{
|
|
|
|
| 628 |
},
|
| 629 |
{
|
| 630 |
"cell_type": "code",
|
| 631 |
+
"execution_count": 23,
|
| 632 |
"metadata": {
|
| 633 |
"id": "iN2MgL5gYgmB"
|
| 634 |
},
|
| 635 |
"outputs": [],
|
| 636 |
"source": [
|
| 637 |
+
"!rm run.sh\n",
|
| 638 |
"!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
|
| 639 |
" --model_name_or_path=\"openai/whisper-small\" \\\n",
|
| 640 |
" --dataset_name=\"google/fleurs\" \\\n",
|
|
|
|
| 643 |
" --train_split_name=\"train+validation\" \\\n",
|
| 644 |
" --eval_split_name=\"test\" \\\n",
|
| 645 |
" --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
|
| 646 |
+
" --max_steps=\"1000\" \\\n",
|
| 647 |
" --output_dir=\"./whisper-small-amet\" \\\n",
|
| 648 |
" --per_device_train_batch_size=\"64\" \\\n",
|
| 649 |
" --per_device_eval_batch_size=\"32\" \\\n",
|
|
|
|
| 652 |
" --learning_rate=\"1e-5\" \\\n",
|
| 653 |
" --warmup_steps=\"500\" \\\n",
|
| 654 |
" --evaluation_strategy=\"steps\" \\\n",
|
| 655 |
+
" --eval_steps=\"10000\" \\\n",
|
| 656 |
" --save_strategy=\"steps\" \\\n",
|
| 657 |
+
" --save_steps=\"100\" \\\n",
|
| 658 |
" --generation_max_length=\"225\" \\\n",
|
| 659 |
" --length_column_name=\"input_length\" \\\n",
|
| 660 |
" --max_duration_in_seconds=\"30\" \\\n",
|
|
|
|
| 663 |
" --report_to=\"tensorboard\" \\\n",
|
| 664 |
" --metric_for_best_model=\"wer\" \\\n",
|
| 665 |
" --greater_is_better=\"False\" \\\n",
|
| 666 |
+
" --load_best_model_at_end=\"False\" \\\n",
|
| 667 |
" --gradient_checkpointing \\\n",
|
| 668 |
" --fp16 \\\n",
|
| 669 |
" --overwrite_output_dir \\\n",
|
| 670 |
" --do_train \\\n",
|
| 671 |
+
" --do_eval=\"False\" \\\n",
|
| 672 |
" --predict_with_generate \\\n",
|
| 673 |
+
" --do_normalize_eval=\"False\" \\\n",
|
| 674 |
" --use_auth_token \\\n",
|
| 675 |
" --no_streaming \\\n",
|
| 676 |
+
" --push_to_hub=\"True\"' >> run.sh"
|
| 677 |
]
|
| 678 |
},
|
| 679 |
{
|
| 680 |
"cell_type": "code",
|
| 681 |
+
"execution_count": 15,
|
| 682 |
"metadata": {
|
| 683 |
"colab": {
|
| 684 |
"base_uri": "https://localhost:8080/"
|
|
|
|
| 710 |
"metadata": {},
|
| 711 |
"outputs": [],
|
| 712 |
"source": [
|
| 713 |
+
"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --batch_size=32 --max_eval_samples=64 --device=0 --language=\"am\""
|
| 714 |
]
|
| 715 |
},
|
| 716 |
{
|