drmeeseeks
/

whisper-small-amet

@@ -628,12 +628,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
    "metadata": {
     "id": "iN2MgL5gYgmB"
    },
    "outputs": [],
    "source": [
     "!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
     "          --model_name_or_path=\"openai/whisper-small\" \\\n",
     "          --dataset_name=\"google/fleurs\" \\\n",
@@ -642,7 +643,7 @@
     "          --train_split_name=\"train+validation\" \\\n",
     "          --eval_split_name=\"test\" \\\n",
     "          --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
-    "          --max_steps=\"5000\" \\\n",
     "          --output_dir=\"./whisper-small-amet\" \\\n",
     "          --per_device_train_batch_size=\"64\" \\\n",
     "          --per_device_eval_batch_size=\"32\" \\\n",
@@ -651,9 +652,9 @@
     "          --learning_rate=\"1e-5\" \\\n",
     "          --warmup_steps=\"500\" \\\n",
     "          --evaluation_strategy=\"steps\" \\\n",
-    "          --eval_steps=\"5000\" \\\n",
     "          --save_strategy=\"steps\" \\\n",
-    "          --save_steps=\"5000\" \\\n",
     "          --generation_max_length=\"225\" \\\n",
     "          --length_column_name=\"input_length\" \\\n",
     "          --max_duration_in_seconds=\"30\" \\\n",
@@ -662,22 +663,22 @@
     "          --report_to=\"tensorboard\" \\\n",
     "          --metric_for_best_model=\"wer\" \\\n",
     "          --greater_is_better=\"False\" \\\n",
-    "          --load_best_model_at_end \\\n",
     "          --gradient_checkpointing \\\n",
     "          --fp16 \\\n",
     "          --overwrite_output_dir \\\n",
     "          --do_train \\\n",
-    "          --do_eval=False \\\n",
     "          --predict_with_generate \\\n",
-    "          --do_normalize_eval \\\n",
     "          --use_auth_token \\\n",
     "          --no_streaming \\\n",
-    "          --push_to_hub=True' >> run.sh"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -709,7 +710,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --device=0 --language=\"am\""
    ]
   },
   {

   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "metadata": {
     "id": "iN2MgL5gYgmB"
    },
    "outputs": [],
    "source": [
+    "!rm run.sh\n",
     "!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
     "          --model_name_or_path=\"openai/whisper-small\" \\\n",
     "          --dataset_name=\"google/fleurs\" \\\n",
     "          --train_split_name=\"train+validation\" \\\n",
     "          --eval_split_name=\"test\" \\\n",
     "          --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
+    "          --max_steps=\"1000\" \\\n",
     "          --output_dir=\"./whisper-small-amet\" \\\n",
     "          --per_device_train_batch_size=\"64\" \\\n",
     "          --per_device_eval_batch_size=\"32\" \\\n",
     "          --learning_rate=\"1e-5\" \\\n",
     "          --warmup_steps=\"500\" \\\n",
     "          --evaluation_strategy=\"steps\" \\\n",
+    "          --eval_steps=\"10000\" \\\n",
     "          --save_strategy=\"steps\" \\\n",
+    "          --save_steps=\"100\" \\\n",
     "          --generation_max_length=\"225\" \\\n",
     "          --length_column_name=\"input_length\" \\\n",
     "          --max_duration_in_seconds=\"30\" \\\n",
     "          --report_to=\"tensorboard\" \\\n",
     "          --metric_for_best_model=\"wer\" \\\n",
     "          --greater_is_better=\"False\" \\\n",
+    "          --load_best_model_at_end=\"False\" \\\n",
     "          --gradient_checkpointing \\\n",
     "          --fp16 \\\n",
     "          --overwrite_output_dir \\\n",
     "          --do_train \\\n",
+    "          --do_eval=\"False\" \\\n",
     "          --predict_with_generate \\\n",
+    "          --do_normalize_eval=\"False\" \\\n",
     "          --use_auth_token \\\n",
     "          --no_streaming \\\n",
+    "          --push_to_hub=\"True\"' >> run.sh"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
    "metadata": {},
    "outputs": [],
    "source": [
+    "!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --batch_size=32 --max_eval_samples=64 --device=0 --language=\"am\""
    ]
   },
   {