Training in progress, step 2000

Browse files

Files changed (5) hide show

asr.ipynb +19 -235
generation_config.json +263 -0
model.safetensors +1 -1
runs/Dec07_01-11-40_smurf/events.out.tfevents.1701904301.smurf.965337.0 +3 -0
training_args.bin +1 -1

asr.ipynb CHANGED Viewed

@@ -5,17 +5,10 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The history saving thread hit an unexpected error (OperationalError('disk I/O error')).History will not be written to the database.\n"
-     ]
-    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "05a1ccb40c874574a7deb70f8d70f58f",
        "version_major": 2,
        "version_minor": 0
       },
@@ -370,16 +363,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
     "from transformers import Seq2SeqTrainingArguments\n",
     "\n",
     "training_args = Seq2SeqTrainingArguments(\n",
-    "    output_dir=\"./\",  # change to a repo name of your choice\n",
     "    per_device_train_batch_size=4,\n",
-    "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
     "    learning_rate=1e-5,\n",
     "    warmup_steps=500,\n",
     "    max_steps=1000,\n",
@@ -389,8 +382,8 @@
     "    per_device_eval_batch_size=2,\n",
     "    predict_with_generate=True,\n",
     "    generation_max_length=225,\n",
-    "    save_steps=400,\n",
-    "    eval_steps=200,\n",
     "    logging_steps=25,\n",
     "    report_to=[\"tensorboard\"],\n",
     "    load_best_model_at_end=True,\n",
@@ -402,7 +395,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -421,25 +414,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "ename": "ValueError",
-     "evalue": "Can't find a valid checkpoint at ./checkpoint-800",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[1;32m/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/asr.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bwake/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/asr.ipynb#X24sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain(resume_from_checkpoint\u001b[39m=\u001b[39;49m \u001b[39mTrue\u001b[39;49;00m)\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/trainer.py:1531\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1523\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo valid checkpoint found in output directory (\u001b[39m\u001b[39m{\u001b[39;00margs\u001b[39m.\u001b[39moutput_dir\u001b[39m}\u001b[39;00m\u001b[39m)\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   1525\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m   1526\u001b[0m     resume_from_checkpoint \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m   1527\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m is_sagemaker_mp_enabled()\n\u001b[1;32m   1528\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_deepspeed_enabled\n\u001b[1;32m   1529\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_fsdp_enabled\n\u001b[1;32m   1530\u001b[0m ):\n\u001b[0;32m-> 1531\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_from_checkpoint(resume_from_checkpoint)\n\u001b[1;32m   1533\u001b[0m \u001b[39m# If model was re-initialized, put it on the right device and update self.model_wrapped\u001b[39;00m\n\u001b[1;32m   1534\u001b[0m \u001b[39mif\u001b[39;00m model_reloaded:\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/trainer.py:2064\u001b[0m, in \u001b[0;36mTrainer._load_from_checkpoint\u001b[0;34m(self, resume_from_checkpoint, model)\u001b[0m\n\u001b[1;32m   2048\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCheckpoint found at \u001b[39m\u001b[39m{\u001b[39;00mresume_from_checkpoint\u001b[39m}\u001b[39;00m\u001b[39m is only supported when using PyTorch FSDP\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   2050\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\n\u001b[1;32m   2051\u001b[0m     \u001b[39many\u001b[39m(\n\u001b[1;32m   2052\u001b[0m         os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39misfile(f)\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   2062\u001b[0m     \u001b[39mor\u001b[39;00m is_fsdp_ckpt\n\u001b[1;32m   2063\u001b[0m ):\n\u001b[0;32m-> 2064\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCan\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt find a valid checkpoint at \u001b[39m\u001b[39m{\u001b[39;00mresume_from_checkpoint\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   2066\u001b[0m logger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mLoading model from \u001b[39m\u001b[39m{\u001b[39;00mresume_from_checkpoint\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   2068\u001b[0m \u001b[39mif\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39misfile(config_file):\n",
-      "\u001b[0;31mValueError\u001b[0m: Can't find a valid checkpoint at ./checkpoint-800"
      ]
     }
    ],
    "source": [
-    "trainer.train(resume_from_checkpoint= True)"
    ]
   },
   {
@@ -447,99 +439,6 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('whisper-small-hi/tokenizer_config.json',\n",
-       " 'whisper-small-hi/special_tokens_map.json',\n",
-       " 'whisper-small-hi/vocab.json',\n",
-       " 'whisper-small-hi/merges.txt',\n",
-       " 'whisper-small-hi/normalizer.json',\n",
-       " 'whisper-small-hi/added_tokens.json')"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "save_directory = 'zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/whisper-small-hi'\n",
-    "tokenizer.save_pretrained('whisper-small-hi')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84635a07e5714975a3b3e597745d4475",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "model.safetensors:   0%|          | 0.00/967M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b452f2feb12346cfb790a8503ddcf338",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "30ea510a97454ff1bbb2cb3ad89e6a5e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "training_args.bin:   0%|          | 0.00/4.73k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'https://huggingface.co/Zipei-KTH/whisper-small-hi/tree/main/'"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
@@ -559,129 +458,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "42153c06c029428e965a889029e38309",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "model.safetensors:   0%|          | 0.00/967M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "eb251857ff3d4ab18e5c58e82735d97d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "preprocessor_config.json:   0%|          | 0.00/339 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84d90e9074864b3ab818e6293595882e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ea9fda628bb44dadadc000213fe5ee03",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8d6eb5580e334be6aadc646e59f467b1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "68286349166b4fd4bfba369fb8e9540a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "86bd824130ab4f85af758b62f365f739",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "special_tokens_map.json:   0%|          | 0.00/2.19k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "expected str, bytes or os.PathLike object, not NoneType",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[1;32m/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/whisper-small-hi/asr.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2Bwake/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/whisper-small-hi/asr.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m WhisperForConditionalGeneration, WhisperProcessor\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2Bwake/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/whisper-small-hi/asr.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m model \u001b[39m=\u001b[39m WhisperForConditionalGeneration\u001b[39m.\u001b[39mfrom_pretrained(\u001b[39m\"\u001b[39m\u001b[39mZipei-KTH/whisper_hi_test\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bwake/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/whisper-small-hi/asr.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m processor \u001b[39m=\u001b[39m WhisperProcessor\u001b[39m.\u001b[39;49mfrom_pretrained(\u001b[39m\"\u001b[39;49m\u001b[39mZipei-KTH/whisper_hi_test\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/processing_utils.py:228\u001b[0m, in \u001b[0;36mProcessorMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)\u001b[0m\n\u001b[1;32m    225\u001b[0m \u001b[39mif\u001b[39;00m token \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    226\u001b[0m     kwargs[\u001b[39m\"\u001b[39m\u001b[39mtoken\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m token\n\u001b[0;32m--> 228\u001b[0m args \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m_get_arguments_from_pretrained(pretrained_model_name_or_path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m    229\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m(\u001b[39m*\u001b[39margs)\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/processing_utils.py:272\u001b[0m, in \u001b[0;36mProcessorMixin._get_arguments_from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m    269\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    270\u001b[0m         attribute_class \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(transformers_module, class_name)\n\u001b[0;32m--> 272\u001b[0m     args\u001b[39m.\u001b[39mappend(attribute_class\u001b[39m.\u001b[39;49mfrom_pretrained(pretrained_model_name_or_path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs))\n\u001b[1;32m    273\u001b[0m \u001b[39mreturn\u001b[39;00m args\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:2024\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m   2021\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[1;32m   2022\u001b[0m         logger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mloading file \u001b[39m\u001b[39m{\u001b[39;00mfile_path\u001b[39m}\u001b[39;00m\u001b[39m from cache at \u001b[39m\u001b[39m{\u001b[39;00mresolved_vocab_files[file_id]\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[0;32m-> 2024\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m_from_pretrained(\n\u001b[1;32m   2025\u001b[0m     resolved_vocab_files,\n\u001b[1;32m   2026\u001b[0m     pretrained_model_name_or_path,\n\u001b[1;32m   2027\u001b[0m     init_configuration,\n\u001b[1;32m   2028\u001b[0m     \u001b[39m*\u001b[39;49minit_inputs,\n\u001b[1;32m   2029\u001b[0m     token\u001b[39m=\u001b[39;49mtoken,\n\u001b[1;32m   2030\u001b[0m     cache_dir\u001b[39m=\u001b[39;49mcache_dir,\n\u001b[1;32m   2031\u001b[0m     local_files_only\u001b[39m=\u001b[39;49mlocal_files_only,\n\u001b[1;32m   2032\u001b[0m     _commit_hash\u001b[39m=\u001b[39;49mcommit_hash,\n\u001b[1;32m   2033\u001b[0m     _is_local\u001b[39m=\u001b[39;49mis_local,\n\u001b[1;32m   2034\u001b[0m     \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs,\n\u001b[1;32m   2035\u001b[0m )\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:2256\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase._from_pretrained\u001b[0;34m(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m   2254\u001b[0m \u001b[39m# Instantiate the tokenizer.\u001b[39;00m\n\u001b[1;32m   2255\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 2256\u001b[0m     tokenizer \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39;49m(\u001b[39m*\u001b[39;49minit_inputs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49minit_kwargs)\n\u001b[1;32m   2257\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mOSError\u001b[39;00m:\n\u001b[1;32m   2258\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mOSError\u001b[39;00m(\n\u001b[1;32m   2259\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mUnable to load vocabulary from file. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m   2260\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mPlease check that the provided vocabulary is accessible and not corrupted.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m   2261\u001b[0m     )\n",
-      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/models/whisper/tokenization_whisper.py:304\u001b[0m, in \u001b[0;36mWhisperTokenizer.__init__\u001b[0;34m(self, vocab_file, merges_file, normalizer_file, errors, unk_token, bos_token, eos_token, pad_token, add_prefix_space, language, task, predict_timestamps, **kwargs)\u001b[0m\n\u001b[1;32m    302\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbyte_encoder \u001b[39m=\u001b[39m bytes_to_unicode()\n\u001b[1;32m    303\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbyte_decoder \u001b[39m=\u001b[39m {v: k \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbyte_encoder\u001b[39m.\u001b[39mitems()}\n\u001b[0;32m--> 304\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(merges_file, encoding\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mutf-8\u001b[39;49m\u001b[39m\"\u001b[39;49m) \u001b[39mas\u001b[39;00m merges_handle:\n\u001b[1;32m    305\u001b[0m     bpe_merges \u001b[39m=\u001b[39m merges_handle\u001b[39m.\u001b[39mread()\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m)[\u001b[39m1\u001b[39m:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[1;32m    306\u001b[0m bpe_merges \u001b[39m=\u001b[39m [\u001b[39mtuple\u001b[39m(merge\u001b[39m.\u001b[39msplit()) \u001b[39mfor\u001b[39;00m merge \u001b[39min\u001b[39;00m bpe_merges]\n",
-      "\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not NoneType"
-     ]
-    }
-   ],
    "source": [
     "from transformers import WhisperForConditionalGeneration, WhisperProcessor\n",
     "\n",
-    "model = WhisperForConditionalGeneration.from_pretrained(\"Zipei-KTH/whisper_hi_test\")\n",
-    "processor = WhisperProcessor.from_pretrained(\"Zipei-KTH/whisper_hi_test\")\n"
    ]
   },
   {
@@ -727,9 +511,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "id23kernel",
    "language": "python",
-   "name": "id23kernel"
   },
   "language_info": {
    "codemirror_mode": {

    "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "442a2279299a4727a8f0fcf086cdd356",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
     "from transformers import Seq2SeqTrainingArguments\n",
     "\n",
     "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=\"./whisper-small-hi\",  # change to a repo name of your choice\n",
     "    per_device_train_batch_size=4,\n",
+    "    gradient_accumulation_steps=4,  # increase by 2x for every 2x decrease in batch size\n",
     "    learning_rate=1e-5,\n",
     "    warmup_steps=500,\n",
     "    max_steps=1000,\n",
     "    per_device_eval_batch_size=2,\n",
     "    predict_with_generate=True,\n",
     "    generation_max_length=225,\n",
+    "    save_steps=500,\n",
+    "    eval_steps=500,\n",
     "    logging_steps=25,\n",
     "    report_to=[\"tensorboard\"],\n",
     "    load_best_model_at_end=True,\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "ename": "ValueError",
+     "evalue": "No valid checkpoint found in output directory (./whisper-small-hi)",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[1;32m/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/asr.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bsmurf/u/11/zhangz13/unix/zipei/ID2223_NEW/ID2223_TopGaming/Lab2/whisper_hi_test/asr.ipynb#X24sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain(resume_from_checkpoint\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n",
+      "File \u001b[0;32m~/.conda/envs/id23/lib/python3.8/site-packages/transformers/trainer.py:1523\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1521\u001b[0m     resume_from_checkpoint \u001b[39m=\u001b[39m get_last_checkpoint(args\u001b[39m.\u001b[39moutput_dir)\n\u001b[1;32m   1522\u001b[0m     \u001b[39mif\u001b[39;00m resume_from_checkpoint \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1523\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo valid checkpoint found in output directory (\u001b[39m\u001b[39m{\u001b[39;00margs\u001b[39m.\u001b[39moutput_dir\u001b[39m}\u001b[39;00m\u001b[39m)\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   1525\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m   1526\u001b[0m     resume_from_checkpoint \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m   1527\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m is_sagemaker_mp_enabled()\n\u001b[1;32m   1528\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_deepspeed_enabled\n\u001b[1;32m   1529\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_fsdp_enabled\n\u001b[1;32m   1530\u001b[0m ):\n\u001b[1;32m   1531\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_load_from_checkpoint(resume_from_checkpoint)\n",
+      "\u001b[0;31mValueError\u001b[0m: No valid checkpoint found in output directory (./whisper-small-hi)"
      ]
     }
    ],
    "source": [
+    "trainer.train(resume_from_checkpoint=True)"
    ]
   },
   {
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "from transformers import WhisperForConditionalGeneration, WhisperProcessor\n",
     "\n",
+    "model = WhisperForConditionalGeneration.from_pretrained(\"Zipei-KTH/whisper-small-hi\")\n",
+    "processor = WhisperProcessor.from_pretrained(\"Zipei-KTH/whisper-small-hi\")\n"
    ]
   },
   {
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "dladenv",
    "language": "python",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

generation_config.json ADDED Viewed

	@@ -0,0 +1,263 @@

+{
+  "alignment_heads": [
+    [
+      5,
+      3
+    ],
+    [
+      5,
+      9
+    ],
+    [
+      8,
+      0
+    ],
+    [
+      8,
+      4
+    ],
+    [
+      8,
+      7
+    ],
+    [
+      8,
+      8
+    ],
+    [
+      9,
+      0
+    ],
+    [
+      9,
+      7
+    ],
+    [
+      9,
+      9
+    ],
+    [
+      10,
+      5
+    ]
+  ],
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": [
+    [
+      1,
+      null
+    ],
+    [
+      2,
+      50359
+    ]
+  ],
+  "is_multilingual": true,
+  "lang_to_id": {
+    "<|af|>": 50327,
+    "<|am|>": 50334,
+    "<|ar|>": 50272,
+    "<|as|>": 50350,
+    "<|az|>": 50304,
+    "<|ba|>": 50355,
+    "<|be|>": 50330,
+    "<|bg|>": 50292,
+    "<|bn|>": 50302,
+    "<|bo|>": 50347,
+    "<|br|>": 50309,
+    "<|bs|>": 50315,
+    "<|ca|>": 50270,
+    "<|cs|>": 50283,
+    "<|cy|>": 50297,
+    "<|da|>": 50285,
+    "<|de|>": 50261,
+    "<|el|>": 50281,
+    "<|en|>": 50259,
+    "<|es|>": 50262,
+    "<|et|>": 50307,
+    "<|eu|>": 50310,
+    "<|fa|>": 50300,
+    "<|fi|>": 50277,
+    "<|fo|>": 50338,
+    "<|fr|>": 50265,
+    "<|gl|>": 50319,
+    "<|gu|>": 50333,
+    "<|haw|>": 50352,
+    "<|ha|>": 50354,
+    "<|he|>": 50279,
+    "<|hi|>": 50276,
+    "<|hr|>": 50291,
+    "<|ht|>": 50339,
+    "<|hu|>": 50286,
+    "<|hy|>": 50312,
+    "<|id|>": 50275,
+    "<|is|>": 50311,
+    "<|it|>": 50274,
+    "<|ja|>": 50266,
+    "<|jw|>": 50356,
+    "<|ka|>": 50329,
+    "<|kk|>": 50316,
+    "<|km|>": 50323,
+    "<|kn|>": 50306,
+    "<|ko|>": 50264,
+    "<|la|>": 50294,
+    "<|lb|>": 50345,
+    "<|ln|>": 50353,
+    "<|lo|>": 50336,
+    "<|lt|>": 50293,
+    "<|lv|>": 50301,
+    "<|mg|>": 50349,
+    "<|mi|>": 50295,
+    "<|mk|>": 50308,
+    "<|ml|>": 50296,
+    "<|mn|>": 50314,
+    "<|mr|>": 50320,
+    "<|ms|>": 50282,
+    "<|mt|>": 50343,
+    "<|my|>": 50346,
+    "<|ne|>": 50313,
+    "<|nl|>": 50271,
+    "<|nn|>": 50342,
+    "<|no|>": 50288,
+    "<|oc|>": 50328,
+    "<|pa|>": 50321,
+    "<|pl|>": 50269,
+    "<|ps|>": 50340,
+    "<|pt|>": 50267,
+    "<|ro|>": 50284,
+    "<|ru|>": 50263,
+    "<|sa|>": 50344,
+    "<|sd|>": 50332,
+    "<|si|>": 50322,
+    "<|sk|>": 50298,
+    "<|sl|>": 50305,
+    "<|sn|>": 50324,
+    "<|so|>": 50326,
+    "<|sq|>": 50317,
+    "<|sr|>": 50303,
+    "<|su|>": 50357,
+    "<|sv|>": 50273,
+    "<|sw|>": 50318,
+    "<|ta|>": 50287,
+    "<|te|>": 50299,
+    "<|tg|>": 50331,
+    "<|th|>": 50289,
+    "<|tk|>": 50341,
+    "<|tl|>": 50348,
+    "<|tr|>": 50268,
+    "<|tt|>": 50351,
+    "<|uk|>": 50280,
+    "<|ur|>": 50290,
+    "<|uz|>": 50337,
+    "<|vi|>": 50278,
+    "<|yi|>": 50335,
+    "<|yo|>": 50325,
+    "<|zh|>": 50260
+  },
+  "max_initial_timestamp_index": 1,
+  "max_length": 448,
+  "no_timestamps_token_id": 50363,
+  "pad_token_id": 50257,
+  "return_timestamps": false,
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "task_to_id": {
+    "transcribe": 50359,
+    "translate": 50358
+  },
+  "transformers_version": "4.35.2"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7638fde3056f0abfccc3bf03f66970a11e9ef5d1462008c2b8c26b3c0d6087b2
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:fea79cc5083d62d74deb57dacf19ff302c9e320e987f7dccfc49d55b2b84c2da
 size 966995080

runs/Dec07_01-11-40_smurf/events.out.tfevents.1701904301.smurf.965337.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df55c01598422289195729096a6050f903a812c34a2e6bf0c288e901b730855c
+size 17893

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13953d5a093d9cb83f77133d0c6731e17ddbc014d55cf8f8827acf954e4adb04
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:e29181f470382fce52e35a2769c9ed58dabf92a7ebfe37f86e4a495bbe2dd96a
 size 4728