DocWolle
/

whisper_tflite_models

Automatic Speech Recognition

Model card Files Files and versions

DocWolle committed on Mar 14, 2025

Commit

b779ac5

·

verified ·

1 Parent(s): f813885

Upload Create_mel_vocab.ipynb

Files changed (1) hide show

Create_mel_vocab.ipynb +91 -0

Create_mel_vocab.ipynb ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# Shallow clone: only the files under whisper/assets are read by the next cell,\n",
+        "# so the full commit history is not needed.\n",
+        "!git clone --depth 1 https://github.com/openai/whisper.git"
+      ],
+      "metadata": {
+        "id": "1p9gHe1Yi3ai"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import io\n",
+        "import sys\n",
+        "import json\n",
+        "import struct\n",
+        "import base64\n",
+        "import torch\n",
+        "import numpy as np\n",
+        "from pathlib import Path\n",
+        "\n",
+        "# Packs Whisper's mel filter bank and tokenizer vocabulary into one binary file.\n",
+        "# Layout, in write order (every integer is a 4-byte little-endian int32):\n",
+        "#   header constant 0x5553454E\n",
+        "#   filter rows, filter columns, then rows*columns float32 values in row-major order\n",
+        "#   token count, then for each token: byte length followed by the raw token bytes\n",
+        "\n",
+        "# SET PARAMETER: True: multilingual False: English only\n",
+        "multilingual = True\n",
+        "\n",
+        "dir_whisper = \"/content/whisper\"\n",
+        "dir_out = \"/content/\"\n",
+        "\n",
+        "# both input files are read from the cloned repository's assets directory\n",
+        "dir_assets = Path(dir_whisper) / \"whisper\" / \"assets\"\n",
+        "\n",
+        "# load mel filters\n",
+        "# n_mels selects the array stored under the key mel_<n_mels> in mel_filters.npz\n",
+        "n_mels = 80\n",
+        "with np.load(dir_assets / \"mel_filters.npz\") as f:\n",
+        "    filters = torch.from_numpy(f[f\"mel_{n_mels}\"])\n",
+        "\n",
+        "# load tokenizer\n",
+        "tokenizer = dir_assets / (\"multilingual.tiktoken\" if multilingual else \"gpt2.tiktoken\")\n",
+        "\n",
+        "# each non-empty line holds two whitespace-separated fields: <base64 token> <rank>\n",
+        "with open(tokenizer, \"rb\") as f:\n",
+        "    contents = f.read()\n",
+        "tokens = {base64.b64decode(token): int(rank) for token, rank in (line.split() for line in contents.splitlines() if line)}\n",
+        "\n",
+        "# the output is written to dir_out; its name depends on the selected vocabulary\n",
+        "fname_out = Path(dir_out) / (\"filters_vocab_multilingual.bin\" if multilingual else \"filters_vocab_en.bin\")\n",
+        "\n",
+        "# The < prefix pins little-endian, standard-size fields so the file does not depend\n",
+        "# on the host; on a little-endian machine this is byte-identical to native packing.\n",
+        "# The with-block closes (and flushes) the file even if one of the writes raises.\n",
+        "with fname_out.open(\"wb\") as fout:\n",
+        "    # presumably the format's magic number - TODO confirm against the reader\n",
+        "    fout.write(struct.pack(\"<i\", 0x5553454E))\n",
+        "\n",
+        "    # write mel filters: shape first, then all values in one bulk write\n",
+        "    fout.write(struct.pack(\"<ii\", filters.shape[0], filters.shape[1]))\n",
+        "    fout.write(filters.numpy().astype(\"<f4\").tobytes())\n",
+        "\n",
+        "    # write tokenizer: tokens go out in vocabulary-file order (dict insertion order);\n",
+        "    # the rank values themselves are not written\n",
+        "    fout.write(struct.pack(\"<i\", len(tokens)))\n",
+        "    for key in tokens:\n",
+        "        fout.write(struct.pack(\"<i\", len(key)))\n",
+        "        fout.write(key)\n",
+        "\n",
+        "print(\"Done. Output file: \" , fname_out)\n",
+        "print(\"\")"
+      ],
+      "metadata": {
+        "id": "oSJIqeknjLqD"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}