G2P

File size: 4,558 Bytes

650ef1a

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "DP_Inference.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "bsvxO3kfo_sr"
      },
      "source": [
        "# Install DeepPhonemizer with the %pip magic so the package lands in the running kernel's environment.\n",
        "%pip install -q deep-phonemizer"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XRTPugZ0pL-b"
      },
      "source": [
        "from dp.phonemizer import Phonemizer\n",
        "\n",
        "# Fetch the model checkpoint. -nc (no-clobber) skips the download when the file already exists,\n",
        "# so re-running this cell does not pile up '.pt.1', '.pt.2', ... copies.\n",
        "!wget -nc https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt\n",
        "\n",
        "# Build the phonemizer from the downloaded checkpoint file; later cells reuse this object.\n",
        "phonemizer = Phonemizer.from_checkpoint('en_us_cmudict_ipa_forward.pt')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "ei9oN93rpilk",
        "outputId": "1461e27d-ba72-4249-9ecf-f98a9563dee0"
      },
      "source": [
        "# Phonemize a single sentence; the call's return value is shown as the cell result.\n",
        "phonemizer(\n",
        "    'Phonemizing an English text is imposimpable!',\n",
        "    lang='en_us',\n",
        ")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            },
            "text/plain": [
              "'fɑːnəmaɪzɪŋ æn ɪŋglɪʃ tɛkst aɪz ɪmpəzɪmpəbəl!'"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Cq8zgZOMpk5T",
        "outputId": "ceb1269b-619d-402a-c661-f5be5f4d2154"
      },
      "source": [
        "# Run the phonemizer over several texts in one call and inspect what the model predicted.\n",
        "texts = ['Phonemizing an US-English text is imposimpable!', 'Holymoly.']\n",
        "result = phonemizer.phonemise_list(texts, lang='en_us')\n",
        "\n",
        "# One line per entry in result.predictions: word, phonemes, confidence.\n",
        "for word, prediction in result.predictions.items():\n",
        "  print(f'{word} {prediction.phonemes} {prediction.confidence}')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Phonemizing fɑːnəmaɪzɪŋ 0.9998687552490106\n",
            "Holymoly hɑːliːmɑːli 0.6092846695646951\n",
            "imposimpable ɪmpəzɪmpəbəl 0.9988885450352498\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ylKahJLhpltv",
        "outputId": "93dfd4b0-58f9-4c30-f105-b9f8dec00a9f"
      },
      "source": [
        "# Show the confidence of each predicted phoneme token for a single word.\n",
        "word = 'Otorhinolaryngologist'\n",
        "result = phonemizer.phonemise_list([word], lang='en_us')\n",
        "pred = result.predictions[word]\n",
        "\n",
        "# Overall prediction first, followed by a blank line.\n",
        "print(f'{pred.phonemes} {pred.confidence}\\n')\n",
        "\n",
        "# Then each output token paired with its probability.\n",
        "for token, prob in zip(pred.phoneme_tokens, pred.token_probs):\n",
        "  print(f'{token} {prob}')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "oʊtɔrhɪnɑːlɛrɪŋgoʊlgɪst 0.6062305000504705\n",
            "\n",
            "<en_us> 1.0\n",
            "o 1.0\n",
            "ʊ 0.999997615814209\n",
            "t 1.0\n",
            "ɔ 0.8052271008491516\n",
            "r 0.9994328618049622\n",
            "h 1.0\n",
            "ɪ 0.9999969005584717\n",
            "n 1.0\n",
            "ɑ 0.9980535507202148\n",
            "ː 0.9757038354873657\n",
            "l 1.0\n",
            "ɛ 0.7834091186523438\n",
            "r 0.9998308420181274\n",
            "ɪ 0.9999746084213257\n",
            "ŋ 0.999996542930603\n",
            "g 0.9999998807907104\n",
            "o 0.999997615814209\n",
            "ʊ 0.9968417882919312\n",
            "l 1.0\n",
            "g 0.9992079138755798\n",
            "ɪ 0.9950146079063416\n",
            "s 0.9965135455131531\n",
            "t 1.0\n",
            "<end> 1.0\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}