niobures committed on
Commit
650ef1a
·
verified ·
1 Parent(s): 3a0e029

DeepPhonemizer

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  BERT/bene-ges/en_g2p_cmu_bert_large/en_g2p.nemo filter=lfs diff=lfs merge=lfs -text
37
  BERT/bene-ges/ru_g2p_ipa_bert_large/g2p_correct_vocab.txt filter=lfs diff=lfs merge=lfs -text
38
  BERT/bene-ges/ru_g2p_ipa_bert_large/ru_g2p.nemo filter=lfs diff=lfs merge=lfs -text
 
 
36
  BERT/bene-ges/en_g2p_cmu_bert_large/en_g2p.nemo filter=lfs diff=lfs merge=lfs -text
37
  BERT/bene-ges/ru_g2p_ipa_bert_large/g2p_correct_vocab.txt filter=lfs diff=lfs merge=lfs -text
38
  BERT/bene-ges/ru_g2p_ipa_bert_large/ru_g2p.nemo filter=lfs diff=lfs merge=lfs -text
39
+ DeepPhonemizer/papers/Transformer[[:space:]]based[[:space:]]Grapheme-to-Phoneme[[:space:]]Conversion.pdf filter=lfs diff=lfs merge=lfs -text
DeepPhonemizer/code/DeepPhonemizer.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26aa6b58eb79ca0aa064231cac10138a2e2e1281924c4de13acb495b18aeca5
3
+ size 1551747
DeepPhonemizer/colab/DP_Inference.ipynb ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "DP_Inference.ipynb",
7
+ "provenance": []
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ }
16
+ },
17
+ "cells": [
18
+ {
19
+ "cell_type": "code",
20
+ "metadata": {
21
+ "id": "bsvxO3kfo_sr"
22
+ },
23
+ "source": [
24
+ "pip install deep-phonemizer"
25
+ ],
26
+ "execution_count": null,
27
+ "outputs": []
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "metadata": {
32
+ "id": "XRTPugZ0pL-b"
33
+ },
34
+ "source": [
35
+ "from dp.phonemizer import Phonemizer\n",
36
+ "\n",
37
+ "!wget https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt\n",
38
+ "\n",
39
+ "phonemizer = Phonemizer.from_checkpoint('en_us_cmudict_ipa_forward.pt')"
40
+ ],
41
+ "execution_count": null,
42
+ "outputs": []
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "metadata": {
47
+ "colab": {
48
+ "base_uri": "https://localhost:8080/",
49
+ "height": 35
50
+ },
51
+ "id": "ei9oN93rpilk",
52
+ "outputId": "1461e27d-ba72-4249-9ecf-f98a9563dee0"
53
+ },
54
+ "source": [
55
+ "phonemizer('Phonemizing an English text is imposimpable!', lang='en_us')"
56
+ ],
57
+ "execution_count": null,
58
+ "outputs": [
59
+ {
60
+ "output_type": "execute_result",
61
+ "data": {
62
+ "application/vnd.google.colaboratory.intrinsic+json": {
63
+ "type": "string"
64
+ },
65
+ "text/plain": [
66
+ "'fɑːnəmaɪzɪŋ æn ɪŋglɪʃ tɛkst aɪz ɪmpəzɪmpəbəl!'"
67
+ ]
68
+ },
69
+ "metadata": {
70
+ "tags": []
71
+ },
72
+ "execution_count": 3
73
+ }
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "metadata": {
79
+ "colab": {
80
+ "base_uri": "https://localhost:8080/"
81
+ },
82
+ "id": "Cq8zgZOMpk5T",
83
+ "outputId": "ceb1269b-619d-402a-c661-f5be5f4d2154"
84
+ },
85
+ "source": [
86
+ "# Phonemize a list of texts and pull out model predictions with confidence scores\n",
87
+ "result = phonemizer.phonemise_list(['Phonemizing an US-English text is imposimpable!', 'Holymoly.'], lang='en_us')\n",
88
+ "\n",
89
+ "for word, pred in result.predictions.items():\n",
90
+ " print(f'{word} {pred.phonemes} {pred.confidence}')"
91
+ ],
92
+ "execution_count": null,
93
+ "outputs": [
94
+ {
95
+ "output_type": "stream",
96
+ "text": [
97
+ "Phonemizing fɑːnəmaɪzɪŋ 0.9998687552490106\n",
98
+ "Holymoly hɑːliːmɑːli 0.6092846695646951\n",
99
+ "imposimpable ɪmpəzɪmpəbəl 0.9988885450352498\n"
100
+ ],
101
+ "name": "stdout"
102
+ }
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "metadata": {
108
+ "colab": {
109
+ "base_uri": "https://localhost:8080/"
110
+ },
111
+ "id": "ylKahJLhpltv",
112
+ "outputId": "93dfd4b0-58f9-4c30-f105-b9f8dec00a9f"
113
+ },
114
+ "source": [
115
+ "# Print confidence per phoneme\n",
116
+ "result = phonemizer.phonemise_list(['Otorhinolaryngologist'], lang='en_us')\n",
117
+ "pred = result.predictions['Otorhinolaryngologist']\n",
118
+ "\n",
119
+ "print(f'{pred.phonemes} {pred.confidence}\\n')\n",
120
+ "\n",
121
+ "for c, prob in zip(pred.phoneme_tokens, pred.token_probs):\n",
122
+ " print(f'{c} {prob}')"
123
+ ],
124
+ "execution_count": null,
125
+ "outputs": [
126
+ {
127
+ "output_type": "stream",
128
+ "text": [
129
+ "oʊtɔrhɪnɑːlɛrɪŋgoʊlgɪst 0.6062305000504705\n",
130
+ "\n",
131
+ "<en_us> 1.0\n",
132
+ "o 1.0\n",
133
+ "ʊ 0.999997615814209\n",
134
+ "t 1.0\n",
135
+ "ɔ 0.8052271008491516\n",
136
+ "r 0.9994328618049622\n",
137
+ "h 1.0\n",
138
+ "ɪ 0.9999969005584717\n",
139
+ "n 1.0\n",
140
+ "ɑ 0.9980535507202148\n",
141
+ "ː 0.9757038354873657\n",
142
+ "l 1.0\n",
143
+ "ɛ 0.7834091186523438\n",
144
+ "r 0.9998308420181274\n",
145
+ "ɪ 0.9999746084213257\n",
146
+ "ŋ 0.999996542930603\n",
147
+ "g 0.9999998807907104\n",
148
+ "o 0.999997615814209\n",
149
+ "ʊ 0.9968417882919312\n",
150
+ "l 1.0\n",
151
+ "g 0.9992079138755798\n",
152
+ "ɪ 0.9950146079063416\n",
153
+ "s 0.9965135455131531\n",
154
+ "t 1.0\n",
155
+ "<end> 1.0\n"
156
+ ],
157
+ "name": "stdout"
158
+ }
159
+ ]
160
+ }
161
+ ]
162
+ }
DeepPhonemizer/colab/Training_Example.ipynb ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "accelerator": "GPU",
6
+ "colab": {
7
+ "name": "Training_Example.ipynb",
8
+ "provenance": []
9
+ },
10
+ "kernelspec": {
11
+ "display_name": "Python 3",
12
+ "name": "python3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "metadata": {
22
+ "id": "2zx09MSGFHjT"
23
+ },
24
+ "source": [
25
+ "!pip install deep-phonemizer"
26
+ ],
27
+ "execution_count": null,
28
+ "outputs": []
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "metadata": {
33
+ "id": "oZ1L1BlhOUMR"
34
+ },
35
+ "source": [
36
+ "# Download and prepare a dataset\n",
37
+ "!wget https://raw.githubusercontent.com/CUNY-CL/wikipron/master/data/scrape/tsv/eng_latn_us_broad.tsv\n",
38
+ "\n",
39
+ "with open('eng_latn_us_broad.tsv', 'r', encoding='utf-8') as f:\n",
40
+ " lines = f.readlines()\n",
41
+ "\n",
42
+ "# Prepare data as tuples (lang, word, phoneme)\n",
43
+ "lines = [l.replace(' ', '').replace('\\n', '') for l in lines]\n",
44
+ "splits = [l.split('\\t') for l in lines]\n",
45
+ "train_data = [('en_us', s[0], s[1]) for s in splits if len(s)==2]\n",
46
+ "\n",
47
+ "for d in train_data[:10000:1000]:\n",
48
+ " print(d)\n"
49
+ ],
50
+ "execution_count": null,
51
+ "outputs": []
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "metadata": {
56
+ "id": "MiV-OUi0FQ-O"
57
+ },
58
+ "source": [
59
+ "# Read standard config and adjust some params for speedup\n",
60
+ "from dp.utils.io import read_config, save_config\n",
61
+ "import dp\n",
62
+ "import os\n",
63
+ "\n",
64
+ "config_file = os.path.dirname(dp.__file__) + '/configs/forward_config.yaml'\n",
65
+ "config = read_config(config_file)\n",
66
+ "config['training']['epochs'] = 10\n",
67
+ "config['training']['warmup_steps'] = 100\n",
68
+ "config['training']['generate_steps'] = 500\n",
69
+ "config['training']['validate_steps'] = 500\n",
70
+ "save_config(config, 'config.yaml')\n",
71
+ "\n",
72
+ "for k, v in config.items():\n",
73
+ " print(f'{k} {v}')\n",
74
+ "\n"
75
+ ],
76
+ "execution_count": null,
77
+ "outputs": []
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "metadata": {
82
+ "id": "m_SNVv_HN4fR"
83
+ },
84
+ "source": [
85
+ "%load_ext tensorboard\n",
86
+ "%tensorboard --logdir /content/checkpoints"
87
+ ],
88
+ "execution_count": null,
89
+ "outputs": []
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "metadata": {
94
+ "id": "K09wG-ymXECZ"
95
+ },
96
+ "source": [
97
+ "from dp.preprocess import preprocess\n",
98
+ "from dp.train import train\n",
99
+ "\n",
100
+ "preprocess(config_file='config.yaml', train_data=train_data)\n",
101
+ "train(config_file='config.yaml')"
102
+ ],
103
+ "execution_count": null,
104
+ "outputs": []
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "metadata": {
109
+ "id": "2KcIwL6QdvEJ",
110
+ "colab": {
111
+ "base_uri": "https://localhost:8080/"
112
+ },
113
+ "outputId": "ed499929-1103-4a22-dce8-4a54d85f9b9e"
114
+ },
115
+ "source": [
116
+ "# Load phonemizer (including the training data dictionary)\n",
117
+ "from dp.phonemizer import Phonemizer\n",
118
+ "\n",
119
+ "phonemizer = Phonemizer.from_checkpoint('/content/checkpoints/best_model.pt')\n",
120
+ "result = phonemizer('Phonemizing an English text is imposimpable!', lang='en_us')\n",
121
+ "\n",
122
+ "print(result)"
123
+ ],
124
+ "execution_count": null,
125
+ "outputs": [
126
+ {
127
+ "output_type": "stream",
128
+ "text": [
129
+ "2021-05-12 10:15:19,916.916 DEBUG phonemizer: Initializing phonemizer with model step 18000\n"
130
+ ],
131
+ "name": "stderr"
132
+ },
133
+ {
134
+ "output_type": "stream",
135
+ "text": [
136
+ "fənəmaɪzɪŋ ən ɪŋɡlɪʃ tɛkst ɪz ɪmpɑsɪmpəbəl!\n"
137
+ ],
138
+ "name": "stdout"
139
+ }
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "metadata": {
145
+ "id": "Xt85fzFneDno",
146
+ "colab": {
147
+ "base_uri": "https://localhost:8080/"
148
+ },
149
+ "outputId": "bc1cb892-9cd2-4540-b930-e4443ddf69c2"
150
+ },
151
+ "source": [
152
+ "# Phonemize a list of texts and pull out model predictions with confidence scores\n",
153
+ "result = phonemizer.phonemise_list(['Phonemizing an US-English text is imposimpable!'], lang='en_us')\n",
154
+ "\n",
155
+ "for word, pred in result.predictions.items():\n",
156
+ " print(f'{word} {pred.phonemes} {pred.confidence}')"
157
+ ],
158
+ "execution_count": null,
159
+ "outputs": [
160
+ {
161
+ "output_type": "stream",
162
+ "text": [
163
+ "imposimpable ɪmpɑsɪmpəbəl 0.2185952042855603\n",
164
+ "Phonemizing fənəmaɪzɪŋ 0.22222847233670942\n"
165
+ ],
166
+ "name": "stdout"
167
+ }
168
+ ]
169
+ }
170
+ ]
171
+ }
DeepPhonemizer/papers/Grapheme-to-phoneme conversion using Long Short-Term Memory recurrent neural networks.pdf ADDED
Binary file (98.8 kB). View file
 
DeepPhonemizer/papers/Transformer based Grapheme-to-Phoneme Conversion.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fdaf60f1f91745e95b7139462ea26dd14c916725034c2ce070509ac07b93422
3
+ size 348626
DeepPhonemizer/pretrained/en_us_cmudict_forward.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e1fb223d7e027bf7b33052540c6f71d19db6d7fd87ab8671152b8b114501c2
3
+ size 66725366
DeepPhonemizer/pretrained/en_us_cmudict_ipa_forward.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cadce3d77597b55e772799cb46994ab29a460f1a62a87207b52f3cdb29894e02
3
+ size 65637046
DeepPhonemizer/pretrained/latin_ipa_forward.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8439d99bbf49f4937cdb60afccbbeb872894bcd8a6febb6517a696e2e9a33f
3
+ size 70816182