niobures commited on
Commit
d0b4949
·
verified ·
1 Parent(s): 0e49683

RNNoise (libs, models)

Browse files
.gitattributes CHANGED
@@ -36,3 +36,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
37
  RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
38
  RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
36
  A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
37
  RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
38
  RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text
39
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so filter=lfs diff=lfs merge=lfs -text
40
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so filter=lfs diff=lfs merge=lfs -text
41
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so filter=lfs diff=lfs merge=lfs -text
42
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so filter=lfs diff=lfs merge=lfs -text
43
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so filter=lfs diff=lfs merge=lfs -text
44
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib filter=lfs diff=lfs merge=lfs -text
45
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib filter=lfs diff=lfs merge=lfs -text
46
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll filter=lfs diff=lfs merge=lfs -text
47
+ libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll filter=lfs diff=lfs merge=lfs -text
48
+ models/ailia-models/code/babble_15dB.wav filter=lfs diff=lfs merge=lfs -text
49
+ models/ailia-models/code/denoised.wav filter=lfs diff=lfs merge=lfs -text
libs/rnnoise-bin/.github/workflows/build.yml ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: build
2
+
3
+ on:
4
+ release:
5
+ types:
6
+ - created
7
+ workflow_dispatch:
8
+
9
+ env:
10
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
11
+
12
+ jobs:
13
+ build-linux-x86-64:
14
+ runs-on: ubuntu-20.04
15
+ steps:
16
+ - name: Clone RNNoise
17
+ uses: sudosubin/git-clone-action@v1.0.1
18
+ with:
19
+ repository: xiph/rnnoise
20
+ platform: gitlab.xiph.org
21
+ - name: autogen
22
+ run: ./autogen.sh
23
+ - name: configure
24
+ run: ./configure
25
+ - name: build
26
+ run: make
27
+ - name: Get release
28
+ id: get_release
29
+ uses: bruceadams/get-release@v1.2.2
30
+ - name: Upload
31
+ uses: actions/upload-release-asset@v1.0.2
32
+ with:
33
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
34
+ asset_path: .libs/librnnoise.so
35
+ asset_name: librnnoise-linux-x86-64.so
36
+ asset_content_type: application/octet-stream
37
+ build-linux-x86:
38
+ runs-on: ubuntu-20.04
39
+ steps:
40
+ - name: Clone RNNoise
41
+ uses: sudosubin/git-clone-action@v1.0.1
42
+ with:
43
+ repository: xiph/rnnoise
44
+ platform: gitlab.xiph.org
45
+ - name: apt update
46
+ run: sudo apt-get update -y
47
+ - name: install gcc-i686-linux-gnu
48
+ run: sudo apt-get install gcc-i686-linux-gnu -y
49
+ - name: autogen
50
+ run: ./autogen.sh
51
+ - name: configure
52
+ run: ./configure --host=i686-linux-gnu "CFLAGS=-m32" "CXXFLAGS=-m32" "LDFLAGS=-m32"
53
+ - name: build
54
+ run: make
55
+ - name: Get release
56
+ id: get_release
57
+ uses: bruceadams/get-release@v1.2.2
58
+ - name: Upload
59
+ uses: actions/upload-release-asset@v1.0.2
60
+ with:
61
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
62
+ asset_path: .libs/librnnoise.so
63
+ asset_name: librnnoise-linux-x86.so
64
+ asset_content_type: application/octet-stream
65
+ build-linux-aarch64:
66
+ runs-on: ubuntu-20.04
67
+ steps:
68
+ - name: Clone RNNoise
69
+ uses: sudosubin/git-clone-action@v1.0.1
70
+ with:
71
+ repository: xiph/rnnoise
72
+ platform: gitlab.xiph.org
73
+ - name: apt update
74
+ run: sudo apt-get update -y
75
+ - name: install gcc-aarch64-linux-gnu
76
+ run: sudo apt-get install gcc-aarch64-linux-gnu -y
77
+ - name: autogen
78
+ run: ./autogen.sh
79
+ - name: configure
80
+ run: ./configure --host=aarch64-linux-gnu
81
+ - name: build
82
+ run: make
83
+ - name: Get release
84
+ id: get_release
85
+ uses: bruceadams/get-release@v1.2.2
86
+ - name: Upload
87
+ uses: actions/upload-release-asset@v1.0.2
88
+ with:
89
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
90
+ asset_path: .libs/librnnoise.so
91
+ asset_name: librnnoise-linux-aarch64.so
92
+ asset_content_type: application/octet-stream
93
+ build-linux-armel:
94
+ runs-on: ubuntu-20.04
95
+ steps:
96
+ - name: Clone RNNoise
97
+ uses: sudosubin/git-clone-action@v1.0.1
98
+ with:
99
+ repository: xiph/rnnoise
100
+ platform: gitlab.xiph.org
101
+ - name: apt update
102
+ run: sudo apt-get update -y
103
+ - name: install gcc-arm-linux-gnueabi
104
+ run: sudo apt-get install gcc-arm-linux-gnueabi -y
105
+ - name: autogen
106
+ run: ./autogen.sh
107
+ - name: configure
108
+ run: ./configure --host=arm-linux-gnueabi
109
+ - name: build
110
+ run: make
111
+ - name: Get release
112
+ id: get_release
113
+ uses: bruceadams/get-release@v1.2.2
114
+ - name: Upload
115
+ uses: actions/upload-release-asset@v1.0.2
116
+ with:
117
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
118
+ asset_path: .libs/librnnoise.so
119
+ asset_name: librnnoise-linux-armel.so
120
+ asset_content_type: application/octet-stream
121
+ build-linux-armhf:
122
+ runs-on: ubuntu-20.04
123
+ steps:
124
+ - name: Clone RNNoise
125
+ uses: sudosubin/git-clone-action@v1.0.1
126
+ with:
127
+ repository: xiph/rnnoise
128
+ platform: gitlab.xiph.org
129
+ - name: apt update
130
+ run: sudo apt-get update -y
131
+ - name: install gcc-arm-linux-gnueabihf
132
+ run: sudo apt-get install gcc-arm-linux-gnueabihf -y
133
+ - name: autogen
134
+ run: ./autogen.sh
135
+ - name: configure
136
+ run: ./configure --host=arm-linux-gnueabihf
137
+ - name: build
138
+ run: make
139
+ - name: Get release
140
+ id: get_release
141
+ uses: bruceadams/get-release@v1.2.2
142
+ - name: Upload
143
+ uses: actions/upload-release-asset@v1.0.2
144
+ with:
145
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
146
+ asset_path: .libs/librnnoise.so
147
+ asset_name: librnnoise-linux-armhf.so
148
+ asset_content_type: application/octet-stream
149
+ build-windows-x86-64:
150
+ runs-on: ubuntu-20.04
151
+ steps:
152
+ - name: Clone RNNoise
153
+ uses: sudosubin/git-clone-action@v1.0.1
154
+ with:
155
+ repository: xiph/rnnoise
156
+ platform: gitlab.xiph.org
157
+ - name: apt update
158
+ run: sudo apt-get update -y
159
+ - name: install mingw-w64
160
+ run: sudo apt-get install mingw-w64 -y
161
+ - name: autogen
162
+ run: ./autogen.sh
163
+ - name: configure
164
+ run: ./configure --host=x86_64-w64-mingw32
165
+ - name: build
166
+ run: make
167
+ - name: Get release
168
+ id: get_release
169
+ uses: bruceadams/get-release@v1.2.2
170
+ - name: Upload
171
+ uses: actions/upload-release-asset@v1.0.2
172
+ with:
173
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
174
+ asset_path: .libs/librnnoise-0.dll
175
+ asset_name: librnnoise-windows-x86-64.dll
176
+ asset_content_type: application/octet-stream
177
+ build-windows-x86:
178
+ runs-on: ubuntu-20.04
179
+ steps:
180
+ - name: Clone RNNoise
181
+ uses: sudosubin/git-clone-action@v1.0.1
182
+ with:
183
+ repository: xiph/rnnoise
184
+ platform: gitlab.xiph.org
185
+ - name: apt update
186
+ run: sudo apt-get update -y
187
+ - name: install mingw-w64
188
+ run: sudo apt-get install mingw-w64 -y
189
+ - name: autogen
190
+ run: ./autogen.sh
191
+ - name: configure
192
+ run: ./configure --host=i686-w64-mingw32
193
+ - name: build
194
+ run: make
195
+ - name: Get release
196
+ id: get_release
197
+ uses: bruceadams/get-release@v1.2.2
198
+ - name: Upload
199
+ uses: actions/upload-release-asset@v1.0.2
200
+ with:
201
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
202
+ asset_path: .libs/librnnoise-0.dll
203
+ asset_name: librnnoise-windows-x86.dll
204
+ asset_content_type: application/octet-stream
205
+ build-macos-x86-64:
206
+ runs-on: macos-11
207
+ steps:
208
+ - name: Clone RNNoise
209
+ uses: sudosubin/git-clone-action@v1.0.1
210
+ with:
211
+ repository: xiph/rnnoise
212
+ platform: gitlab.xiph.org
213
+ - name: install automake
214
+ run: brew install automake
215
+ - name: autogen
216
+ run: ./autogen.sh
217
+ - name: configure
218
+ run: ./configure
219
+ - name: build
220
+ run: make
221
+ - name: Get release
222
+ id: get_release
223
+ uses: bruceadams/get-release@v1.2.2
224
+ - name: Upload
225
+ uses: actions/upload-release-asset@v1.0.2
226
+ with:
227
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
228
+ asset_path: .libs/librnnoise.dylib
229
+ asset_name: librnnoise-macos-x86-64.dylib
230
+ asset_content_type: application/octet-stream
231
+ build-macos-aarch64:
232
+ runs-on: macos-11
233
+ steps:
234
+ - name: Clone RNNoise
235
+ uses: sudosubin/git-clone-action@v1.0.1
236
+ with:
237
+ repository: xiph/rnnoise
238
+ platform: gitlab.xiph.org
239
+ - name: install automake
240
+ run: brew install automake
241
+ - name: autogen
242
+ run: ./autogen.sh
243
+ - name: configure
244
+ run: ./configure --host=aarch64-apple-darwin CFLAGS="-arch arm64"
245
+ - name: build
246
+ run: make
247
+ - name: Get release
248
+ id: get_release
249
+ uses: bruceadams/get-release@v1.2.2
250
+ - name: Upload
251
+ uses: actions/upload-release-asset@v1.0.2
252
+ with:
253
+ upload_url: ${{ steps.get_release.outputs.upload_url }}
254
+ asset_path: .libs/librnnoise.dylib
255
+ asset_name: librnnoise-macos-aarch64.dylib
256
+ asset_content_type: application/octet-stream
libs/rnnoise-bin/README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rnnoise-bin
2
+
3
+ this repository contains builds of RNNoise for the following platforms:
4
+
5
+ linux/x86-64
6
+ linux/x86
7
+ linux/aarch64
8
+ linux/armel
9
+ linux/armhf
10
+ windows/x86-64
11
+ windows/x86
12
+ macos/x86-64
13
+ macos/aarch64
14
+
15
+ the builds can be found in the [release section](https://github.com/mjwells2002/rnnoise-bin/releases)
16
+
17
+ each build will be tagged with the git commit it was built from in the [RNNoise repo](https://gitlab.xiph.org/xiph/rnnoise)
18
+
19
+ These builds are produced with GitHub Actions; you can see the workflow file [here](https://github.com/mjwells2002/rnnoise-bin/blob/main/.github/workflows/build.yml).
libs/rnnoise-bin/releases/7f449bf8/7f449bf8.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa329b91913c0d3b2d2ebde40df022d618b65a251f7aa9554ac9cf6cbcbe4837
3
+ size 189611
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05712a1801a3fd60af61abf03de08819955d88880fef6303d7f78a653f4230c0
3
+ size 242184
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285c4f4bcbdb66f3e8b2031b666c7a2109284e16dde187c09239bc7e63d23ec5
3
+ size 235536
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d69847b6f4ecb4b4a976dcec43138390c1ced37dd543a90341a4a7649195632
3
+ size 210068
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b823a1545ee9a9734ed3255ccfe5bed9069790cb95ee7fe7797a55eea7e3c0
3
+ size 253000
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ee37a51356d8fcc15b12ffa1ff3e286f8b5055454f2ff8ef0657e8e9ea5990
3
+ size 141456
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c1d80f521daf7a6103aeffa6c7ed365c85276c75c72ea086c6c5a5f270b0f2
3
+ size 169167
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b24d9cd6ebc840e3ef7edbd92f5ae046c5c59439252d5702202c3b3996d60f4b
3
+ size 171504
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0be864c6f8f16e854a3d7a35e5b9d133aea50dac4ec80600df78dc834686570f
3
+ size 551036
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c299f873df269f1798d66a6fb3797c696bffcd71e719f65e460b663685dcc94
3
+ size 471627
libs/rnnoise-bin/releases/7f449bf8/rnnoise-bin-7f449bf8.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abc4f144495a6f509dfaff0120ddfae523c37bf1afe252597b63e9773761727
3
+ size 1959
models/ailia-models/code/README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rnnoise
2
+
3
+ ## Input
4
+
5
+ Audio file
6
+
7
+ - Sample rate: 48 kHz
8
+ - Bits per sample: 16-bit
9
+ - Bit rate: 768 kbps
10
+
11
+ https://github.com/axinc-ai/ailia-models/assets/29946532/f1908958-d3be-44a7-9180-59c375bb488c
12
+
13
+ (Audio from https://jmvalin.ca/demo/rnnoise/)
14
+
15
+ ## Output
16
+
17
+ Audio file
18
+
19
+ https://github.com/axinc-ai/ailia-models/assets/29946532/21eaf44d-bffd-428a-9637-f5d385364698
20
+
21
+ ## Usage
22
+ Automatically downloads the onnx and prototxt files on the first run.
23
+ It is necessary to be connected to the Internet while downloading.
24
+
25
+ For the sample wav,
26
+ ```bash
27
+ $ python3 rnnoise.py
28
+ ```
29
+
30
+ If you want to specify the audio, put the file path after the `--input` option.
31
+ You can use `--savepath` option to change the name of the output file to save.
32
+ ```bash
33
+ $ python3 rnnoise.py --input AUDIO_FILE --savepath SAVE_AUDIO_FILE
34
+ ```
35
+
36
+ ## Reference
37
+
38
+ - [rnnoise](https://github.com/xiph/rnnoise)
39
+ - [xiph.org / moz://a](https://jmvalin.ca/demo/rnnoise/)
40
+
41
+ ## Framework
42
+
43
+ Keras
44
+
45
+ ## Model Format
46
+
47
+ ONNX opset=14
48
+
49
+ ## Netron
50
+
51
+ [rnn_model.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt)
models/ailia-models/code/babble_15dB.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ea7f570a750027c97ef86a2f9931d25ecc1886973bade22b06d774d71d1565
3
+ size 259244
models/ailia-models/code/denoised.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d293e13ee78336fd497bba43eff580f48c1b30f95819caa8d2a249123c3ef84
3
+ size 259244
models/ailia-models/code/kiss_fft.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import numpy as np
4
+
5
+ MAXFACTORS = 8
6
+
7
+
8
class Complex:
    """Mutable complex value with a real part ``r`` and an imaginary part ``i``."""

    def __init__(self):
        # Both components start at zero; callers assign them afterwards.
        self.r, self.i = 0.0, 0.0

    def __repr__(self):
        # Rendered as "<real><sign><|imag|>j" with six decimals per component.
        sign = '-' if 0 > self.i else '+'
        return f'{self.r:.6f}{sign}{abs(self.i):.6f}j'
15
+
16
+
17
class FFTState:
    """Precomputed tables for one FFT size.

    Attributes:
        nfft: transform length.
        scale: factor applied to every input sample by ``opus_fft``.
        shift: twiddle stride shift; non-positive values are treated as 0 by
            ``opus_fft_impl``.
        factors: flat table of (radix, remaining length) pairs, filled in
            place by ``kf_factor``.
        bitrev: destination index for each input sample.
        twiddles: list of complex twiddle factors.
        arch_fft: never assigned by the code in this module.
    """

    # Immutable defaults can safely live on the class.
    nfft = 0
    scale = 0
    shift = 0
    bitrev = None
    twiddles = None
    arch_fft = None

    def __init__(self):
        # The factor table is mutated in place, so it must be allocated per
        # instance: a class-level array would be shared by every FFTState and
        # building a second state would overwrite the first one's factors.
        self.factors = np.zeros(2 * MAXFACTORS, dtype=int)
25
+
26
+
27
def C_ADD(res, a, b):
    """Store the complex sum ``a + b`` into ``res`` (attributes ``r`` and ``i``)."""
    res.r, res.i = a.r + b.r, a.i + b.i
30
+
31
+
32
def C_SUB(res, a, b):
    """Store the complex difference ``a - b`` into ``res`` (attributes ``r`` and ``i``)."""
    res.r, res.i = a.r - b.r, a.i - b.i
35
+
36
+
37
def C_ADDTO(res, a):
    """Accumulate the complex value ``a`` into ``res`` in place."""
    res.r += a.r
    res.i += a.i
40
+
41
+
42
def C_MUL(m, a, b):
    """Store the complex product ``a * b`` into ``m``.

    Both components are computed before either is written, so the result is
    still correct when ``m`` is the same object as ``a`` or ``b``.
    """
    real = a.r * b.r - a.i * b.i
    imag = a.r * b.i + a.i * b.r
    m.r = real
    m.i = imag
45
+
46
+
47
def C_MULBYSCALAR(c, s):
    """Scale both components of the complex value ``c`` by ``s`` in place."""
    c.r, c.i = c.r * s, c.i * s
50
+
51
+
52
def kf_bfly2(Fout, m, N):
    """Apply in-place radix-2 butterflies to ``N`` consecutive groups of 8 entries.

    Inside each group, entry ``k`` (0..3) is combined with entry ``k + 4``
    after the latter has been rotated by a fixed twiddle.

    The temporary ``t`` is held in local floats. Binding ``t`` to
    ``Fout2[0]`` would alias the element rather than copy it, so every later
    write to ``t`` would corrupt the buffer.

    Args:
        Fout: indexable sequence of objects with float attributes ``r``/``i``.
        m: not read; kept for signature compatibility. The fixed offsets
            below only fit a half-length of 4.
        N: number of 8-entry groups to process.
    """
    tw = 0.7071067812  # approximately 1/sqrt(2)

    for group in range(N):
        base = 8 * group
        for k in range(4):
            lo = Fout[base + k]
            hi = Fout[base + 4 + k]

            # t = hi rotated by the k-th fixed twiddle.
            if k == 0:
                t_r = hi.r
                t_i = hi.i
            elif k == 1:
                t_r = (hi.r + hi.i) * tw
                t_i = (hi.i - hi.r) * tw
            elif k == 2:
                t_r = hi.i
                t_i = -hi.r
            else:
                t_r = (hi.i - hi.r) * tw
                t_i = -(hi.i + hi.r) * tw

            # hi = lo - t must be formed before lo is updated to lo + t.
            hi.r = lo.r - t_r
            hi.i = lo.i - t_i
            lo.r = lo.r + t_r
            lo.i = lo.i + t_i
78
+
79
+
80
def kf_bfly4(Fout, fstride, st, m, N, mm):
    """Apply in-place radix-4 butterflies to ``Fout``.

    Elements are addressed by index rather than by re-slicing ``Fout`` and
    the twiddle table on every step, which avoids copying the remaining list
    each iteration. The arithmetic and its evaluation order are unchanged.

    Args:
        Fout: indexable sequence of objects with float attributes ``r``/``i``.
        fstride: twiddle index step; butterfly ``j`` uses twiddles
            ``j*fstride``, ``2*j*fstride`` and ``3*j*fstride``.
        st: state exposing ``twiddles``; not accessed when ``m == 1``.
        m: distance between the four inputs of one butterfly.
        N: number of groups to process.
        mm: offset between the starts of consecutive groups (used when
            ``m != 1``; for ``m == 1`` groups are 4 entries apart).
    """
    if m == 1:
        # Degenerate case where all the twiddles are 1.
        for group in range(N):
            base = 4 * group
            f0 = Fout[base]
            f1 = Fout[base + 1]
            f2 = Fout[base + 2]
            f3 = Fout[base + 3]

            # s0 = f0 - f2 ; f0 += f2
            s0r = f0.r - f2.r
            s0i = f0.i - f2.i
            f0.r = f0.r + f2.r
            f0.i = f0.i + f2.i
            # s1 = f1 + f3 ; f2 = f0 - s1 ; f0 += s1
            s1r = f1.r + f3.r
            s1i = f1.i + f3.i
            f2.r = f0.r - s1r
            f2.i = f0.i - s1i
            f0.r = f0.r + s1r
            f0.i = f0.i + s1i
            # s1 = f1 - f3
            s1r = f1.r - f3.r
            s1i = f1.i - f3.i

            f1.r = s0r + s1i
            f1.i = s0i - s1r
            f3.r = s0r - s1i
            f3.i = s0i + s1r
        return

    m2 = 2 * m
    m3 = 3 * m
    for group in range(N):
        base = group * mm
        tw = st.twiddles
        # m is guaranteed to be a multiple of 4.
        for j in range(m):
            f0 = Fout[base + j]
            f1 = Fout[base + j + m]
            f2 = Fout[base + j + m2]
            f3 = Fout[base + j + m3]
            w1 = tw[j * fstride]
            w2 = tw[j * fstride * 2]
            w3 = tw[j * fstride * 3]

            # Twiddled inputs: s0 = f1*w1, s1 = f2*w2, s2 = f3*w3.
            s0r = f1.r * w1.r - f1.i * w1.i
            s0i = f1.r * w1.i + f1.i * w1.r
            s1r = f2.r * w2.r - f2.i * w2.i
            s1i = f2.r * w2.i + f2.i * w2.r
            s2r = f3.r * w3.r - f3.i * w3.i
            s2i = f3.r * w3.i + f3.i * w3.r

            # s5 = f0 - s1 ; f0 += s1
            s5r = f0.r - s1r
            s5i = f0.i - s1i
            f0.r = f0.r + s1r
            f0.i = f0.i + s1i
            # s3 = s0 + s2 ; s4 = s0 - s2
            s3r = s0r + s2r
            s3i = s0i + s2i
            s4r = s0r - s2r
            s4i = s0i - s2i
            # f2 = f0 - s3 must be taken before f0 absorbs s3.
            f2.r = f0.r - s3r
            f2.i = f0.i - s3i
            f0.r = f0.r + s3r
            f0.i = f0.i + s3i

            f1.r = s5r + s4i
            f1.i = s5i - s4r
            f3.r = s5r - s4i
            f3.i = s5i + s4r
129
+
130
+
131
def kf_bfly3(Fout, fstride, st, m, N, mm):
    """Apply in-place radix-3 butterflies to ``Fout``.

    Elements are addressed by index rather than by re-slicing ``Fout`` and
    the twiddle table on every step, which avoids copying the remaining list
    each iteration. The arithmetic and its evaluation order are unchanged.

    Args:
        Fout: indexable sequence of objects with float attributes ``r``/``i``.
        fstride: twiddle index step; butterfly ``u`` uses twiddles
            ``u*fstride`` and ``2*u*fstride``.
        st: state exposing ``twiddles``.
        m: distance between the three inputs of one butterfly.
        N: number of groups to process.
        mm: offset between the starts of consecutive groups.
    """
    m2 = 2 * m
    # Only the imaginary part of this twiddle is used below.
    epi3 = st.twiddles[fstride * m]

    for group in range(N):
        base = group * mm
        tw = st.twiddles
        # For non-custom modes, m is guaranteed to be a multiple of 4.
        for u in range(m):
            f0 = Fout[base + u]
            f1 = Fout[base + u + m]
            f2 = Fout[base + u + m2]
            w1 = tw[u * fstride]
            w2 = tw[u * fstride * 2]

            # s1 = f1*w1, s2 = f2*w2
            s1r = f1.r * w1.r - f1.i * w1.i
            s1i = f1.r * w1.i + f1.i * w1.r
            s2r = f2.r * w2.r - f2.i * w2.i
            s2i = f2.r * w2.i + f2.i * w2.r

            # s3 = s1 + s2 ; s0 = s1 - s2
            s3r = s1r + s2r
            s3i = s1i + s2i
            s0r = s1r - s2r
            s0i = s1i - s2i

            f1.r = f0.r - s3r / 2
            f1.i = f0.i - s3i / 2

            s0r *= epi3.i
            s0i *= epi3.i

            f0.r = f0.r + s3r
            f0.i = f0.i + s3i

            # f2 reads the intermediate f1 before f1 receives its final value.
            f2.r = f1.r + s0i
            f2.i = f1.i - s0r

            f1.r = f1.r - s0i
            f1.i = f1.i + s0r
166
+
167
+
168
def kf_bfly5(Fout, fstride, st, m, N, mm):
    """Apply in-place radix-5 butterflies to ``Fout``.

    Elements are addressed by index rather than by keeping five re-sliced
    copies of ``Fout`` and advancing them on every step, which avoids copying
    the remaining list each iteration. The arithmetic and its evaluation
    order are unchanged.

    Args:
        Fout: indexable sequence of objects with float attributes ``r``/``i``.
        fstride: twiddle index step; butterfly ``u`` uses twiddles
            ``k*u*fstride`` for ``k`` in 1..4.
        st: state exposing ``twiddles``.
        m: distance between the five inputs of one butterfly.
        N: number of groups to process.
        mm: offset between the starts of consecutive groups.
    """
    ya = st.twiddles[fstride * m]
    yb = st.twiddles[fstride * 2 * m]
    tw = st.twiddles

    for group in range(N):
        base = group * mm
        # For non-custom modes, m is guaranteed to be a multiple of 4.
        for u in range(m):
            f0 = Fout[base + u]
            f1 = Fout[base + m + u]
            f2 = Fout[base + 2 * m + u]
            f3 = Fout[base + 3 * m + u]
            f4 = Fout[base + 4 * m + u]

            # Keep the untouched first input; f0 is overwritten below.
            s0r = f0.r
            s0i = f0.i

            # Twiddled inputs s1..s4.
            w = tw[u * fstride]
            s1r = f1.r * w.r - f1.i * w.i
            s1i = f1.r * w.i + f1.i * w.r
            w = tw[2 * u * fstride]
            s2r = f2.r * w.r - f2.i * w.i
            s2i = f2.r * w.i + f2.i * w.r
            w = tw[3 * u * fstride]
            s3r = f3.r * w.r - f3.i * w.i
            s3i = f3.r * w.i + f3.i * w.r
            w = tw[4 * u * fstride]
            s4r = f4.r * w.r - f4.i * w.i
            s4i = f4.r * w.i + f4.i * w.r

            # Symmetric sums and differences of the mirrored pairs.
            s7r = s1r + s4r
            s7i = s1i + s4i
            s10r = s1r - s4r
            s10i = s1i - s4i
            s8r = s2r + s3r
            s8i = s2i + s3i
            s9r = s2r - s3r
            s9i = s2i - s3i

            f0.r = f0.r + (s7r + s8r)
            f0.i = f0.i + (s7i + s8i)

            s5r = s0r + ((s7r * ya.r) + (s8r * yb.r))
            s5i = s0i + ((s7i * ya.r) + (s8i * yb.r))

            s6r = (s10i * ya.i) + (s9i * yb.i)
            s6i = -((s10r * ya.i) + (s9r * yb.i))

            f1.r = s5r - s6r
            f1.i = s5i - s6i
            f4.r = s5r + s6r
            f4.i = s5i + s6i

            s11r = s0r + ((s7r * yb.r) + (s8r * ya.r))
            s11i = s0i + ((s7i * yb.r) + (s8i * ya.r))
            s12r = (s9i * ya.i) - (s10i * yb.i)
            s12i = (s10r * yb.i) - (s9r * ya.i)

            f2.r = s11r + s12r
            f2.i = s11i + s12i
            f3.r = s11r - s12r
            f3.i = s11i - s12i
223
+
224
+
225
def compute_bitrev_table(Fout, f, fstride, in_stride, factors, st):
    """Fill ``f`` with the output slot assigned to each input index.

    The table is written through absolute indices into ``f``. Writing
    through successive slices of ``f`` only works when slices are views
    (NumPy arrays); with a plain list the writes would land in throw-away
    copies. Indexing works for both.

    Args:
        Fout: first output slot covered by this call.
        f: mutable sequence receiving the table.
        fstride: stride multiplier for this recursion level.
        in_stride: additional stride applied to positions in ``f``.
        factors: flat sequence of (radix, remaining length) pairs, one pair
            per recursion level.
        st: not used; kept for signature compatibility.
    """

    def fill(first_out, pos, stride, level):
        p = int(factors[2 * level])      # the radix
        m = int(factors[2 * level + 1])  # stage's fft length/p
        step = stride * in_stride
        if m == 1:
            for j in range(p):
                f[pos + j * step] = first_out + j
        else:
            for j in range(p):
                fill(first_out + j * m, pos + j * step, stride * p, level + 1)

    fill(Fout, 0, fstride, 0)
238
+
239
+
240
def kf_factor(n, facbuf):
    """Factor ``n`` into radices 4, 2, 3 and 5 and record the stage layout.

    ``facbuf`` is filled with consecutive pairs ``(radix, remaining length)``.
    The radix order is reversed after factoring, so the radices found first
    end up last.

    Integer division is used throughout so radices and lengths stay integers.
    True division would turn them into floats and rely on an implicit
    float-to-int conversion when stored.

    Args:
        n: transform length to factor.
        facbuf: mutable integer sequence with room for two entries per stage.

    Returns:
        1 on success, 0 if ``n`` has a factor greater than 5 (``facbuf`` may
        then be partially written).
    """
    p = 4
    stages = 0
    nbak = n

    while n > 1:
        while n % p:
            # Candidate radices are tried in the order 4, 2, 3, 5, 7, ...
            if p == 4:
                p = 2
            elif p == 2:
                p = 3
            else:
                p += 2
            if p > 32000 or p * p > n:
                p = n  # no more factors, skip to the end
        n //= p
        if p > 5:
            return 0

        facbuf[2 * stages] = p
        if p == 2 and stages > 1:
            # Promote this radix-2 stage to radix 4 and move the 2 to slot 1.
            facbuf[2 * stages] = 4
            facbuf[2] = 2
        stages += 1

    n = nbak

    # Reverse the radix order in place.
    for i in range(stages // 2):
        j = stages - i - 1
        facbuf[2 * i], facbuf[2 * j] = facbuf[2 * j], facbuf[2 * i]

    # Second entry of each pair: length remaining after this stage's radix.
    for i in range(stages):
        n //= int(facbuf[2 * i])
        facbuf[2 * i + 1] = n

    return 1
272
+
273
+
274
def compute_twiddles(twiddles, nfft):
    """Set ``twiddles[k]`` to ``cos(phase) + j*sin(phase)`` with ``phase = -2*pi*k/nfft``.

    Args:
        twiddles: sequence of at least ``nfft`` objects with ``r``/``i`` attributes.
        nfft: number of entries to fill.
    """
    for k in range(nfft):
        angle = (-2 * math.pi / nfft) * k
        entry = twiddles[k]
        entry.r = math.cos(angle)
        entry.i = math.sin(angle)
279
+
280
+
281
def opus_fft_alloc_twiddles(nfft):
    """Build the FFT state (twiddles, stage factors, bit-reversal table) for ``nfft``.

    Args:
        nfft: transform length; must factor into radices 2, 3, 4 and 5.

    Returns:
        A populated ``FFTState``.

    Raises:
        ValueError: if ``kf_factor`` reports that ``nfft`` cannot be factored.
    """
    st = FFTState()

    st.nfft = nfft
    st.scale = 1. / nfft
    st.shift = -1

    # Give this state its own factor table: kf_factor writes into it in
    # place, so a table shared between states would be overwritten by the
    # next allocation.
    st.factors = np.zeros(2 * MAXFACTORS, dtype=int)

    st.twiddles = twiddles = [Complex() for _ in range(nfft)]
    compute_twiddles(twiddles, nfft)

    # kf_factor returns 0 for unsupported sizes; continuing would build the
    # bit-reversal table from an incomplete factor list.
    if not kf_factor(nfft, st.factors):
        raise ValueError(
            'unsupported FFT size {}: it has a prime factor greater than 5'.format(nfft))

    # bitrev
    st.bitrev = bitrev = np.zeros(nfft, dtype=int)
    compute_bitrev_table(0, bitrev, 1, 1, st.factors, st)

    return st
298
+
299
+
300
def opus_fft_impl(st, fout):
    """Run every butterfly stage described by ``st.factors`` on ``fout`` in place.

    Stages are executed from the last (radix, length) pair back to the first.

    Args:
        st: FFT state providing ``factors``, ``shift`` and the twiddles read
            by the butterflies.
        fout: buffer of complex values, already in bit-reversed order.
    """
    factors = st.factors
    strides = np.zeros(MAXFACTORS, dtype=int)

    # shift can be -1
    shift = st.shift if st.shift > 0 else 0

    # Walk the factor pairs until the stage whose remaining length is 1,
    # accumulating the number of groups handled at each stage.
    strides[0] = 1
    stage_count = 0
    remaining = None
    while remaining != 1:
        radix = factors[2 * stage_count]
        remaining = factors[2 * stage_count + 1]
        strides[stage_count + 1] = strides[stage_count] * radix
        stage_count += 1

    generic = {4: kf_bfly4, 3: kf_bfly3, 5: kf_bfly5}

    m = factors[2 * stage_count - 1]
    for i in reversed(range(stage_count)):
        m2 = factors[2 * i - 1] if i != 0 else 1

        radix = factors[2 * i]
        if radix == 2:
            kf_bfly2(fout, m, strides[i])
        else:
            butterfly = generic.get(int(radix))
            if butterfly is not None:
                butterfly(fout, strides[i] << shift, st, m, strides[i], m2)

        m = m2
334
+
335
+
336
def opus_fft(st, fin, fout):
    """Compute the scaled FFT of ``fin`` into ``fout``.

    Each input sample is multiplied by ``st.scale`` and stored at the slot
    given by ``st.bitrev``; ``opus_fft_impl`` then transforms ``fout`` in place.

    Args:
        st: FFT state (``nfft``, ``scale``, ``bitrev``, ...).
        fin: sequence of ``st.nfft`` objects with ``r``/``i`` attributes.
        fout: sequence of ``st.nfft`` objects with ``r``/``i`` attributes,
            overwritten with the result.
    """
    gain = st.scale

    # Bit-reverse the input
    for n in range(st.nfft):
        src = fin[n]
        dst = fout[st.bitrev[n]]
        dst.r = gain * src.r
        dst.i = gain * src.i

    opus_fft_impl(st, fout)
models/ailia-models/code/pitch.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+
4
def find_best_pitch(xcorr, y, _len, max_pitch, best_pitch):
    """Write the two best-scoring lags into ``best_pitch``.

    A lag is scored by ``xcorr[lag]**2`` relative to the energy of the
    ``_len``-sample window of ``y`` starting at that lag; lags with
    non-positive correlation are ignored. Scores are compared by
    cross-multiplication, so no division is performed.

    Args:
        xcorr: correlation for each lag in ``range(max_pitch)``.
        y: samples; indices up to ``max_pitch + _len - 1`` are read.
        _len: window length used for the running energy.
        max_pitch: number of lags examined.
        best_pitch: two-element mutable sequence; ``[0]`` receives the best
            lag and ``[1]`` the runner-up (initialised to 0 and 1).
    """
    energy = 1
    top_num, top_den = -1, 0
    second_num, second_den = -1, 0

    best_pitch[0], best_pitch[1] = 0, 1

    for j in range(_len):
        energy = energy + (y[j] * y[j])

    for lag in range(max_pitch):
        corr = xcorr[lag]
        if corr > 0:
            num = corr * corr
            if num * second_den > second_num * energy:
                if num * top_den > top_num * energy:
                    # New best: the previous best becomes the runner-up.
                    second_num, second_den = top_num, top_den
                    best_pitch[1] = best_pitch[0]
                    top_num, top_den = num, energy
                    best_pitch[0] = lag
                else:
                    second_num, second_den = num, energy
                    best_pitch[1] = lag

        # Slide the energy window by one sample and keep it at least 1.
        energy += (y[lag + _len] * y[lag + _len]) - (y[lag] * y[lag])
        energy = max(1, energy)
31
+
32
+
33
+ def _celt_lpc(lpc, ac, p):
34
+ """
35
+ lpc (out): [0...p-1] LPC coefficients
36
+ ac (in): [0...p] autocorrelation values
37
+ """
38
+ error = ac[0]
39
+
40
+ for i in range(p):
41
+ lpc[i] = 0
42
+
43
+ if ac[0] != 0:
44
+ for i in range(p):
45
+ # Sum up this iteration's reflection coefficient
46
+ rr = 0
47
+ for j in range(i):
48
+ rr += lpc[j] * ac[i - j]
49
+ rr += ac[i + 1]
50
+ r = -rr / error
51
+ # Update LPC coefficients and total error
52
+ lpc[i] = r
53
+
54
+ for j in range((i + 1) >> 1):
55
+ tmp1 = lpc[j]
56
+ tmp2 = lpc[i - 1 - j]
57
+ lpc[j] = tmp1 + (r * tmp2)
58
+ lpc[i - 1 - j] = tmp2 + (r * tmp1)
59
+
60
+ error = error - ((r * r) * error)
61
+ # Bail out once we get 30 dB gain
62
+ if error < .001 * ac[0]:
63
+ break
64
+
65
+
66
def _celt_autocorr(x, ac, window, overlap, lag, n):
    """
    Compute the autocorrelation of ``x`` for lags 0..lag.

    x:  (in)  [0...n-1] samples x
    ac: (out) [0...lag] ac values
    window/overlap: when ``overlap`` is non-zero, the first and last
        ``overlap`` samples are multiplied by ``window`` (mirrored at the
        end) on a copy of ``x`` before correlating.

    Always returns 0.
    """
    fastN = n - lag

    if overlap == 0:
        xptr = x
    else:
        # Work on a copy so the caller's samples are not modified.
        xptr = [x[i] for i in range(n)]
        for i in range(overlap):
            mirror = n - i - 1
            xptr[i] = x[i] * window[i]
            xptr[mirror] = x[mirror] * window[i]

    # Bulk of the sum over the first fastN products of every lag ...
    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag + 1)

    # ... then add the products that remain for each lag.
    for k in range(lag + 1):
        tail = 0
        for i in range(k + fastN, n):
            tail = tail + (xptr[i] * xptr[i - k])
        ac[k] += tail

    return 0
94
+
95
+
96
def celt_fir5(x, num, y, N, mem):
    """Filter ``N`` samples as ``y[n] = x[n] + sum(num[k] * x[n-1-k] for k in 0..4)``.

    Args:
        x: input samples.
        num: five filter coefficients.
        y: output buffer; may be the same object as ``x`` because each input
            sample is read before its output slot is written.
        N: number of samples to process.
        mem: five previous inputs, most recent first; updated in place so the
            next call continues the stream.
    """
    c0, c1, c2, c3, c4 = (num[k] for k in range(5))
    d0, d1, d2, d3, d4 = (mem[k] for k in range(5))

    for n in range(N):
        sample = x[n]
        acc = sample + c0 * d0
        acc = acc + c1 * d1
        acc = acc + c2 * d2
        acc = acc + c3 * d3
        acc = acc + c4 * d4
        # Shift the delay line by one sample.
        d0, d1, d2, d3, d4 = sample, d0, d1, d2, d3
        y[n] = acc

    for k, value in enumerate((d0, d1, d2, d3, d4)):
        mem[k] = value
126
+
127
+
128
def pitch_downsample(x, x_lp, _len, C):
    """Downsample by 2 into ``x_lp`` and filter the result with an LPC-derived FIR.

    Steps: 3-tap low-pass with decimation by 2, channel mix when ``C == 2``,
    4th-order LPC analysis of the result, then a 5-tap FIR built from the
    damped LPC coefficients, applied in place.

    Args:
        x: sequence of channels; ``x[0]`` (and ``x[1]`` when ``C == 2``) hold
            the input samples.
        x_lp: output buffer; the first ``_len >> 1`` entries are written.
        _len: number of input samples per channel.
        C: channel count; the second channel is added only when ``C == 2``.
    """
    half = _len >> 1
    ac = [0] * 5
    lpc = [0] * 4
    mem = [0] * 5
    lpc2 = [0] * 5
    c1 = .8

    ch = x[0]
    for i in range(1, half):
        x_lp[i] = .5 * (.5 * (ch[(2 * i - 1)] + ch[(2 * i + 1)]) + ch[2 * i])
    x_lp[0] = .5 * (.5 * (ch[1]) + ch[0])

    if C == 2:
        ch = x[1]
        # Must cover the same range as the first channel. A bound of
        # `_len >> 2` would leave the upper half of x_lp without any
        # contribution from the second channel.
        for i in range(1, half):
            x_lp[i] += .5 * (.5 * (ch[(2 * i - 1)] + ch[(2 * i + 1)]) + ch[2 * i])
        x_lp[0] += .5 * (.5 * (ch[1]) + ch[0])

    _celt_autocorr(x_lp, ac, None, 0, 4, half)

    # Noise floor -40 dB
    ac[0] *= 1.0001

    # Lag windowing
    for i in range(1, 4 + 1):
        ac[i] -= ac[i] * (.008 * i) * (.008 * i)

    _celt_lpc(lpc, ac, 4)

    # Damp coefficient i by 0.9 ** (i + 1).
    tmp = 1.
    for i in range(4):
        tmp = .9 * tmp
        lpc[i] = lpc[i] * tmp

    # Add a zero
    lpc2[0] = lpc[0] + .8
    lpc2[1] = lpc[1] + c1 * lpc[0]
    lpc2[2] = lpc[2] + c1 * lpc[1]
    lpc2[3] = lpc[3] + c1 * lpc[2]
    lpc2[4] = c1 * lpc[3]
    celt_fir5(x_lp, lpc2, x_lp, half, mem)
165
+
166
+
167
def xcorr_kernel(x, y, _sum, _len):
    """Accumulate four adjacent cross-correlation lags at once.

    For ``k`` in 0..3 this adds ``sum(x[j] * y[j + k] for j in range(_len))``
    to ``_sum[k]``, accumulating in increasing ``j``.

    Written as a plain indexed loop. An unrolled form that slices ``x`` and
    ``y`` per sample copies the remaining list on every step, and leaves its
    loop index undefined when ``_len <= 3``; this form handles any
    ``_len >= 0``.

    Args:
        x: first signal; indices ``0 .. _len - 1`` are read.
        y: second signal; indices ``0 .. _len + 2`` are read.
        _sum: four-element mutable sequence of accumulators, updated in place.
        _len: number of products per lag.
    """
    for j in range(_len):
        tmp = x[j]
        _sum[0] = _sum[0] + tmp * y[j]
        _sum[1] = _sum[1] + tmp * y[j + 1]
        _sum[2] = _sum[2] + tmp * y[j + 2]
        _sum[3] = _sum[3] + tmp * y[j + 3]
233
+
234
+
235
def dual_inner_prod(x, y01, y02, N):
    """Return the inner products of ``x`` with ``y01`` and with ``y02``.

    Only the first ``N`` samples of each sequence are used. The two sums
    are returned as the tuple ``(x . y01, x . y02)``.
    """
    acc_first = 0
    acc_second = 0
    idx = 0
    while idx < N:
        sample = x[idx]
        acc_first = acc_first + sample * y01[idx]
        acc_second = acc_second + sample * y02[idx]
        idx += 1
    return acc_first, acc_second
241
+
242
+
243
def celt_inner_prod(x, y, N):
    """Return the inner product of the first ``N`` samples of ``x`` and ``y``."""
    total = 0
    for k in range(N):
        total += x[k] * y[k]
    return total
248
+
249
+
250
def celt_pitch_xcorr(_x, _y, xcorr, _len, max_pitch):
    """Fill ``xcorr[i]`` with the correlation of ``_x`` and ``_y`` shifted by ``i``.

    For each lag ``i`` in ``range(max_pitch)`` the first ``_len`` samples of
    ``_x`` are correlated with ``_y[i:]`` and the result is stored in
    ``xcorr[i]``.

    Args:
        _x: Reference signal, at least ``_len`` samples.
        _y: Signal that is shifted; must support slicing.
        xcorr: Mutable sequence receiving ``max_pitch`` results in place.
        _len: Number of samples correlated per lag.
        max_pitch: Number of lags to evaluate. Values that are not a
            multiple of four, including values below four, are supported.

    Returns:
        None.
    """
    # Lags below this bound are produced four at a time by xcorr_kernel; the
    # bound is computed explicitly so the remainder loop never depends on a
    # loop variable that is unset when the grouped loop does not run.
    grouped_end = (max(max_pitch, 0) // 4) * 4

    for i in range(0, grouped_end, 4):
        _sum = [0, 0, 0, 0]
        xcorr_kernel(_x, _y[i:], _sum, _len)
        xcorr[i] = _sum[0]
        xcorr[i + 1] = _sum[1]
        xcorr[i + 2] = _sum[2]
        xcorr[i + 3] = _sum[3]

    # In case max_pitch isn't a multiple of 4, do non-unrolled version.
    for i in range(grouped_end, max_pitch):
        xcorr[i] = celt_inner_prod(_x, _y[i:], _len)
266
+
267
+
268
def pitch_search(x_lp, y, _len, max_pitch):
    """Search ``y`` for the lag that best matches ``x_lp`` and return it.

    A coarse search is run on copies of both signals decimated by two, then
    the correlation is recomputed on the given signals only around the two
    coarse candidates, and finally the winner is nudged by one step using
    its neighbours. The returned value is ``2 * best_lag - offset``.
    """
    quarter_len = _len >> 2
    half_len = _len >> 1
    quarter_pitch = max_pitch >> 2
    half_pitch = max_pitch >> 1
    lag = _len + max_pitch

    best_pitch = [0, 0]

    # Downsample by 2 again
    x_lp4 = [x_lp[2 * j] for j in range(quarter_len)]
    y_lp4 = [y[2 * j] for j in range(lag >> 2)]
    xcorr = [0] * half_pitch

    # Coarse search with 4x decimation
    celt_pitch_xcorr(x_lp4, y_lp4, xcorr, quarter_len, quarter_pitch)
    find_best_pitch(xcorr, y_lp4, quarter_len, quarter_pitch, best_pitch)

    # Finer search with 2x decimation: only lags within two steps of either
    # coarse candidate are evaluated, every other entry is zeroed.
    for i in range(half_pitch):
        near_first = abs(i - 2 * best_pitch[0]) <= 2
        near_second = abs(i - 2 * best_pitch[1]) <= 2
        if near_first or near_second:
            xcorr[i] = max(-1, celt_inner_prod(x_lp, y[i:], half_len))
        else:
            xcorr[i] = 0
    find_best_pitch(xcorr, y, half_len, half_pitch, best_pitch)

    # Refine by pseudo-interpolation
    peak = best_pitch[0]
    offset = 0
    if 0 < peak < half_pitch - 1:
        a, b, c = xcorr[peak - 1], xcorr[peak], xcorr[peak + 1]
        if (c - a) > .7 * (b - a):
            offset = 1
        elif (a - c) > .7 * (b - c):
            offset = -1

    return 2 * peak - offset
310
+
311
+
312
def compute_pitch_gain(xy, xx, yy):
    """Return ``xy`` normalised by ``sqrt(1 + xx * yy)``."""
    denominator = math.sqrt(1 + xx * yy)
    return xy / denominator
314
+
315
+
316
# Multiplier used to pick the second lag checked for each divisor k in
# remove_doubling (k == 2 takes a separate path, so only indices 3..15 are read).
second_check = [0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2]


def remove_doubling(x, maxperiod, minperiod, N, T0_, prev_period, prev_gain):
    """Check sub-multiples of a pitch period and return the resulting pitch gain.

    The period in ``T0_[0]`` is tested against the candidates ``T0 / k`` for
    ``k`` in ``2..15``. A candidate replaces the current choice when its
    gain exceeds a threshold derived from the gain at ``T0`` and from the
    previous frame's period and gain.

    Args:
        x: Signal buffer. Samples from index ``maxperiod // 2`` onwards are
            the current frame, earlier samples are history.
        maxperiod: Largest period; halved internally.
        minperiod: Smallest period; halved internally. ``T0_[0]`` is never
            left below the un-halved value.
        N: Frame length; halved internally.
        T0_: One-element list holding the initial period. It is overwritten
            with the refined period.
        prev_period: Period chosen for the previous frame; halved internally.
        prev_gain: Gain returned for the previous frame.

    Returns:
        The pitch gain of the selected period, capped by the normalised
        correlation gain of that period.
    """
    xcorr = [0] * 3

    minperiod0 = minperiod
    maxperiod //= 2
    minperiod //= 2
    T0_[0] //= 2
    prev_period //= 2
    N //= 2
    x0 = x
    x = x0[maxperiod:]
    if T0_[0] >= maxperiod:
        T0_[0] = maxperiod - 1

    T = T0 = T0_[0]
    yy_lookup = [0] * (maxperiod + 1)
    xx, xy = dual_inner_prod(x, x, x0[maxperiod - T0:], N)
    yy_lookup[0] = xx
    yy = xx
    for i in range(1, maxperiod + 1):
        # Slide the energy window back by one sample. The leaving sample is
        # addressed through x0 so that a negative (N - i) reads the history
        # before the frame instead of wrapping to the end of the slice.
        entering = x0[maxperiod - i]
        leaving = x0[maxperiod + N - i]
        yy = yy + (entering * entering) - (leaving * leaving)
        yy_lookup[i] = max(0, yy)
    yy = yy_lookup[T0]
    best_xy = xy
    best_yy = yy
    g = g0 = compute_pitch_gain(xy, xx, yy)
    # Look for any pitch at T/k
    for k in range(2, 15 + 1):
        T1 = (2 * T0 + k) // (2 * k)
        if T1 < minperiod:
            break
        # Look for another strong correlation at T1b
        if k == 2:
            if T1 + T0 > maxperiod:
                T1b = T0
            else:
                T1b = T0 + T1
        else:
            T1b = (2 * second_check[k] * T0 + k) // (2 * k)

        xy, xy2 = dual_inner_prod(x, x0[maxperiod - T1:], x0[maxperiod - T1b:], N)
        xy = .5 * (xy + xy2)
        yy = .5 * (yy_lookup[T1] + yy_lookup[T1b])
        g1 = compute_pitch_gain(xy, xx, yy)
        if abs(T1 - prev_period) <= 1:
            cont = prev_gain
        elif abs(T1 - prev_period) <= 2 and 5 * k * k < T0:
            cont = .5 * prev_gain
        else:
            cont = 0
        thresh = max(.3, (.7 * g0) - cont)

        # Bias against very high pitch (very short period) to avoid false-positives
        # due to short-term correlation
        # NOTE(review): the elif below can never run, because T1 < 2 * minperiod
        # implies T1 < 3 * minperiod. It is left as-is so existing output does
        # not change.
        if T1 < 3 * minperiod:
            thresh = max(.4, (.85 * g0) - cont)
        elif T1 < 2 * minperiod:
            thresh = max(.5, (.9 * g0) - cont)
        if g1 > thresh:
            best_xy = xy
            best_yy = yy
            T = T1
            g = g1

    best_xy = max(0, best_xy)
    if best_yy <= best_xy:
        pg = 1.
    else:
        pg = best_xy / (best_yy + 1)

    # Correlation at T - 1, T and T + 1 decides a one-step adjustment.
    for k in range(3):
        xcorr[k] = celt_inner_prod(x, x0[maxperiod - (T + k - 1):], N)
    if xcorr[2] - xcorr[0] > .7 * (xcorr[1] - xcorr[0]):
        offset = 1
    elif xcorr[0] - xcorr[2] > .7 * (xcorr[1] - xcorr[2]):
        offset = -1
    else:
        offset = 0

    if pg > g:
        pg = g
    # Back to full resolution.
    T0_[0] = 2 * T + offset

    if T0_[0] < minperiod0:
        T0_[0] = minperiod0

    return pg
models/ailia-models/code/rnnoise.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import math
3
+ import wave
4
+ import struct
5
+ from logging import getLogger
6
+
7
+ import numpy as np
8
+ from tqdm import tqdm
9
+
10
+ import ailia
11
+
12
+ # import original modules
13
+ sys.path.append('../../util')
14
+ from arg_utils import get_base_parser, update_parser, get_savepath # noqa
15
+ from model_utils import check_and_download_models # noqa
16
+
17
+ from kiss_fft import Complex, opus_fft_alloc_twiddles, opus_fft
18
+ from pitch import pitch_downsample, pitch_search, remove_doubling
19
+
20
+ logger = getLogger(__name__)
21
+
22
+ # ======================
23
+ # Parameters
24
+ # ======================
25
+
26
# Model files and the remote location handed to check_and_download_models.
WEIGHT_PATH = 'rnn_model.onnx'
MODEL_PATH = 'rnn_model.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/rnnoise/'

# Audio file names handed to the argument parser.
AUDIO_PATH = 'babble_15dB.wav'
OUTPUT_PATH = 'denoised.wav'

# Pitch analysis sizes.
PITCH_MIN_PERIOD = 60
PITCH_MAX_PERIOD = 768
PITCH_FRAME_SIZE = 960
PITCH_BUF_SIZE = PITCH_FRAME_SIZE + PITCH_MAX_PERIOD

# Feature layout: band values, three groups of delta values and two extras.
NB_BANDS = 22
CEPS_MEM = 8
NB_DELTA_CEPS = 6
NB_FEATURES = NB_BANDS + NB_DELTA_CEPS * 3 + 2

# Frame, analysis window and spectrum sizes.
FRAME_SIZE_SHIFT = 2
FRAME_SIZE = 120 << FRAME_SIZE_SHIFT
WINDOW_SIZE = FRAME_SIZE * 2
FREQ_SIZE = FRAME_SIZE + 1
47
+
48
+ # ======================
49
+ # Argument Parser Config
50
+ # ======================
51
+
52
# Build the command-line parser and add the switch that selects onnxruntime
# instead of ailia for inference.
parser = get_base_parser('rnnoise', AUDIO_PATH, OUTPUT_PATH)
parser.add_argument(
    '--onnx', action='store_true', help='execute onnxruntime version.',
)
args = update_parser(parser)
61
+
62
+
63
+ # ======================
64
+ # Secondary Functions
65
+ # ======================
66
+
67
class CommonState:
    """Tables shared by every frame; filled in by ``check_init`` on first use."""

    # True once check_init has populated the fields below.
    init = False
    # FFT state returned by opus_fft_alloc_twiddles.
    kfft = None
    # First half of the analysis/synthesis window.
    half_window = np.zeros(FRAME_SIZE)
    # NB_BANDS x NB_BANDS DCT matrix stored row by row.
    dct_table = np.zeros(NB_BANDS * NB_BANDS)


# Single shared instance used by the transform helpers.
common = CommonState()
75
+
76
+
77
class DenoiseState:
    """State carried from one frame to the next while denoising one stream.

    Every instance owns its buffers. The arrays are created in ``__init__``
    rather than on the class, because the processing functions mutate them
    in place and class-level arrays would be shared by all instances.
    """

    def __init__(self):
        # Previous input frame, used as the first half of the analysis window.
        self.analysis_mem = np.zeros(FRAME_SIZE)
        # Ring of the last CEPS_MEM cepstra and the index of the next slot.
        self.cepstral_mem = np.zeros((CEPS_MEM, NB_BANDS))
        self.memid = 0
        # Second half of the previous synthesis window, for overlap-add.
        self.synthesis_mem = np.zeros(FRAME_SIZE)
        # History of input samples used for pitch analysis.
        self.pitch_buf = np.zeros(PITCH_BUF_SIZE)
        self.pitch_enh_buf = np.zeros(PITCH_BUF_SIZE)
        # Gain and period produced by remove_doubling for the previous frame.
        self.last_gain = 0.0
        self.last_period = 0
        # Memory of the biquad filter applied to the input.
        self.mem_hp_x = np.zeros(2)
87
+
88
+
89
def compute_band_energy(bandE, X):
    """Write the energy of each band of spectrum ``X`` into ``bandE``.

    The squared magnitude of every bin between two band edges is split
    between the two neighbouring bands with linear weights. The first and
    last bands only receive one side of that split, so they are doubled.

    Args:
        bandE: Mutable sequence of ``NB_BANDS`` values, overwritten in place.
        X: Sequence of spectrum bins exposing ``.r`` and ``.i`` attributes.

    Returns:
        None.
    """
    # The band edges come from the module-level eband5ms table, the same one
    # compute_band_corr and interp_band_gain use, so the three cannot drift.
    _sum = [0] * NB_BANDS
    for i in range(NB_BANDS - 1):
        band_start = eband5ms[i] << FRAME_SIZE_SHIFT
        band_size = (eband5ms[i + 1] - eband5ms[i]) << FRAME_SIZE_SHIFT
        for j in range(band_size):
            frac = j / band_size
            coeff = X[band_start + j]
            tmp = coeff.r ** 2
            tmp += coeff.i ** 2
            _sum[i] += (1 - frac) * tmp
            _sum[i + 1] += frac * tmp

    _sum[0] *= 2
    _sum[NB_BANDS - 1] *= 2
    for i in range(NB_BANDS):
        bandE[i] = _sum[i]
108
+
109
+
110
# Band edges; the band functions shift each entry left by FRAME_SIZE_SHIFT
# to turn it into a spectrum bin index.
eband5ms = [
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 10, 12, 14, 16, 20, 24, 28,
    34, 40, 48, 60, 78, 100,
]
113
+
114
+
115
def compute_band_corr(bandE, X, P):
    """Write the per-band correlation of spectra ``X`` and ``P`` into ``bandE``.

    For every bin the product ``X.r * P.r + X.i * P.i`` is split between the
    two neighbouring bands with linear weights; the first and last bands are
    doubled afterwards.
    """
    acc = [0] * NB_BANDS

    for band in range(NB_BANDS - 1):
        first_bin = eband5ms[band] << FRAME_SIZE_SHIFT
        band_size = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for j in range(band_size):
            frac = j / band_size
            xb = X[first_bin + j]
            pb = P[first_bin + j]
            tmp = xb.r * pb.r
            tmp += xb.i * pb.i
            acc[band] += (1 - frac) * tmp
            acc[band + 1] += frac * tmp

    acc[0] *= 2
    acc[NB_BANDS - 1] *= 2
    for band, value in enumerate(acc):
        bandE[band] = value
131
+
132
+
133
def interp_band_gain(g, bandE):
    """Expand per-band values ``bandE`` into per-bin values in ``g``.

    ``g`` is cleared first, then every bin between two band edges receives
    the linear interpolation of the two neighbouring band values. Bins past
    the last band edge stay at zero.
    """
    g[...] = 0
    for band in range(NB_BANDS - 1):
        first_bin = eband5ms[band] << FRAME_SIZE_SHIFT
        width = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for j in range(width):
            frac = j / width
            g[first_bin + j] = (1 - frac) * bandE[band] + frac * bandE[band + 1]
140
+
141
+
142
def check_init():
    """Fill the shared FFT state, half window and DCT table on the first call.

    Later calls return immediately because ``common.init`` is already set.
    """
    if common.init:
        return

    common.kfft = opus_fft_alloc_twiddles(2 * FRAME_SIZE)

    half_pi = .5 * math.pi
    for i in range(FRAME_SIZE):
        inner = math.sin(half_pi * (i + .5) / FRAME_SIZE)
        common.half_window[i] = math.sin(half_pi * inner * inner)

    for row in range(NB_BANDS):
        for col in range(NB_BANDS):
            value = math.cos((row + .5) * col * math.pi / NB_BANDS)
            if col == 0:
                value *= math.sqrt(.5)
            common.dct_table[row * NB_BANDS + col] = value

    common.init = True
161
+
162
+
163
def dct(out, in_data):
    """Apply the shared DCT table to ``in_data`` and store the result in ``out``.

    Args:
        out: Mutable sequence; its first ``NB_BANDS`` entries are overwritten.
        in_data: Sequence of at least ``NB_BANDS`` input values.

    Returns:
        ``out``, for convenience.
    """
    check_init()

    # Normalisation factor, tied to NB_BANDS instead of a literal band count
    # and computed once instead of once per output value.
    scale = math.sqrt(2. / NB_BANDS)

    for i in range(NB_BANDS):
        _sum = 0
        for j in range(NB_BANDS):
            _sum += in_data[j] * common.dct_table[j * NB_BANDS + i]
        out[i] = _sum * scale

    return out
173
+
174
+
175
def forward_transform(out, in_data):
    """Run the FFT on ``in_data`` and store the first ``FREQ_SIZE`` bins in ``out``.

    ``in_data`` supplies ``WINDOW_SIZE`` real samples; the imaginary parts of
    the FFT input are zero.
    """
    check_init()

    time_domain = [Complex() for _ in range(WINDOW_SIZE)]
    spectrum = [Complex() for _ in range(WINDOW_SIZE)]

    for k, cell in enumerate(time_domain):
        cell.r = in_data[k]
        cell.i = 0

    opus_fft(common.kfft, time_domain, spectrum)
    for k in range(FREQ_SIZE):
        out[k] = spectrum[k]
188
+
189
+
190
def inverse_transform(out, in_data):
    """Turn the ``FREQ_SIZE`` bins of ``in_data`` into ``WINDOW_SIZE`` samples in ``out``.

    The upper part of the spectrum is rebuilt as the complex conjugate of
    the lower part. The forward FFT is then applied and its real output is
    read in reverse order and scaled by ``WINDOW_SIZE``.
    """
    check_init()

    spectrum = [Complex() for _ in range(WINDOW_SIZE)]
    result = [Complex() for _ in range(WINDOW_SIZE)]

    for k in range(FREQ_SIZE):
        spectrum[k] = in_data[k]
    for k in range(FREQ_SIZE, WINDOW_SIZE):
        mirror = spectrum[WINDOW_SIZE - k]
        spectrum[k].r = mirror.r
        spectrum[k].i = -mirror.i

    opus_fft(common.kfft, spectrum, result)

    # output in reverse order for IFFT.
    for k in range(WINDOW_SIZE):
        out[k] = WINDOW_SIZE * result[(WINDOW_SIZE - k) % WINDOW_SIZE].r
208
+
209
+
210
def apply_window(x):
    """Multiply ``x`` in place by the symmetric window built from ``half_window``."""
    check_init()

    last = WINDOW_SIZE - 1
    for i in range(FRAME_SIZE):
        weight = common.half_window[i]
        x[i] *= weight
        x[last - i] *= weight
216
+
217
+
218
def frame_analysis(st, X, Ex, in_data):
    """Window and transform the previous plus the current frame.

    The spectrum is written to ``X`` and its band energies to ``Ex``.
    ``st.analysis_mem`` is updated to hold ``in_data`` for the next call.
    """
    window = np.empty(WINDOW_SIZE)
    window[:FRAME_SIZE] = st.analysis_mem
    window[FRAME_SIZE:] = in_data
    st.analysis_mem[...] = in_data

    apply_window(window)
    forward_transform(X, window)

    compute_band_energy(Ex, X)
228
+
229
+
230
def compute_frame_features(st, X, P, Ex, Ep, Exp, features, x):
    """Analyse one frame and fill ``features`` with the network input.

    ``X``/``Ex`` receive the spectrum and band energies of the frame,
    ``P``/``Ep`` those of the pitch-delayed signal, and ``Exp`` their
    normalised band correlation. ``st`` is updated with the new pitch
    history, pitch period/gain and cepstral memory.

    Returns ``1`` (with ``features`` zeroed) when the total band energy is
    below 0.04, otherwise the boolean ``E < 0.1``.
    """
    frame_analysis(st, X, Ex, x)

    # Slide the pitch history by one frame and append the new samples.
    kept = PITCH_BUF_SIZE - FRAME_SIZE
    st.pitch_buf[:kept] = st.pitch_buf[FRAME_SIZE:]
    st.pitch_buf[kept:] = x

    pitch_buf = np.zeros(PITCH_BUF_SIZE >> 1)
    pitch_downsample([st.pitch_buf], pitch_buf, PITCH_BUF_SIZE, 1)
    found = pitch_search(
        pitch_buf[PITCH_MAX_PERIOD >> 1:], pitch_buf, PITCH_FRAME_SIZE,
        PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD)
    pitch_index = PITCH_MAX_PERIOD - found

    # remove_doubling refines the period through a one-element list.
    period_ref = [pitch_index]
    gain = remove_doubling(
        pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
        PITCH_FRAME_SIZE, period_ref, st.last_period, st.last_gain)
    pitch_index = period_ref[0]
    st.last_period = pitch_index
    st.last_gain = gain

    # Window of the input history delayed by the pitch period.
    p = np.zeros(WINDOW_SIZE)
    delayed_start = PITCH_BUF_SIZE - WINDOW_SIZE - pitch_index
    for i in range(WINDOW_SIZE):
        p[i] = st.pitch_buf[delayed_start + i]
    apply_window(p)
    forward_transform(P, p)
    compute_band_energy(Ep, P)
    compute_band_corr(Exp, X, P)
    for i in range(NB_BANDS):
        Exp[i] = Exp[i] / math.sqrt(.001 + Ex[i] * Ep[i])

    corr_ceps = np.zeros(NB_BANDS)
    dct(corr_ceps, Exp)

    corr_base = NB_BANDS + 2 * NB_DELTA_CEPS
    for i in range(NB_DELTA_CEPS):
        features[corr_base + i] = corr_ceps[i]
    features[corr_base] -= 1.3
    features[corr_base + 1] -= 0.9
    features[NB_BANDS + 3 * NB_DELTA_CEPS] = .01 * (pitch_index - 300)

    # Log band energies, floored relative to the running maximum and to the
    # previous band.
    Ly = np.zeros(NB_BANDS)
    E = 0
    logMax = -2
    follow = -2
    for i in range(NB_BANDS):
        Ly[i] = math.log10(1e-2 + Ex[i])
        Ly[i] = max(logMax - 7, max(follow - 1.5, Ly[i]))
        logMax = max(logMax, Ly[i])
        follow = max(follow - 1.5, Ly[i])
        E += Ex[i]

    if E < 0.04:
        # If there's no audio, avoid messing up the state.
        features[...] = 0
        return 1

    dct(features, Ly)

    features[0] -= 12
    features[1] -= 4

    # Current slot of the cepstral ring and the two slots written before it.
    ceps_0 = st.cepstral_mem[st.memid]
    if st.memid < 1:
        ceps_1 = st.cepstral_mem[CEPS_MEM + st.memid - 1]
    else:
        ceps_1 = st.cepstral_mem[st.memid - 1]
    if st.memid < 2:
        ceps_2 = st.cepstral_mem[CEPS_MEM + st.memid - 2]
    else:
        ceps_2 = st.cepstral_mem[st.memid - 2]
    for i in range(NB_BANDS):
        ceps_0[i] = features[i]
    st.memid += 1

    for i in range(NB_DELTA_CEPS):
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i]
        features[NB_BANDS + i] = ceps_0[i] - ceps_2[i]
        features[NB_BANDS + NB_DELTA_CEPS + i] = ceps_0[i] - 2 * ceps_1[i] + ceps_2[i]

    # Spectral variability features.
    if st.memid == CEPS_MEM:
        st.memid = 0

    spec_variability = 0
    for i in range(CEPS_MEM):
        mindist = 1e15
        for j in range(CEPS_MEM):
            dist = 0.
            for k in range(NB_BANDS):
                diff = st.cepstral_mem[i][k] - st.cepstral_mem[j][k]
                dist += diff * diff
            if j != i:
                mindist = min(mindist, dist)

        spec_variability += mindist

    features[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = spec_variability / CEPS_MEM - 2.1

    return E < 0.1
323
+
324
+
325
def frame_synthesis(st, out, y):
    """Convert spectrum ``y`` to samples and overlap-add them into ``out``.

    The first half of the windowed result is added to the tail saved by the
    previous call; the second half is saved in ``st.synthesis_mem``.
    """
    window = np.zeros(WINDOW_SIZE)
    inverse_transform(window, y)
    apply_window(window)

    head = window[:FRAME_SIZE]
    tail = window[FRAME_SIZE:]
    for i in range(FRAME_SIZE):
        out[i] = head[i] + st.synthesis_mem[i]
    st.synthesis_mem[...] = tail
332
+
333
+
334
def biquad(y, mem, x, b, a, N):
    """Filter ``N`` samples of ``x`` into ``y`` with a two-state recursive filter.

    ``b`` and ``a`` each supply two coefficients. ``mem`` holds the two
    state values and is updated in place, so consecutive calls continue the
    same filter.
    """
    for n in range(N):
        sample = x[n]
        filtered = sample + mem[0]
        mem[0] = mem[1] + (b[0] * sample - a[0] * filtered)
        mem[1] = b[1] * sample - a[1] * filtered
        y[n] = filtered
341
+
342
+
343
def pitch_filter(X, P, Ex, Ep, Exp, g):
    """Mix the pitch spectrum ``P`` into ``X`` and restore the band energies.

    A per-band mixing weight is derived from the band correlation ``Exp``
    and the band gains ``g``, interpolated to bins and used to add ``P`` to
    ``X`` in place. ``X`` is then rescaled so that its band energies match
    ``Ex`` again.
    """
    r = np.zeros(NB_BANDS)
    for i in range(NB_BANDS):
        if Exp[i] > g[i]:
            r[i] = 1
        else:
            r[i] = Exp[i] ** 2 * (1 - g[i] ** 2) / (.001 + (g[i] ** 2) * (1 - Exp[i] ** 2))
        r[i] = math.sqrt(min(1, max(0, r[i])))
        r[i] *= math.sqrt(Ex[i] / (1e-8 + Ep[i]))

    rf = np.zeros(FREQ_SIZE)
    interp_band_gain(rf, r)
    for i in range(FREQ_SIZE):
        weight = rf[i]
        X[i].r += weight * P[i].r
        X[i].i += weight * P[i].i

    # Undo the energy change introduced by the mix.
    newE = np.zeros(NB_BANDS)
    compute_band_energy(newE, X)
    norm = np.zeros(NB_BANDS)
    for i in range(NB_BANDS):
        norm[i] = math.sqrt(Ex[i] / (1e-8 + newE[i]))

    normf = np.zeros(FREQ_SIZE)
    interp_band_gain(normf, norm)
    for i in range(FREQ_SIZE):
        scale = normf[i]
        X[i].r *= scale
        X[i].i *= scale
371
+
372
+
373
+ # ======================
374
+ # Main functions
375
+ # ======================
376
+
377
def preprocess(st, data):
    """Filter one frame of samples and compute its spectra and features.

    Returns the tuple ``(X, P, Ex, Ep, Exp, features)`` filled in by
    ``compute_frame_features``.
    """
    # Coefficients of the input filter.
    b_hp = (-2., 1.)
    a_hp = (-1.99599, 0.99600)

    filtered = np.zeros(FRAME_SIZE)
    biquad(filtered, st.mem_hp_x, data, b_hp, a_hp, FRAME_SIZE)

    X = [Complex() for _ in range(FREQ_SIZE)]
    P = [Complex() for _ in range(WINDOW_SIZE)]
    Ex = np.zeros(NB_BANDS)
    Ep = np.zeros(NB_BANDS)
    Exp = np.zeros(NB_BANDS)
    features = np.zeros(NB_FEATURES)
    compute_frame_features(st, X, P, Ex, Ep, Exp, features, filtered)

    return X, P, Ex, Ep, Exp, features
392
+
393
+
394
def postprocess(st, pp, gains, vad_prob):
    """Apply the predicted band gains to each analysed frame and synthesise it.

    ``pp`` holds the per-frame dictionaries built from ``preprocess``;
    ``gains`` and ``vad_prob`` hold one entry per frame. Frames are handled
    in order because synthesis overlaps each frame with the previous one.

    Returns the list of synthesised frames.
    """
    outputs = []
    for frame, g, _prob in zip(pp, gains, vad_prob):
        X = frame["X"]
        pitch_filter(X, frame["P"], frame["Ex"], frame["Ep"], frame["Exp"], g)

        gf = np.ones(FREQ_SIZE)
        interp_band_gain(gf, g)
        for i in range(FREQ_SIZE):
            gain = gf[i]
            X[i].r *= gain
            X[i].i *= gain

        out = np.zeros(FRAME_SIZE)
        frame_synthesis(st, out, X)
        outputs.append(out)

    return outputs
416
+
417
+
418
def rnnoise_process_frame(net, x):
    """Run the network on a batch of frame features.

    ``x`` is a sequence of feature vectors. It is zero-padded to 100 rows
    when shorter, and a leading batch axis is added before inference.

    Returns the gains and VAD probabilities of the single batch entry.
    """
    feats = np.array(x, dtype=np.float32)
    missing = 100 - feats.shape[0]
    if missing > 0:
        padding = np.zeros((missing, NB_FEATURES), dtype=np.float32)
        feats = np.concatenate([feats, padding])

    batch = np.expand_dims(feats, axis=0)

    # feedforward
    if args.onnx:
        output = net.run(None, {'main_input:0': batch})
    else:
        output = net.predict([batch])
    gains, vad_prob = output

    return gains[0], vad_prob[0]
436
+
437
+
438
def _write_pcm16(wf_out, outputs):
    """Clip each frame in ``outputs`` to the int16 range and append it to ``wf_out``."""
    for out in outputs:
        pcm = np.clip(np.array(out, dtype=int), (-0x7fff - 1), 0x7fff)
        wf_out.writeframes(pcm.astype(np.int16).tobytes())


def _denoise_batch(net, st, pp, wf_out):
    """Run the network on the pending frames in ``pp`` and write the result."""
    feats = [p["feat"] for p in pp]
    gains, vad_prob = rnnoise_process_frame(net, feats)
    _write_pcm16(wf_out, postprocess(st, pp, gains, vad_prob))


def recognize_from_audio(net):
    """Denoise the first input wave file and save the result.

    The input is read ``FRAME_SIZE`` frames at a time and interpreted as
    16-bit samples. Frames are analysed one by one and passed to the network
    in batches of 100. The output is written as mono 16-bit 48 kHz.

    A final frame shorter than ``FRAME_SIZE`` is zero-padded so that it can
    be processed like any other frame. Both wave files are closed even when
    processing fails.

    Args:
        net: Inference object handed to ``rnnoise_process_frame``.
    """
    wav_path = args.input[0]
    logger.info(wav_path)

    logger.info('Start inference...')
    with wave.open(wav_path, "rb") as wf:
        if (wf.getnchannels(), wf.getsampwidth(), wf.getframerate()) != (1, 2, 48000):
            # The samples are processed as-is; this only tells the user why
            # the output may sound wrong.
            logger.warning('input is expected to be mono 16-bit 48 kHz PCM')

        save_path = get_savepath(args.savepath, wav_path, ext='.wav')
        with wave.open(save_path, "wb") as wf_out:
            wf_out.setnchannels(1)
            wf_out.setsampwidth(16 // 8)
            wf_out.setframerate(48000)

            pp = []
            st = DenoiseState()
            with tqdm(total=wf.getnframes()) as bar:
                while True:
                    buf = wf.readframes(FRAME_SIZE)
                    if not buf:
                        break
                    data = np.frombuffer(buf, dtype=np.int16)
                    n_read = len(data)
                    if n_read < FRAME_SIZE:
                        # Last, partial frame: pad with silence.
                        data = np.concatenate(
                            [data, np.zeros(FRAME_SIZE - n_read, dtype=np.int16)])

                    X, P, Ex, Ep, Exp, feat = preprocess(st, data)
                    pp.append(dict(X=X, P=P, Ex=Ex, Ep=Ep, Exp=Exp, feat=feat))

                    if len(pp) == 100:
                        _denoise_batch(net, st, pp, wf_out)
                        pp.clear()

                    bar.update(n_read)

                # Frames left over after the last full batch.
                if pp:
                    _denoise_batch(net, st, pp, wf_out)

    logger.info(f'saved at : {save_path}')

    logger.info('Script finished successfully.')
500
+
501
+
502
def main():
    """Fetch the model files, build the inference backend and run the denoiser."""
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    env_id = args.env_id

    # initialize
    if args.onnx:
        import onnxruntime
        if 0 < ailia.get_gpu_environment_id():
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
            providers = ['CPUExecutionProvider']
        net = onnxruntime.InferenceSession(WEIGHT_PATH, providers=providers)
    else:
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    recognize_from_audio(net)
518
+
519
+
520
+ if __name__ == '__main__':
521
+ main()
models/ailia-models/rnn_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22123b85825b413e7c97de8334c6b609f0c6aa1cd5290ab672bb1b85bae20403
3
+ size 1020606
models/ailia-models/rnn_model.onnx.prototxt ADDED
The diff for this file is too large to render. See raw diff
 
models/ailia-models/source.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/rnnoise
2
+
3
+ https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx
4
+ https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt