RNNoise (libs, models)
Browse files- .gitattributes +11 -0
- libs/rnnoise-bin/.github/workflows/build.yml +256 -0
- libs/rnnoise-bin/README.md +19 -0
- libs/rnnoise-bin/releases/7f449bf8/7f449bf8.zip +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll +3 -0
- libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll +3 -0
- libs/rnnoise-bin/releases/7f449bf8/rnnoise-bin-7f449bf8.zip +3 -0
- models/ailia-models/code/README.md +51 -0
- models/ailia-models/code/babble_15dB.wav +3 -0
- models/ailia-models/code/denoised.wav +3 -0
- models/ailia-models/code/kiss_fft.py +345 -0
- models/ailia-models/code/pitch.py +406 -0
- models/ailia-models/code/rnnoise.py +521 -0
- models/ailia-models/rnn_model.onnx +3 -0
- models/ailia-models/rnn_model.onnx.prototxt +0 -0
- models/ailia-models/source.txt +4 -0
.gitattributes
CHANGED
|
@@ -36,3 +36,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 36 |
A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
models/ailia-models/code/babble_15dB.wav filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
models/ailia-models/code/denoised.wav filter=lfs diff=lfs merge=lfs -text
|
libs/rnnoise-bin/.github/workflows/build.yml
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: build
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
release:
|
| 5 |
+
types:
|
| 6 |
+
- created
|
| 7 |
+
workflow_dispatch:
|
| 8 |
+
|
| 9 |
+
env:
|
| 10 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 11 |
+
|
| 12 |
+
jobs:
|
| 13 |
+
build-linux-x86-64:
|
| 14 |
+
runs-on: ubuntu-20.04
|
| 15 |
+
steps:
|
| 16 |
+
- name: Clone RNNoise
|
| 17 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 18 |
+
with:
|
| 19 |
+
repository: xiph/rnnoise
|
| 20 |
+
platform: gitlab.xiph.org
|
| 21 |
+
- name: autogen
|
| 22 |
+
run: ./autogen.sh
|
| 23 |
+
- name: configure
|
| 24 |
+
run: ./configure
|
| 25 |
+
- name: build
|
| 26 |
+
run: make
|
| 27 |
+
- name: Get release
|
| 28 |
+
id: get_release
|
| 29 |
+
uses: bruceadams/get-release@v1.2.2
|
| 30 |
+
- name: Upload
|
| 31 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 32 |
+
with:
|
| 33 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 34 |
+
asset_path: .libs/librnnoise.so
|
| 35 |
+
asset_name: librnnoise-linux-x86-64.so
|
| 36 |
+
asset_content_type: application/octet-stream
|
| 37 |
+
build-linux-x86:
|
| 38 |
+
runs-on: ubuntu-20.04
|
| 39 |
+
steps:
|
| 40 |
+
- name: Clone RNNoise
|
| 41 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 42 |
+
with:
|
| 43 |
+
repository: xiph/rnnoise
|
| 44 |
+
platform: gitlab.xiph.org
|
| 45 |
+
- name: apt update
|
| 46 |
+
run: sudo apt-get update -y
|
| 47 |
+
- name: install gcc-i686-linux-gnu
|
| 48 |
+
run: sudo apt-get install gcc-i686-linux-gnu -y
|
| 49 |
+
- name: autogen
|
| 50 |
+
run: ./autogen.sh
|
| 51 |
+
- name: configure
|
| 52 |
+
run: ./configure --host=i686-linux-gnu "CFLAGS=-m32" "CXXFLAGS=-m32" "LDFLAGS=-m32"
|
| 53 |
+
- name: build
|
| 54 |
+
run: make
|
| 55 |
+
- name: Get release
|
| 56 |
+
id: get_release
|
| 57 |
+
uses: bruceadams/get-release@v1.2.2
|
| 58 |
+
- name: Upload
|
| 59 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 60 |
+
with:
|
| 61 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 62 |
+
asset_path: .libs/librnnoise.so
|
| 63 |
+
asset_name: librnnoise-linux-x86.so
|
| 64 |
+
asset_content_type: application/octet-stream
|
| 65 |
+
build-linux-aarch64:
|
| 66 |
+
runs-on: ubuntu-20.04
|
| 67 |
+
steps:
|
| 68 |
+
- name: Clone RNNoise
|
| 69 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 70 |
+
with:
|
| 71 |
+
repository: xiph/rnnoise
|
| 72 |
+
platform: gitlab.xiph.org
|
| 73 |
+
- name: apt update
|
| 74 |
+
run: sudo apt-get update -y
|
| 75 |
+
- name: install gcc-aarch64-linux-gnu
|
| 76 |
+
run: sudo apt-get install gcc-aarch64-linux-gnu -y
|
| 77 |
+
- name: autogen
|
| 78 |
+
run: ./autogen.sh
|
| 79 |
+
- name: configure
|
| 80 |
+
run: ./configure --host=aarch64-linux-gnu
|
| 81 |
+
- name: build
|
| 82 |
+
run: make
|
| 83 |
+
- name: Get release
|
| 84 |
+
id: get_release
|
| 85 |
+
uses: bruceadams/get-release@v1.2.2
|
| 86 |
+
- name: Upload
|
| 87 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 88 |
+
with:
|
| 89 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 90 |
+
asset_path: .libs/librnnoise.so
|
| 91 |
+
asset_name: librnnoise-linux-aarch64.so
|
| 92 |
+
asset_content_type: application/octet-stream
|
| 93 |
+
build-linux-armel:
|
| 94 |
+
runs-on: ubuntu-20.04
|
| 95 |
+
steps:
|
| 96 |
+
- name: Clone RNNoise
|
| 97 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 98 |
+
with:
|
| 99 |
+
repository: xiph/rnnoise
|
| 100 |
+
platform: gitlab.xiph.org
|
| 101 |
+
- name: apt update
|
| 102 |
+
run: sudo apt-get update -y
|
| 103 |
+
- name: install gcc-arm-linux-gnueabi
|
| 104 |
+
run: sudo apt-get install gcc-arm-linux-gnueabi -y
|
| 105 |
+
- name: autogen
|
| 106 |
+
run: ./autogen.sh
|
| 107 |
+
- name: configure
|
| 108 |
+
run: ./configure --host=arm-linux-gnueabi
|
| 109 |
+
- name: build
|
| 110 |
+
run: make
|
| 111 |
+
- name: Get release
|
| 112 |
+
id: get_release
|
| 113 |
+
uses: bruceadams/get-release@v1.2.2
|
| 114 |
+
- name: Upload
|
| 115 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 116 |
+
with:
|
| 117 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 118 |
+
asset_path: .libs/librnnoise.so
|
| 119 |
+
asset_name: librnnoise-linux-armel.so
|
| 120 |
+
asset_content_type: application/octet-stream
|
| 121 |
+
build-linux-armhf:
|
| 122 |
+
runs-on: ubuntu-20.04
|
| 123 |
+
steps:
|
| 124 |
+
- name: Clone RNNoise
|
| 125 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 126 |
+
with:
|
| 127 |
+
repository: xiph/rnnoise
|
| 128 |
+
platform: gitlab.xiph.org
|
| 129 |
+
- name: apt update
|
| 130 |
+
run: sudo apt-get update -y
|
| 131 |
+
- name: install gcc-arm-linux-gnueabihf
|
| 132 |
+
run: sudo apt-get install gcc-arm-linux-gnueabihf -y
|
| 133 |
+
- name: autogen
|
| 134 |
+
run: ./autogen.sh
|
| 135 |
+
- name: configure
|
| 136 |
+
run: ./configure --host=arm-linux-gnueabihf
|
| 137 |
+
- name: build
|
| 138 |
+
run: make
|
| 139 |
+
- name: Get release
|
| 140 |
+
id: get_release
|
| 141 |
+
uses: bruceadams/get-release@v1.2.2
|
| 142 |
+
- name: Upload
|
| 143 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 144 |
+
with:
|
| 145 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 146 |
+
asset_path: .libs/librnnoise.so
|
| 147 |
+
asset_name: librnnoise-linux-armhf.so
|
| 148 |
+
asset_content_type: application/octet-stream
|
| 149 |
+
build-windows-x86-64:
|
| 150 |
+
runs-on: ubuntu-20.04
|
| 151 |
+
steps:
|
| 152 |
+
- name: Clone RNNoise
|
| 153 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 154 |
+
with:
|
| 155 |
+
repository: xiph/rnnoise
|
| 156 |
+
platform: gitlab.xiph.org
|
| 157 |
+
- name: apt update
|
| 158 |
+
run: sudo apt-get update -y
|
| 159 |
+
- name: install mingw-w64
|
| 160 |
+
run: sudo apt-get install mingw-w64 -y
|
| 161 |
+
- name: autogen
|
| 162 |
+
run: ./autogen.sh
|
| 163 |
+
- name: configure
|
| 164 |
+
run: ./configure --host=x86_64-w64-mingw32
|
| 165 |
+
- name: build
|
| 166 |
+
run: make
|
| 167 |
+
- name: Get release
|
| 168 |
+
id: get_release
|
| 169 |
+
uses: bruceadams/get-release@v1.2.2
|
| 170 |
+
- name: Upload
|
| 171 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 172 |
+
with:
|
| 173 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 174 |
+
asset_path: .libs/librnnoise-0.dll
|
| 175 |
+
asset_name: librnnoise-windows-x86-64.dll
|
| 176 |
+
asset_content_type: application/octet-stream
|
| 177 |
+
build-windows-x86:
|
| 178 |
+
runs-on: ubuntu-20.04
|
| 179 |
+
steps:
|
| 180 |
+
- name: Clone RNNoise
|
| 181 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 182 |
+
with:
|
| 183 |
+
repository: xiph/rnnoise
|
| 184 |
+
platform: gitlab.xiph.org
|
| 185 |
+
- name: apt update
|
| 186 |
+
run: sudo apt-get update -y
|
| 187 |
+
- name: install mingw-w64
|
| 188 |
+
run: sudo apt-get install mingw-w64 -y
|
| 189 |
+
- name: autogen
|
| 190 |
+
run: ./autogen.sh
|
| 191 |
+
- name: configure
|
| 192 |
+
run: ./configure --host=i686-w64-mingw32
|
| 193 |
+
- name: build
|
| 194 |
+
run: make
|
| 195 |
+
- name: Get release
|
| 196 |
+
id: get_release
|
| 197 |
+
uses: bruceadams/get-release@v1.2.2
|
| 198 |
+
- name: Upload
|
| 199 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 200 |
+
with:
|
| 201 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 202 |
+
asset_path: .libs/librnnoise-0.dll
|
| 203 |
+
asset_name: librnnoise-windows-x86.dll
|
| 204 |
+
asset_content_type: application/octet-stream
|
| 205 |
+
build-macos-x86-64:
|
| 206 |
+
runs-on: macos-11
|
| 207 |
+
steps:
|
| 208 |
+
- name: Clone RNNoise
|
| 209 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 210 |
+
with:
|
| 211 |
+
repository: xiph/rnnoise
|
| 212 |
+
platform: gitlab.xiph.org
|
| 213 |
+
- name: install automake
|
| 214 |
+
run: brew install automake
|
| 215 |
+
- name: autogen
|
| 216 |
+
run: ./autogen.sh
|
| 217 |
+
- name: configure
|
| 218 |
+
run: ./configure
|
| 219 |
+
- name: build
|
| 220 |
+
run: make
|
| 221 |
+
- name: Get release
|
| 222 |
+
id: get_release
|
| 223 |
+
uses: bruceadams/get-release@v1.2.2
|
| 224 |
+
- name: Upload
|
| 225 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 226 |
+
with:
|
| 227 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 228 |
+
asset_path: .libs/librnnoise.dylib
|
| 229 |
+
asset_name: librnnoise-macos-x86-64.dylib
|
| 230 |
+
asset_content_type: application/octet-stream
|
| 231 |
+
build-macos-aarch64:
|
| 232 |
+
runs-on: macos-11
|
| 233 |
+
steps:
|
| 234 |
+
- name: Clone RNNoise
|
| 235 |
+
uses: sudosubin/git-clone-action@v1.0.1
|
| 236 |
+
with:
|
| 237 |
+
repository: xiph/rnnoise
|
| 238 |
+
platform: gitlab.xiph.org
|
| 239 |
+
- name: install automake
|
| 240 |
+
run: brew install automake
|
| 241 |
+
- name: autogen
|
| 242 |
+
run: ./autogen.sh
|
| 243 |
+
- name: configure
|
| 244 |
+
run: ./configure --host=aarch64-apple-darwin CFLAGS="-arch arm64"
|
| 245 |
+
- name: build
|
| 246 |
+
run: make
|
| 247 |
+
- name: Get release
|
| 248 |
+
id: get_release
|
| 249 |
+
uses: bruceadams/get-release@v1.2.2
|
| 250 |
+
- name: Upload
|
| 251 |
+
uses: actions/upload-release-asset@v1.0.2
|
| 252 |
+
with:
|
| 253 |
+
upload_url: ${{ steps.get_release.outputs.upload_url }}
|
| 254 |
+
asset_path: .libs/librnnoise.dylib
|
| 255 |
+
asset_name: librnnoise-macos-aarch64.dylib
|
| 256 |
+
asset_content_type: application/octet-stream
|
libs/rnnoise-bin/README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# rnnoise-bin
|
| 2 |
+
|
| 3 |
+
This repository contains builds of RNNoise for the following platforms:
|
| 4 |
+
|
| 5 |
+
linux/x86-64
|
| 6 |
+
linux/x86
|
| 7 |
+
linux/aarch64
|
| 8 |
+
linux/armel
|
| 9 |
+
linux/armhf
|
| 10 |
+
windows/x86-64
|
| 11 |
+
windows/x86
|
| 12 |
+
macos/x86-64
|
| 13 |
+
macos/aarch64
|
| 14 |
+
|
| 15 |
+
The builds can be found in the [releases section](https://github.com/mjwells2002/rnnoise-bin/releases).
|
| 16 |
+
|
| 17 |
+
Each build is tagged with the Git commit it was built from in the [RNNoise repo](https://gitlab.xiph.org/xiph/rnnoise).
|
| 18 |
+
|
| 19 |
+
These builds are produced with GitHub Actions; you can see the workflow file [here](https://github.com/mjwells2002/rnnoise-bin/blob/main/.github/workflows/build.yml).
|
libs/rnnoise-bin/releases/7f449bf8/7f449bf8.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa329b91913c0d3b2d2ebde40df022d618b65a251f7aa9554ac9cf6cbcbe4837
|
| 3 |
+
size 189611
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05712a1801a3fd60af61abf03de08819955d88880fef6303d7f78a653f4230c0
|
| 3 |
+
size 242184
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:285c4f4bcbdb66f3e8b2031b666c7a2109284e16dde187c09239bc7e63d23ec5
|
| 3 |
+
size 235536
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d69847b6f4ecb4b4a976dcec43138390c1ced37dd543a90341a4a7649195632
|
| 3 |
+
size 210068
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53b823a1545ee9a9734ed3255ccfe5bed9069790cb95ee7fe7797a55eea7e3c0
|
| 3 |
+
size 253000
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ee37a51356d8fcc15b12ffa1ff3e286f8b5055454f2ff8ef0657e8e9ea5990
|
| 3 |
+
size 141456
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73c1d80f521daf7a6103aeffa6c7ed365c85276c75c72ea086c6c5a5f270b0f2
|
| 3 |
+
size 169167
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b24d9cd6ebc840e3ef7edbd92f5ae046c5c59439252d5702202c3b3996d60f4b
|
| 3 |
+
size 171504
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0be864c6f8f16e854a3d7a35e5b9d133aea50dac4ec80600df78dc834686570f
|
| 3 |
+
size 551036
|
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c299f873df269f1798d66a6fb3797c696bffcd71e719f65e460b663685dcc94
|
| 3 |
+
size 471627
|
libs/rnnoise-bin/releases/7f449bf8/rnnoise-bin-7f449bf8.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4abc4f144495a6f509dfaff0120ddfae523c37bf1afe252597b63e9773761727
|
| 3 |
+
size 1959
|
models/ailia-models/code/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# rnnoise
|
| 2 |
+
|
| 3 |
+
## Input
|
| 4 |
+
|
| 5 |
+
Audio file
|
| 6 |
+
|
| 7 |
+
- Sample rate: 48 kHz
|
| 8 |
+
- Bits per sample: 16-bit
|
| 9 |
+
- Bit rate: 768 kbps
|
| 10 |
+
|
| 11 |
+
https://github.com/axinc-ai/ailia-models/assets/29946532/f1908958-d3be-44a7-9180-59c375bb488c
|
| 12 |
+
|
| 13 |
+
(Audio from https://jmvalin.ca/demo/rnnoise/)
|
| 14 |
+
|
| 15 |
+
## Output
|
| 16 |
+
|
| 17 |
+
Audio file
|
| 18 |
+
|
| 19 |
+
https://github.com/axinc-ai/ailia-models/assets/29946532/21eaf44d-bffd-428a-9637-f5d385364698
|
| 20 |
+
|
| 21 |
+
## Usage
|
| 22 |
+
The ONNX and prototxt files are downloaded automatically on the first run.
|
| 23 |
+
An Internet connection is required while the files are downloading.
|
| 24 |
+
|
| 25 |
+
For the sample wav,
|
| 26 |
+
```bash
|
| 27 |
+
$ python3 rnnoise.py
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
If you want to specify the audio, put the file path after the `--input` option.
|
| 31 |
+
You can use the `--savepath` option to change the name of the output file.
|
| 32 |
+
```bash
|
| 33 |
+
$ python3 rnnoise.py --input AUDIO_FILE --savepath SAVE_AUDIO_FILE
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## Reference
|
| 37 |
+
|
| 38 |
+
- [rnnoise](https://github.com/xiph/rnnoise)
|
| 39 |
+
- [xiph.org / moz://a](https://jmvalin.ca/demo/rnnoise/)
|
| 40 |
+
|
| 41 |
+
## Framework
|
| 42 |
+
|
| 43 |
+
Keras
|
| 44 |
+
|
| 45 |
+
## Model Format
|
| 46 |
+
|
| 47 |
+
ONNX opset=14
|
| 48 |
+
|
| 49 |
+
## Netron
|
| 50 |
+
|
| 51 |
+
[rnn_model.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt)
|
models/ailia-models/code/babble_15dB.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ea7f570a750027c97ef86a2f9931d25ecc1886973bade22b06d774d71d1565
|
| 3 |
+
size 259244
|
models/ailia-models/code/denoised.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d293e13ee78336fd497bba43eff580f48c1b30f95819caa8d2a249123c3ef84
|
| 3 |
+
size 259244
|
models/ailia-models/code/kiss_fft.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
MAXFACTORS = 8
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Complex:
    """Mutable complex value stored as two plain attributes.

    The FFT helpers in this module update ``r`` (real part) and ``i``
    (imaginary part) in place, which is why the built-in immutable
    ``complex`` type is not used.

    Args:
        r: Initial real part (defaults to 0.0).
        i: Initial imaginary part (defaults to 0.0).
    """

    def __init__(self, r=0.0, i=0.0):
        self.r = r
        self.i = i

    def __repr__(self):
        # Print the sign explicitly and the magnitude separately so the
        # output always reads "<real>+<imag>j" or "<real>-<imag>j".
        sign = '-' if 0 > self.i else '+'
        return '{:.6f}{}{:.6f}j'.format(self.r, sign, abs(self.i))
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class FFTState:
    """Precomputed tables for one FFT size.

    Instances are filled in by ``opus_fft_alloc_twiddles``.
    """

    # Class-level defaults; each is reassigned on the instance when the
    # state is built.
    nfft = 0         # transform length
    scale = 0        # set to 1 / nfft by the allocator
    shift = 0        # set to -1 by the allocator; values <= 0 mean "no shift"
    bitrev = None    # integer array filled by compute_bitrev_table
    twiddles = None  # list of Complex twiddle factors
    arch_fft = None  # not used by the code visible in this file

    def __init__(self):
        # The factor table is mutated in place by kf_factor, so it must be
        # owned by the instance. As a class attribute, a single array would
        # be shared (and overwritten) by every FFTState ever created.
        # Layout written by kf_factor: [radix0, len0, radix1, len1, ...].
        self.factors = np.zeros(2 * MAXFACTORS, dtype=int)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def C_ADD(res, a, b):
    """Store the complex sum ``a + b`` into ``res`` (in place, returns None)."""
    res.r, res.i = a.r + b.r, a.i + b.i
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def C_SUB(res, a, b):
    """Store the complex difference ``a - b`` into ``res`` (in place, returns None)."""
    res.r, res.i = a.r - b.r, a.i - b.i
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def C_ADDTO(res, a):
    """Accumulate the complex value ``a`` into ``res`` (in place, returns None)."""
    res.r += a.r
    res.i += a.i
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def C_MUL(m, a, b):
    """Store the complex product ``a * b`` into ``m`` (in place, returns None).

    Both components are computed before either is written, so the result is
    correct even when ``m`` is the same object as ``a`` or ``b``.
    """
    real = a.r * b.r - a.i * b.i
    imag = a.r * b.i + a.i * b.r
    m.r = real
    m.i = imag
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def C_MULBYSCALAR(c, s):
    """Scale both components of the complex value ``c`` by the real ``s`` in place."""
    c.r, c.i = c.r * s, c.i * s
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def kf_bfly2(Fout, m, N):
    """Apply radix-2 butterflies to ``N`` consecutive groups of 8 cells, in place.

    Within each group, cell ``k`` (0..3) is paired with cell ``k + 4``; the
    second cell is rotated by the k-th power of exp(-2*pi*j/8) before the
    sum and difference are written back.

    Args:
        Fout: Indexable sequence of complex cells (objects with mutable
            ``r``/``i`` attributes); at least ``8 * N`` cells are accessed.
        m: Unused. The fixed offsets of 4 and 8 cells mean the routine only
            makes sense when the sub-transform length is 4.
        N: Number of 8-cell groups to process.

    Note:
        The rotated value is kept in plain local floats. Binding it to a
        cell of ``Fout`` (``t = Fout2[0]``) only creates an alias in Python,
        so later writes to ``t`` would corrupt that cell and the value added
        to the first half would already have been overwritten.
    """
    tw = 0.7071067812  # cos(pi/4) == sin(pi/4)

    def butterfly(top, bottom, tr, ti):
        # bottom = top - t, then top = top + t, with t = (tr, ti).
        bottom.r = top.r - tr
        bottom.i = top.i - ti
        top.r += tr
        top.i += ti

    for group in range(N):
        base = 8 * group
        a0, a1, a2, a3 = Fout[base], Fout[base + 1], Fout[base + 2], Fout[base + 3]
        b0, b1, b2, b3 = Fout[base + 4], Fout[base + 5], Fout[base + 6], Fout[base + 7]

        # Arguments are evaluated before the call, so each rotated value is
        # computed from the untouched second-half cell.
        butterfly(a0, b0, b0.r, b0.i)
        butterfly(a1, b1, (b1.r + b1.i) * tw, (b1.i - b1.r) * tw)
        butterfly(a2, b2, b2.i, -b2.r)
        butterfly(a3, b3, (b3.i - b3.r) * tw, -(b3.i + b3.r) * tw)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def kf_bfly4(Fout, fstride, st, m, N, mm):
    """Apply the radix-4 butterflies of one FFT stage to ``Fout`` in place.

    Args:
        Fout: Indexable sequence of complex cells (objects with mutable
            ``r``/``i`` attributes), updated in place.
        fstride: Step through ``st.twiddles`` between consecutive butterflies
            (the 2nd and 3rd twiddles advance by 2x and 3x this step).
        st: FFT state; only ``st.twiddles`` is read.
        m: Distance in cells between the four inputs of one butterfly.
        N: Number of butterfly groups to process.
        mm: Distance in cells between the starts of consecutive groups.
            Ignored when ``m == 1``, where groups are exactly 4 cells apart.

    Cells and twiddles are addressed by index. Re-slicing a list on every
    step would copy the remainder of the list each time.
    """
    if m == 1:
        # Degenerate case where all the twiddles are 1.
        scratch0 = Complex()
        scratch1 = Complex()
        for i in range(N):
            base = 4 * i
            f0 = Fout[base]
            f1 = Fout[base + 1]
            f2 = Fout[base + 2]
            f3 = Fout[base + 3]

            C_SUB(scratch0, f0, f2)
            C_ADDTO(f0, f2)
            C_ADD(scratch1, f1, f3)
            C_SUB(f2, f0, scratch1)
            C_ADDTO(f0, scratch1)
            C_SUB(scratch1, f1, f3)

            f1.r = scratch0.r + scratch1.i
            f1.i = scratch0.i - scratch1.r
            f3.r = scratch0.r - scratch1.i
            f3.i = scratch0.i + scratch1.r
    else:
        scratch = [Complex() for _ in range(6)]
        m2 = 2 * m
        m3 = 3 * m
        tw = st.twiddles
        for i in range(N):
            base = i * mm
            # m is guaranteed to be a multiple of 4.
            for j in range(m):
                pos = base + j
                f0 = Fout[pos]
                f1 = Fout[pos + m]
                f2 = Fout[pos + m2]
                f3 = Fout[pos + m3]

                C_MUL(scratch[0], f1, tw[j * fstride])
                C_MUL(scratch[1], f2, tw[2 * j * fstride])
                C_MUL(scratch[2], f3, tw[3 * j * fstride])

                C_SUB(scratch[5], f0, scratch[1])
                C_ADDTO(f0, scratch[1])
                C_ADD(scratch[3], scratch[0], scratch[2])
                C_SUB(scratch[4], scratch[0], scratch[2])
                C_SUB(f2, f0, scratch[3])
                C_ADDTO(f0, scratch[3])

                f1.r = scratch[5].r + scratch[4].i
                f1.i = scratch[5].i - scratch[4].r
                f3.r = scratch[5].r - scratch[4].i
                f3.i = scratch[5].i + scratch[4].r
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def kf_bfly3(Fout, fstride, st, m, N, mm):
    """Apply the radix-3 butterflies of one FFT stage to ``Fout`` in place.

    Args:
        Fout: Indexable sequence of complex cells (objects with mutable
            ``r``/``i`` attributes), updated in place.
        fstride: Step through ``st.twiddles`` between consecutive butterflies
            (the 2nd twiddle advances by 2x this step).
        st: FFT state; only ``st.twiddles`` is read.
        m: Distance in cells between the three inputs of one butterfly.
        N: Number of butterfly groups to process.
        mm: Distance in cells between the starts of consecutive groups.

    Cells and twiddles are addressed by index. Re-slicing a list on every
    step would copy the remainder of the list each time.
    """
    m2 = 2 * m
    scratch = [Complex() for _ in range(5)]

    tw = st.twiddles
    # Only the imaginary part of this twiddle is used below.
    epi3 = tw[fstride * m]
    for i in range(N):
        base = i * mm
        # For non-custom modes, m is guaranteed to be a multiple of 4.
        for j in range(m):
            pos = base + j
            f0 = Fout[pos]
            f1 = Fout[pos + m]
            f2 = Fout[pos + m2]

            C_MUL(scratch[1], f1, tw[j * fstride])
            C_MUL(scratch[2], f2, tw[2 * j * fstride])

            C_ADD(scratch[3], scratch[1], scratch[2])
            C_SUB(scratch[0], scratch[1], scratch[2])

            f1.r = f0.r - scratch[3].r / 2
            f1.i = f0.i - scratch[3].i / 2

            C_MULBYSCALAR(scratch[0], epi3.i)

            C_ADDTO(f0, scratch[3])

            f2.r = f1.r + scratch[0].i
            f2.i = f1.i - scratch[0].r

            f1.r = f1.r - scratch[0].i
            f1.i = f1.i + scratch[0].r
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def kf_bfly5(Fout, fstride, st, m, N, mm):
    """Apply the radix-5 butterflies of one FFT stage to ``Fout`` in place.

    Args:
        Fout: Indexable sequence of complex cells (objects with mutable
            ``r``/``i`` attributes), updated in place.
        fstride: Step through ``st.twiddles`` between consecutive butterflies
            (inputs 1..4 use multiples 1x..4x of this step).
        st: FFT state; only ``st.twiddles`` is read.
        m: Distance in cells between the five inputs of one butterfly.
        N: Number of butterfly groups to process.
        mm: Distance in cells between the starts of consecutive groups.

    Cells are addressed by index. Re-slicing five lists on every step would
    copy the remainder of the list each time.
    """
    scratch = [Complex() for _ in range(13)]

    tw = st.twiddles
    ya = tw[fstride * m]
    yb = tw[fstride * 2 * m]
    for i in range(N):
        base = i * mm

        # For non-custom modes, m is guaranteed to be a multiple of 4.
        for u in range(m):
            pos = base + u
            f0 = Fout[pos]
            f1 = Fout[pos + m]
            f2 = Fout[pos + 2 * m]
            f3 = Fout[pos + 3 * m]
            f4 = Fout[pos + 4 * m]

            # Keep the original first input; f0 is overwritten below.
            scratch[0].r = f0.r
            scratch[0].i = f0.i

            C_MUL(scratch[1], f1, tw[u * fstride])
            C_MUL(scratch[2], f2, tw[2 * u * fstride])
            C_MUL(scratch[3], f3, tw[3 * u * fstride])
            C_MUL(scratch[4], f4, tw[4 * u * fstride])

            C_ADD(scratch[7], scratch[1], scratch[4])
            C_SUB(scratch[10], scratch[1], scratch[4])
            C_ADD(scratch[8], scratch[2], scratch[3])
            C_SUB(scratch[9], scratch[2], scratch[3])

            f0.r = f0.r + (scratch[7].r + scratch[8].r)
            f0.i = f0.i + (scratch[7].i + scratch[8].i)

            scratch[5].r = scratch[0].r + ((scratch[7].r * ya.r) + (scratch[8].r * yb.r))
            scratch[5].i = scratch[0].i + ((scratch[7].i * ya.r) + (scratch[8].i * yb.r))

            scratch[6].r = (scratch[10].i * ya.i) + (scratch[9].i * yb.i)
            scratch[6].i = -((scratch[10].r * ya.i) + (scratch[9].r * yb.i))

            C_SUB(f1, scratch[5], scratch[6])
            C_ADD(f4, scratch[5], scratch[6])

            scratch[11].r = scratch[0].r + ((scratch[7].r * yb.r) + (scratch[8].r * ya.r))
            scratch[11].i = scratch[0].i + ((scratch[7].i * yb.r) + (scratch[8].i * ya.r))
            scratch[12].r = (scratch[9].i * ya.i) - (scratch[10].i * yb.i)
            scratch[12].i = (scratch[10].r * yb.i) - (scratch[9].r * ya.i)

            C_ADD(f2, scratch[11], scratch[12])
            C_SUB(f3, scratch[11], scratch[12])
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def compute_bitrev_table(Fout, f, fstride, in_stride, factors, st):
    """Recursively fill ``f`` with the input permutation for the factorisation.

    Args:
        Fout: Integer index assigned to the first entry written at this level.
        f: Destination table. Writes go through slices of it, so it must be
            a type whose slices are views (e.g. a NumPy array).
        fstride: Current stride multiplier; multiplied by the radix on each
            recursion level.
        in_stride: Base stride between written entries.
        factors: Sequence laid out as ``[radix, length, radix, length, ...]``;
            the first pair describes this level.
        st: Passed through to recursive calls; not otherwise used.
    """
    radix = int(factors[0])    # the radix
    sub_len = int(factors[1])  # stage's fft length / radix
    step = fstride * in_stride

    if sub_len != 1:
        remaining = factors[2:]
        for _ in range(radix):
            compute_bitrev_table(Fout, f, fstride * radix, in_stride, remaining, st)
            f = f[step:]
            Fout += sub_len
        return

    # Last level: consecutive indices land ``step`` entries apart.
    for digit in range(radix):
        f[0] = Fout + digit
        f = f[step:]
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def kf_factor(n, facbuf):
    """Factor ``n`` into radices 2, 3, 4 and 5 and record the stage layout.

    On success ``facbuf`` holds, for each stage ``k``, the radix at
    ``facbuf[2 * k]`` and the remaining sub-transform length at
    ``facbuf[2 * k + 1]``.

    Args:
        n: Transform length to factor.
        facbuf: Mutable integer sequence that receives the layout; it needs
            two slots per stage.

    Returns:
        1 on success, 0 if ``n`` has a prime factor greater than 5 (in which
        case ``facbuf`` may have been partially written).

    Note:
        Floor division keeps ``n``, ``p`` and every stored value an integer.
        True division (``/``) would turn them into floats.
    """
    p = 4
    stages = 0
    nbak = n

    # Factor out powers of 4, then powers of 2, then any remaining odd primes.
    while n > 1:
        while n % p:
            if p == 4:
                p = 2
            elif p == 2:
                p = 3
            else:
                p += 2
            if p > 32000 or p * p > n:
                p = n  # no more factors, skip to end
        n //= p
        if p > 5:
            return 0

        facbuf[2 * stages] = p
        if p == 2 and stages > 1:
            # A lone radix-2 stage is moved to slot 1 and this slot becomes
            # radix 4.
            facbuf[2 * stages] = 4
            facbuf[2] = 2
        stages += 1

    n = nbak

    # Reverse the radix order (the radix slots only, i.e. the even indices).
    for i in range(stages // 2):
        j = stages - i - 1
        facbuf[2 * i], facbuf[2 * j] = facbuf[2 * j], facbuf[2 * i]

    # Fill in the remaining length after each stage.
    for i in range(stages):
        n //= facbuf[2 * i]
        facbuf[2 * i + 1] = n

    return 1
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def compute_twiddles(twiddles, nfft):
    """Store cos/sin of ``-2*pi*k/nfft`` in ``twiddles[k].r`` / ``.i``.

    ``twiddles`` must already contain ``nfft`` objects with writable ``r``
    and ``i`` attributes.
    """
    for k in range(nfft):
        angle = (-2 * math.pi / nfft) * k
        entry = twiddles[k]
        entry.r = math.cos(angle)
        entry.i = math.sin(angle)
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def opus_fft_alloc_twiddles(nfft):
    """Create the FFT state (twiddles, factors, bit-reversal table) for ``nfft``.

    Returns:
        A populated ``FFTState``.

    Raises:
        ValueError: if ``kf_factor`` reports that ``nfft`` cannot be split
            into the supported radices.
    """
    st = FFTState()

    st.nfft = nfft
    st.scale = 1. / nfft

    st.twiddles = twiddles = [Complex() for _ in range(nfft)]
    compute_twiddles(twiddles, nfft)
    st.shift = -1

    # kf_factor returns 0 on failure; continuing with a half-filled factor
    # table would only fail later, far from the cause.
    if not kf_factor(nfft, st.factors):
        raise ValueError('unsupported FFT size: {}'.format(nfft))

    # bitrev
    st.bitrev = bitrev = np.zeros(nfft, dtype=int)
    compute_bitrev_table(0, bitrev, 1, 1, st.factors, st)

    return st
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def opus_fft_impl(st, fout):
    """Run the butterfly stages described by ``st.factors`` in place on ``fout``.

    ``st.factors`` is read as (radix, remaining length) pairs up to the first
    pair whose remaining length is 1; the stages are then applied from the
    last pair back to the first.
    """
    strides = np.zeros(MAXFACTORS, dtype=int)
    strides[0] = 1

    # shift can be -1, which means "no shift"
    shift = max(st.shift, 0)

    # Count the stages and accumulate the stride of each one.
    stage_count = 0
    while True:
        radix = st.factors[2 * stage_count]
        remaining = st.factors[2 * stage_count + 1]
        strides[stage_count + 1] = strides[stage_count] * radix
        stage_count += 1
        if remaining == 1:
            break

    # Radix 3/4/5 butterflies share one call signature; radix 2 differs.
    wide_butterflies = {3: kf_bfly3, 4: kf_bfly4, 5: kf_bfly5}

    m = st.factors[2 * stage_count - 1]
    for stage in reversed(range(stage_count)):
        m2 = st.factors[2 * stage - 1] if stage != 0 else 1

        radix = st.factors[2 * stage]
        if radix == 2:
            kf_bfly2(fout, m, strides[stage])
        elif radix in wide_butterflies:
            wide_butterflies[radix](
                fout, strides[stage] << shift, st, m, strides[stage], m2)

        m = m2
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def opus_fft(st, fin, fout):
    """Compute the scaled FFT of ``fin`` into ``fout``.

    Every input sample is multiplied by ``st.scale`` and stored at the slot
    given by ``st.bitrev``; ``opus_fft_impl`` then transforms ``fout`` in
    place.
    """
    gain = st.scale
    order = st.bitrev

    # Scatter the scaled input into permuted order.
    for src in range(st.nfft):
        sample = fin[src]
        real = gain * sample.r
        imag = gain * sample.i
        fout[order[src]].r = real
        fout[order[src]].i = imag

    opus_fft_impl(st, fout)
|
models/ailia-models/code/pitch.py
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def find_best_pitch(xcorr, y, _len, max_pitch, best_pitch):
    """Write the two best lags into ``best_pitch`` (best one first).

    A lag ``i`` is scored by ``xcorr[i] ** 2 / Syy`` where ``Syy`` is the
    running energy of ``y[i:i + _len]`` (floored at 1); scores are compared
    by cross-multiplication so no division is needed.  Only lags with a
    positive ``xcorr`` are considered.  ``y`` is read up to index
    ``_len + max_pitch - 1``.
    """
    top_num, top_den = -1, 0
    second_num, second_den = -1, 0

    best_pitch[0] = 0
    best_pitch[1] = 1

    energy = 1
    for j in range(_len):
        energy = energy + (y[j] * y[j])

    for lag in range(max_pitch):
        corr = xcorr[lag]
        if corr > 0:
            num = corr * corr
            if num * second_den > second_num * energy:
                if num * top_den > top_num * energy:
                    # New best: the previous best becomes the runner-up.
                    second_num, second_den = top_num, top_den
                    best_pitch[1] = best_pitch[0]
                    top_num, top_den = num, energy
                    best_pitch[0] = lag
                else:
                    second_num, second_den = num, energy
                    best_pitch[1] = lag

        # Slide the energy window by one sample.
        energy += (y[lag + _len] * y[lag + _len]) - (y[lag] * y[lag])
        energy = max(1, energy)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _celt_lpc(lpc, ac, p):
|
| 34 |
+
"""
|
| 35 |
+
lpc (out): [0...p-1] LPC coefficients
|
| 36 |
+
ac (in): [0...p] autocorrelation values
|
| 37 |
+
"""
|
| 38 |
+
error = ac[0]
|
| 39 |
+
|
| 40 |
+
for i in range(p):
|
| 41 |
+
lpc[i] = 0
|
| 42 |
+
|
| 43 |
+
if ac[0] != 0:
|
| 44 |
+
for i in range(p):
|
| 45 |
+
# Sum up this iteration's reflection coefficient
|
| 46 |
+
rr = 0
|
| 47 |
+
for j in range(i):
|
| 48 |
+
rr += lpc[j] * ac[i - j]
|
| 49 |
+
rr += ac[i + 1]
|
| 50 |
+
r = -rr / error
|
| 51 |
+
# Update LPC coefficients and total error
|
| 52 |
+
lpc[i] = r
|
| 53 |
+
|
| 54 |
+
for j in range((i + 1) >> 1):
|
| 55 |
+
tmp1 = lpc[j]
|
| 56 |
+
tmp2 = lpc[i - 1 - j]
|
| 57 |
+
lpc[j] = tmp1 + (r * tmp2)
|
| 58 |
+
lpc[i - 1 - j] = tmp2 + (r * tmp1)
|
| 59 |
+
|
| 60 |
+
error = error - ((r * r) * error)
|
| 61 |
+
# Bail out once we get 30 dB gain
|
| 62 |
+
if error < .001 * ac[0]:
|
| 63 |
+
break
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _celt_autocorr(x, ac, window, overlap, lag, n):
    """
    Autocorrelation of ``x`` for lags 0..lag.

    x:  (in)  [0...n-1] samples x
    ac: (out) [0...lag] ac values
    window/overlap: when ``overlap`` is non-zero, the first and last
        ``overlap`` samples are multiplied by ``window`` on a copy of ``x``.

    Always returns 0.
    """
    fast_len = n - lag

    if overlap == 0:
        samples = x
    else:
        samples = [x[i] for i in range(n)]
        for i in range(overlap):
            samples[i] = x[i] * window[i]
            samples[n - i - 1] = x[n - i - 1] * window[i]

    # Part of every lag that covers the first `fast_len` products.
    celt_pitch_xcorr(samples, samples, ac, fast_len, lag + 1)

    # Add the remaining products, whose count depends on the lag.
    for k in range(lag + 1):
        tail = 0
        for i in range(k + fast_len, n):
            tail = tail + (samples[i] * samples[i - k])
        ac[k] += tail

    shift = 0
    return shift
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def celt_fir5(x, num, y, N, mem):
    """Apply a 5-tap FIR filter to ``N`` samples of ``x``.

    ``y[i] = x[i] + num[0]*x[i-1] + ... + num[4]*x[i-5]`` where samples
    before ``x[0]`` come from ``mem`` (most recent first).  ``mem`` is updated
    with the last five inputs.  ``x[i]`` is read before ``y[i]`` is written,
    so ``y`` may be the same buffer as ``x``.
    """
    c0, c1, c2, c3, c4 = num[0], num[1], num[2], num[3], num[4]
    d0, d1, d2, d3, d4 = mem[0], mem[1], mem[2], mem[3], mem[4]

    for i in range(N):
        sample = x[i]
        acc = sample
        acc = acc + c0 * d0
        acc = acc + c1 * d1
        acc = acc + c2 * d2
        acc = acc + c3 * d3
        acc = acc + c4 * d4
        # Shift the delay line and push the new input.
        d0, d1, d2, d3, d4 = sample, d0, d1, d2, d3
        y[i] = acc

    mem[0], mem[1], mem[2], mem[3], mem[4] = d0, d1, d2, d3, d4
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def pitch_downsample(x, x_lp, _len, C):
    """Produce the half-rate, LPC-filtered signal used for pitch analysis.

    x:    sequence of ``C`` channel buffers, each read up to index ``_len - 1``
    x_lp: (out) receives ``_len >> 1`` samples
    C:    number of channels; a second channel is summed in when ``C == 2``

    Steps: smooth with a (.25, .5, .25) kernel while decimating by two, take
    a 4th-order LPC of the result, and run the result through the derived
    5-tap filter in place.
    """
    half = _len >> 1
    ac = [0] * 5
    tmp = 1.
    lpc = [0] * 4
    mem = [0] * 5
    lpc2 = [0] * 5
    c1 = .8

    chan = x[0]
    for i in range(1, half):
        x_lp[i] = .5 * (.5 * (chan[(2 * i - 1)] + chan[(2 * i + 1)]) + chan[2 * i])
    x_lp[0] = .5 * (.5 * (chan[1]) + chan[0])
    if C == 2:
        chan = x[1]
        # Must span the same `half` samples as the first channel; stopping at
        # `_len >> 2` left the second half of x_lp without channel 1.
        for i in range(1, half):
            x_lp[i] += .5 * (.5 * (chan[(2 * i - 1)] + chan[(2 * i + 1)]) + chan[2 * i])
        x_lp[0] += .5 * (.5 * (chan[1]) + chan[0])

    _celt_autocorr(x_lp, ac, None, 0, 4, half)

    # Noise floor -40 dB
    ac[0] *= 1.0001

    # Lag windowing
    for i in range(1, 4 + 1):
        ac[i] -= ac[i] * (.008 * i) * (.008 * i)

    _celt_lpc(lpc, ac, 4)
    # Scale coefficient i by .9 ** (i + 1)
    for i in range(4):
        tmp = .9 * tmp
        lpc[i] = lpc[i] * tmp

    # Add a zero
    lpc2[0] = lpc[0] + .8
    lpc2[1] = lpc[1] + c1 * lpc[0]
    lpc2[2] = lpc[2] + c1 * lpc[1]
    lpc2[3] = lpc[3] + c1 * lpc[2]
    lpc2[4] = c1 * lpc[3]
    celt_fir5(x_lp, lpc2, x_lp, half, mem)
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def xcorr_kernel(x, y, _sum, _len):
    """Accumulate four neighbouring cross-correlation lags at once.

    For ``k`` in 0..3 adds ``sum(x[j] * y[j + k] for j in range(_len))`` to
    ``_sum[k]``, with products added in increasing ``j``.  ``y`` is read up
    to index ``_len + 2``.

    Indexing is used instead of repeatedly re-slicing ``x`` and ``y``: on
    Python lists every slice is a copy, which made the kernel quadratic in
    ``_len``.  It also makes short inputs (``_len`` < 4) work; the unrolled
    form left its loop counter unbound when the main loop never ran.
    """
    for j in range(_len):
        tmp = x[j]
        _sum[0] = _sum[0] + tmp * y[j]
        _sum[1] = _sum[1] + tmp * y[j + 1]
        _sum[2] = _sum[2] + tmp * y[j + 2]
        _sum[3] = _sum[3] + tmp * y[j + 3]
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def dual_inner_prod(x, y01, y02, N):
    """Return the dot products of ``x`` with ``y01`` and with ``y02``.

    Only the first ``N`` samples of each sequence are used.
    """
    acc_a = 0
    acc_b = 0
    for k in range(N):
        acc_a += x[k] * y01[k]
        acc_b += x[k] * y02[k]
    return acc_a, acc_b
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def celt_inner_prod(x, y, N):
    """Return the dot product of the first ``N`` samples of ``x`` and ``y``."""
    acc = 0
    for k in range(N):
        acc += x[k] * y[k]
    return acc
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def celt_pitch_xcorr(_x, _y, xcorr, _len, max_pitch):
    """Cross-correlate ``_x`` with ``_y`` for lags 0..max_pitch-1.

    ``xcorr[i]`` receives ``sum(_x[j] * _y[i + j] for j in range(_len))``.
    Lags are processed four at a time through ``xcorr_kernel``; any remaining
    lags go through ``celt_inner_prod``.
    """
    # Start the lag counter explicitly so that max_pitch < 4 (no unrolled
    # pass at all) falls through to the scalar loop instead of hitting an
    # unbound variable.
    i = 0
    while i < max_pitch - 3:
        _sum = [0, 0, 0, 0]
        xcorr_kernel(_x, _y[i:], _sum, _len)
        xcorr[i] = _sum[0]
        xcorr[i + 1] = _sum[1]
        xcorr[i + 2] = _sum[2]
        xcorr[i + 3] = _sum[3]
        i += 4

    # In case max_pitch isn't a multiple of 4, do non-unrolled version.
    for i in range(i, max_pitch):
        xcorr[i] = celt_inner_prod(_x, _y[i:], _len)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def pitch_search(x_lp, y, _len, max_pitch):
    """Search ``y`` for the lag that best matches ``x_lp`` and return it.

    A coarse search runs on every second sample of both inputs, a finer one
    re-evaluates only the lags within 2 of twice the two coarse candidates,
    and the result is finally nudged by -1/0/+1 from the neighbouring
    correlation values.
    """
    lag = _len + max_pitch
    half_len = _len >> 1
    quarter_len = _len >> 2
    half_pitch = max_pitch >> 1
    quarter_pitch = max_pitch >> 2

    best_pitch = [0, 0]

    # Downsample by 2 again
    x_lp4 = [x_lp[2 * j] for j in range(quarter_len)]
    y_lp4 = [y[2 * j] for j in range(lag >> 2)]
    xcorr = [0] * half_pitch

    # Coarse search with 4x decimation
    celt_pitch_xcorr(x_lp4, y_lp4, xcorr, quarter_len, quarter_pitch)
    find_best_pitch(xcorr, y_lp4, quarter_len, quarter_pitch, best_pitch)

    # Finer search with 2x decimation
    for i in range(half_pitch):
        xcorr[i] = 0
        near_first = abs(i - 2 * best_pitch[0]) <= 2
        near_second = abs(i - 2 * best_pitch[1]) <= 2
        if near_first or near_second:
            xcorr[i] = max(-1, celt_inner_prod(x_lp, y[i:], half_len))
    find_best_pitch(xcorr, y, half_len, half_pitch, best_pitch)

    # Refine by pseudo-interpolation
    peak = best_pitch[0]
    offset = 0
    if 0 < peak < half_pitch - 1:
        a, b, c = xcorr[peak - 1], xcorr[peak], xcorr[peak + 1]
        if (c - a) > .7 * (b - a):
            offset = 1
        elif (a - c) > .7 * (b - c):
            offset = -1

    return 2 * peak - offset
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def compute_pitch_gain(xy, xx, yy):
    """Return ``xy`` normalised by ``sqrt(1 + xx * yy)``."""
    denom = math.sqrt(1 + xx * yy)
    return xy / denom
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
second_check = [0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2]
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def remove_doubling(x, maxperiod, minperiod, N, T0_, prev_period, prev_gain):
    """Check whether a sub-multiple of the detected period fits better.

    x:           analysis buffer; the current frame starts at ``maxperiod // 2``
    T0_:         one-element list holding the period, updated in place
    prev_period: period chosen for the previous frame
    prev_gain:   gain returned for the previous frame

    All periods and ``N`` are halved on entry; the period written back to
    ``T0_[0]`` is doubled again and never below the original ``minperiod``.
    Returns the pitch gain for the selected period.
    """
    minperiod0 = minperiod
    maxperiod //= 2
    minperiod //= 2
    T0_[0] //= 2
    prev_period //= 2
    N //= 2

    history = x
    frame = history[maxperiod:]
    if T0_[0] >= maxperiod:
        T0_[0] = maxperiod - 1

    T = T0 = T0_[0]

    xx, xy = dual_inner_prod(frame, frame, history[maxperiod - T0:], N)

    # yy_lookup[i]: energy of the N-sample window that starts i samples
    # before the frame, clamped at 0.
    yy_lookup = [0] * (maxperiod + 1)
    yy_lookup[0] = xx
    yy = xx
    for i in range(1, maxperiod + 1):
        yy = yy + (history[maxperiod - i] * history[maxperiod - i]) - (frame[N - i] * frame[N - i])
        yy_lookup[i] = max(0, yy)

    yy = yy_lookup[T0]
    best_xy, best_yy = xy, yy
    g = g0 = compute_pitch_gain(xy, xx, yy)

    # Look for any pitch at T/k
    for k in range(2, 15 + 1):
        T1 = (2 * T0 + k) // (2 * k)
        if T1 < minperiod:
            break

        # Look for another strong correlation at T1b
        if k != 2:
            T1b = (2 * second_check[k] * T0 + k) // (2 * k)
        elif T1 + T0 > maxperiod:
            T1b = T0
        else:
            T1b = T0 + T1

        xy, xy2 = dual_inner_prod(
            frame, history[maxperiod - T1:], history[maxperiod - T1b:], N)
        xy = .5 * (xy + xy2)
        yy = .5 * (yy_lookup[T1] + yy_lookup[T1b])
        g1 = compute_pitch_gain(xy, xx, yy)

        # Lower the threshold for candidates close to the previous period.
        period_gap = abs(T1 - prev_period)
        if period_gap <= 1:
            cont = prev_gain
        elif period_gap <= 2 and 5 * k * k < T0:
            cont = .5 * prev_gain
        else:
            cont = 0
        thresh = max(.3, (.7 * g0) - cont)

        # Bias against very high pitch (very short period) to avoid
        # false-positives due to short-term correlation
        if T1 < 3 * minperiod:
            thresh = max(.4, (.85 * g0) - cont)
        elif T1 < 2 * minperiod:
            # Unreachable as written: T1 < 2 * minperiod already satisfies
            # the branch above.  Kept to mirror the existing logic.
            thresh = max(.5, (.9 * g0) - cont)

        if g1 > thresh:
            best_xy = xy
            best_yy = yy
            T = T1
            g = g1

    best_xy = max(0, best_xy)
    pg = 1. if best_yy <= best_xy else best_xy / (best_yy + 1)

    # Correlation at T - 1, T and T + 1 decides the final +/-1 adjustment.
    xcorr = [
        celt_inner_prod(frame, history[maxperiod - (T + k - 1):], N)
        for k in range(3)
    ]
    if xcorr[2] - xcorr[0] > .7 * (xcorr[1] - xcorr[0]):
        offset = 1
    elif xcorr[0] - xcorr[2] > .7 * (xcorr[1] - xcorr[2]):
        offset = -1
    else:
        offset = 0

    if pg > g:
        pg = g
    T0_[0] = 2 * T + offset

    if T0_[0] < minperiod0:
        T0_[0] = minperiod0

    return pg
|
models/ailia-models/code/rnnoise.py
ADDED
|
@@ -0,0 +1,521 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import math
|
| 3 |
+
import wave
|
| 4 |
+
import struct
|
| 5 |
+
from logging import getLogger
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
|
| 10 |
+
import ailia
|
| 11 |
+
|
| 12 |
+
# import original modules
|
| 13 |
+
sys.path.append('../../util')
|
| 14 |
+
from arg_utils import get_base_parser, update_parser, get_savepath # noqa
|
| 15 |
+
from model_utils import check_and_download_models # noqa
|
| 16 |
+
|
| 17 |
+
from kiss_fft import Complex, opus_fft_alloc_twiddles, opus_fft
|
| 18 |
+
from pitch import pitch_downsample, pitch_search, remove_doubling
|
| 19 |
+
|
| 20 |
+
logger = getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# ======================
|
| 23 |
+
# Parameters
|
| 24 |
+
# ======================
|
| 25 |
+
|
| 26 |
+
WEIGHT_PATH = 'rnn_model.onnx'
|
| 27 |
+
MODEL_PATH = 'rnn_model.onnx.prototxt'
|
| 28 |
+
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/rnnoise/'
|
| 29 |
+
|
| 30 |
+
AUDIO_PATH = 'babble_15dB.wav'
|
| 31 |
+
OUTPUT_PATH = 'denoised.wav'
|
| 32 |
+
|
| 33 |
+
PITCH_MIN_PERIOD = 60
|
| 34 |
+
PITCH_MAX_PERIOD = 768
|
| 35 |
+
PITCH_FRAME_SIZE = 960
|
| 36 |
+
PITCH_BUF_SIZE = PITCH_MAX_PERIOD + PITCH_FRAME_SIZE
|
| 37 |
+
|
| 38 |
+
NB_BANDS = 22
|
| 39 |
+
CEPS_MEM = 8
|
| 40 |
+
NB_DELTA_CEPS = 6
|
| 41 |
+
NB_FEATURES = NB_BANDS + 3 * NB_DELTA_CEPS + 2
|
| 42 |
+
|
| 43 |
+
FRAME_SIZE_SHIFT = 2
|
| 44 |
+
FRAME_SIZE = 120 << FRAME_SIZE_SHIFT
|
| 45 |
+
WINDOW_SIZE = 2 * FRAME_SIZE
|
| 46 |
+
FREQ_SIZE = FRAME_SIZE + 1
|
| 47 |
+
|
| 48 |
+
# ======================
|
| 49 |
+
# Argument Parser Config
|
| 50 |
+
# ======================
|
| 51 |
+
|
| 52 |
+
parser = get_base_parser(
|
| 53 |
+
'rnnoise', AUDIO_PATH, OUTPUT_PATH
|
| 54 |
+
)
|
| 55 |
+
parser.add_argument(
|
| 56 |
+
'--onnx',
|
| 57 |
+
action='store_true',
|
| 58 |
+
help='execute onnxruntime version.'
|
| 59 |
+
)
|
| 60 |
+
args = update_parser(parser)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# ======================
|
| 64 |
+
# Secondary Functions
|
| 65 |
+
# ======================
|
| 66 |
+
|
| 67 |
+
class CommonState:
    """Tables shared by the transforms; filled in once by ``check_init``."""

    def __init__(self):
        # Buffers are created per instance rather than as class attributes,
        # so the arrays written by check_init belong to this object only.
        self.init = False  # set to True once the tables below are filled
        self.kfft = None  # FFT state for a transform of size 2 * FRAME_SIZE
        self.half_window = np.zeros(FRAME_SIZE)
        self.dct_table = np.zeros(NB_BANDS * NB_BANDS)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
common = CommonState()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class DenoiseState:
    """State carried from one frame to the next while denoising a stream."""

    def __init__(self):
        # These arrays are updated in place on every frame.  As class
        # attributes they were shared by every DenoiseState, so two states
        # would overwrite each other's history; each instance now owns its
        # own buffers.
        self.analysis_mem = np.zeros(FRAME_SIZE)  # previous input frame
        self.cepstral_mem = np.zeros((CEPS_MEM, NB_BANDS))  # ring of cepstra
        self.memid = 0  # next row of cepstral_mem to write
        self.synthesis_mem = np.zeros(FRAME_SIZE)  # overlap-add tail
        self.pitch_buf = np.zeros(PITCH_BUF_SIZE)
        self.pitch_enh_buf = np.zeros(PITCH_BUF_SIZE)
        self.last_gain = 0.0
        self.last_period = 0
        self.mem_hp_x = np.zeros(2)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def compute_band_energy(bandE, X):
    """Write the energy of spectrum ``X`` in each of the ``NB_BANDS`` bands.

    The squared magnitude of every bin between two band edges is split
    between the lower and the upper band with weights ``1 - frac`` and
    ``frac``.  The first and last bands only receive one side of that split
    and are doubled to compensate.

    Uses the module-level ``eband5ms`` edge table, like ``compute_band_corr``
    and ``interp_band_gain``, instead of a private copy of the same values.
    """
    _sum = [0] * NB_BANDS
    for i in range(NB_BANDS - 1):
        start = eband5ms[i] << FRAME_SIZE_SHIFT
        band_size = (eband5ms[i + 1] - eband5ms[i]) << FRAME_SIZE_SHIFT
        for j in range(band_size):
            frac = j / band_size
            coef = X[start + j]
            tmp = coef.r ** 2
            tmp += coef.i ** 2
            _sum[i] += (1 - frac) * tmp
            _sum[i + 1] += frac * tmp

    _sum[0] *= 2
    _sum[NB_BANDS - 1] *= 2
    for i in range(NB_BANDS):
        bandE[i] = _sum[i]
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
eband5ms = [
|
| 111 |
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
|
| 112 |
+
]
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def compute_band_corr(bandE, X, P):
    """Write the per-band correlation between spectra ``X`` and ``P``.

    Same band weighting as ``compute_band_energy``, but each bin contributes
    ``X.r * P.r + X.i * P.i`` instead of its squared magnitude.
    """
    acc = [0] * NB_BANDS

    for band in range(NB_BANDS - 1):
        first_bin = eband5ms[band] << FRAME_SIZE_SHIFT
        width = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for j in range(width):
            frac = j / width
            a = X[first_bin + j]
            b = P[first_bin + j]
            tmp = a.r * b.r
            tmp += a.i * b.i
            acc[band] += (1 - frac) * tmp
            acc[band + 1] += frac * tmp

    # The edge bands only get one side of the split.
    acc[0] *= 2
    acc[NB_BANDS - 1] *= 2
    for band in range(NB_BANDS):
        bandE[band] = acc[band]
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def interp_band_gain(g, bandE):
    """Expand per-band values ``bandE`` into per-bin values in ``g``.

    ``g`` is cleared first; bins between two band edges are linearly
    interpolated between the values of the two bands.
    """
    g[...] = 0
    for band in range(NB_BANDS - 1):
        first_bin = eband5ms[band] << FRAME_SIZE_SHIFT
        width = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        lower = bandE[band]
        upper = bandE[band + 1]
        for j in range(width):
            frac = j / width
            g[first_bin + j] = (1 - frac) * lower + frac * upper
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def check_init():
    """Fill the shared FFT state, half window and DCT table on first use."""
    if common.init:
        return

    common.kfft = opus_fft_alloc_twiddles(2 * FRAME_SIZE)

    # half_window[i] = sin(pi/2 * sin^2(pi/2 * (i + .5) / FRAME_SIZE))
    for i in range(FRAME_SIZE):
        s = math.sin(.5 * math.pi * (i + .5) / FRAME_SIZE)
        common.half_window[i] = math.sin(.5 * math.pi * s * s)

    # Cosine table stored row-major; column 0 is scaled by sqrt(.5).
    for i in range(NB_BANDS):
        row = i * NB_BANDS
        for j in range(NB_BANDS):
            value = math.cos((i + .5) * j * math.pi / NB_BANDS)
            if j == 0:
                value *= math.sqrt(.5)
            common.dct_table[row + j] = value

    common.init = True
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def dct(out, in_data):
    """Write the DCT of the ``NB_BANDS`` values in ``in_data`` to ``out``.

    Uses the cosine table built by ``check_init``.  The output scale is
    derived from ``NB_BANDS`` (previously a literal 22) so that it stays in
    step with the table size.

    Returns ``out``.
    """
    check_init()

    scale = math.sqrt(2. / NB_BANDS)
    for i in range(NB_BANDS):
        _sum = 0
        for j in range(NB_BANDS):
            _sum += in_data[j] * common.dct_table[j * NB_BANDS + i]
        out[i] = _sum * scale

    return out
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def forward_transform(out, in_data):
    """FFT the ``WINDOW_SIZE`` real samples of ``in_data``.

    The first ``FREQ_SIZE`` output bins are stored in ``out``.
    """
    check_init()

    time_buf = [Complex() for _ in range(WINDOW_SIZE)]
    freq_buf = [Complex() for _ in range(WINDOW_SIZE)]

    # Real input: imaginary parts are zero.
    for k in range(WINDOW_SIZE):
        cell = time_buf[k]
        cell.r = in_data[k]
        cell.i = 0

    opus_fft(common.kfft, time_buf, freq_buf)

    for k in range(FREQ_SIZE):
        out[k] = freq_buf[k]
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def inverse_transform(out, in_data):
    """Turn the ``FREQ_SIZE`` bins of ``in_data`` back into ``WINDOW_SIZE`` samples.

    The upper half of the spectrum is rebuilt as the complex conjugate of the
    lower half, the forward FFT is applied, and its real output is read in
    reverse order and scaled by ``WINDOW_SIZE``.
    """
    check_init()

    spectrum = [Complex() for _ in range(WINDOW_SIZE)]
    result = [Complex() for _ in range(WINDOW_SIZE)]

    for k in range(FREQ_SIZE):
        spectrum[k] = in_data[k]
    # Mirror the remaining bins from the ones just copied.
    for k in range(FREQ_SIZE, WINDOW_SIZE):
        mirror = spectrum[WINDOW_SIZE - k]
        spectrum[k].r = mirror.r
        spectrum[k].i = -mirror.i

    opus_fft(common.kfft, spectrum, result)

    # output in reverse order for IFFT.
    out[0] = WINDOW_SIZE * result[0].r
    for k in range(1, WINDOW_SIZE):
        out[k] = WINDOW_SIZE * result[WINDOW_SIZE - k].r
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def apply_window(x):
    """Multiply ``x`` in place by the symmetric analysis/synthesis window.

    ``common.half_window`` is applied to the first ``FRAME_SIZE`` samples and,
    mirrored, to the samples counted back from index ``WINDOW_SIZE - 1``.
    """
    check_init()

    last = WINDOW_SIZE - 1
    for i in range(FRAME_SIZE):
        weight = common.half_window[i]
        x[i] *= weight
        x[last - i] *= weight
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def frame_analysis(st, X, Ex, in_data):
    """Window and transform the previous plus the current frame.

    The spectrum goes to ``X`` and its band energies to ``Ex``;
    ``st.analysis_mem`` is replaced with ``in_data`` for the next call.
    """
    frame = np.zeros(WINDOW_SIZE)
    frame[:FRAME_SIZE] = st.analysis_mem
    frame[FRAME_SIZE:] = in_data
    st.analysis_mem[...] = in_data

    apply_window(frame)
    forward_transform(X, frame)

    compute_band_energy(Ex, X)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def compute_frame_features(st, X, P, Ex, Ep, Exp, features, x):
    """Analyse one frame ``x`` and fill ``features`` for the network.

    Outputs written in place:
        X, Ex   spectrum and band energies of the current frame
        P, Ep   spectrum and band energies of the pitch-delayed signal
        Exp     per-band normalised correlation between X and P
        features  cepstrum, its first/second differences over the last three
                  frames, pitch-correlation cepstrum, pitch period and
                  spectral variability

    ``st`` (pitch history, cepstral ring buffer, last period/gain) is updated.

    Returns 1 (after zeroing ``features``) when the summed band energy is
    below 0.04, otherwise whether it is below 0.1.
    """
    band_log = np.zeros(NB_BANDS)
    delayed = np.zeros(WINDOW_SIZE)
    pitch_buf = np.zeros(PITCH_BUF_SIZE >> 1)
    corr_ceps = np.zeros(NB_BANDS)

    frame_analysis(st, X, Ex, x)

    # Slide the pitch history by one frame and append the new samples.
    kept = PITCH_BUF_SIZE - FRAME_SIZE
    st.pitch_buf[:kept] = st.pitch_buf[FRAME_SIZE:]
    st.pitch_buf[kept:] = x

    pitch_downsample([st.pitch_buf], pitch_buf, PITCH_BUF_SIZE, 1)
    pitch_index = pitch_search(
        pitch_buf[PITCH_MAX_PERIOD >> 1:], pitch_buf, PITCH_FRAME_SIZE,
        PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD)
    pitch_index = PITCH_MAX_PERIOD - pitch_index

    # remove_doubling updates the period through a one-element list.
    period_ref = [pitch_index]
    gain = remove_doubling(
        pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
        PITCH_FRAME_SIZE, period_ref, st.last_period, st.last_gain)
    pitch_index = period_ref[0]
    st.last_period = pitch_index
    st.last_gain = gain

    # Window of the history delayed by the pitch period.
    start = PITCH_BUF_SIZE - WINDOW_SIZE - pitch_index
    for i in range(WINDOW_SIZE):
        delayed[i] = st.pitch_buf[start + i]
    apply_window(delayed)
    forward_transform(P, delayed)
    compute_band_energy(Ep, P)
    compute_band_corr(Exp, X, P)
    for i in range(NB_BANDS):
        Exp[i] = Exp[i] / math.sqrt(.001 + Ex[i] * Ep[i])
    dct(corr_ceps, Exp)

    corr_base = NB_BANDS + 2 * NB_DELTA_CEPS
    for i in range(NB_DELTA_CEPS):
        features[corr_base + i] = corr_ceps[i]
    features[corr_base] -= 1.3
    features[corr_base + 1] -= 0.9
    features[NB_BANDS + 3 * NB_DELTA_CEPS] = .01 * (pitch_index - 300)

    # Log band energies, limited relative to the running maximum and to the
    # previous band, plus the total energy.
    E = 0
    logMax = -2
    follow = -2
    for i in range(NB_BANDS):
        value = math.log10(1e-2 + Ex[i])
        value = max(logMax - 7, max(follow - 1.5, value))
        band_log[i] = value
        logMax = max(logMax, band_log[i])
        follow = max(follow - 1.5, band_log[i])
        E += Ex[i]

    if E < 0.04:
        # If there's no audio, avoid messing up the state.
        features[...] = 0
        return 1

    dct(features, band_log)

    features[0] -= 12
    features[1] -= 4

    # Rows of the ring buffer for this frame and the two before it.
    slot = st.memid
    ceps_0 = st.cepstral_mem[slot]
    ceps_1 = st.cepstral_mem[CEPS_MEM + slot - 1] \
        if slot < 1 else st.cepstral_mem[slot - 1]
    ceps_2 = st.cepstral_mem[CEPS_MEM + slot - 2] \
        if slot < 2 else st.cepstral_mem[slot - 2]
    for i in range(NB_BANDS):
        ceps_0[i] = features[i]
    st.memid += 1

    for i in range(NB_DELTA_CEPS):
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i]
        features[NB_BANDS + i] = ceps_0[i] - ceps_2[i]
        features[NB_BANDS + NB_DELTA_CEPS + i] = ceps_0[i] - 2 * ceps_1[i] + ceps_2[i]

    if st.memid == CEPS_MEM:
        st.memid = 0

    # Spectral variability features: for every stored cepstrum, the squared
    # distance to its nearest other stored cepstrum.
    spec_variability = 0
    for i in range(CEPS_MEM):
        mindist = 1e15
        for j in range(CEPS_MEM):
            dist = 0.
            for k in range(NB_BANDS):
                diff = st.cepstral_mem[i][k] - st.cepstral_mem[j][k]
                dist += diff * diff
            if j != i:
                mindist = min(mindist, dist)

        spec_variability += mindist

    features[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = spec_variability / CEPS_MEM - 2.1

    # NOTE(review): the RNNoise C reference returns `TRAINING && E < 0.1`,
    # i.e. 0 outside training; here frames with 0.04 <= E < 0.1 are also
    # reported to the caller. Confirm this is intended.
    return E < 0.1
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def frame_synthesis(st, out, y):
    """Write one synthesized frame into ``out`` using overlap-add.

    ``y`` is passed to ``inverse_transform`` to fill a ``WINDOW_SIZE`` buffer,
    which is then windowed in place.  The first ``FRAME_SIZE`` samples of that
    buffer are summed with the tail saved from the previous call
    (``st.synthesis_mem``) and stored in ``out``; the remaining samples
    replace ``st.synthesis_mem`` for the next call.
    """
    windowed = np.zeros(WINDOW_SIZE)
    inverse_transform(windowed, y)
    apply_window(windowed)

    # Overlap-add: head of the current window + tail kept from the last one.
    out[:FRAME_SIZE] = windowed[:FRAME_SIZE] + st.synthesis_mem[:FRAME_SIZE]

    # Keep the tail of this window for the next frame.
    st.synthesis_mem[...] = windowed[FRAME_SIZE:]
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def biquad(y, mem, x, b, a, N):
    """Run a two-state recursive filter over the first ``N`` samples of ``x``.

    For every sample the output is ``x[i] + mem[0]``; the two state values in
    ``mem`` are then advanced using the coefficient pairs ``b`` (applied to
    the input sample) and ``a`` (applied to the output sample).

    ``y`` receives the filtered samples and ``mem`` is updated in place, so
    the filter state carries over between successive calls.
    """
    for n in range(N):
        sample = x[n]
        filtered = sample + mem[0]
        # Both right-hand sides are evaluated from the *old* state before
        # either slot is overwritten.
        mem[0], mem[1] = (
            mem[1] + (b[0] * sample - a[0] * filtered),
            b[1] * sample - a[1] * filtered,
        )
        y[n] = filtered
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def pitch_filter(X, P, Ex, Ep, Exp, g):
    """Mix a weighted copy of ``P`` into ``X`` in place, then renormalize ``X``.

    A per-band weight is derived from ``Exp`` and ``g`` (1 when
    ``Exp > g``, otherwise a ratio clamped to ``[0, 1]``), square-rooted and
    scaled by ``sqrt(Ex / Ep)``.  The band weights are expanded to
    ``FREQ_SIZE`` bins with ``interp_band_gain`` and used to add ``P`` to
    ``X``.  Afterwards the band energies of the modified ``X`` are
    recomputed and each bin is scaled by the interpolated
    ``sqrt(Ex / new_energy)``.

    ``X`` and ``P`` are indexed sequences of objects with ``r`` / ``i``
    attributes; only ``X`` is modified.
    """
    band_weight = np.zeros(NB_BANDS)
    for band in range(NB_BANDS):
        corr = Exp[band]
        gain = g[band]
        if corr > gain:
            ratio = 1
        else:
            ratio = corr ** 2 * (1 - gain ** 2) / (.001 + (gain ** 2) * (1 - corr ** 2))
        clamped = math.sqrt(min(1, max(0, ratio)))
        band_weight[band] = clamped * math.sqrt(Ex[band] / (1e-8 + Ep[band]))

    bin_weight = np.zeros(FREQ_SIZE)
    interp_band_gain(bin_weight, band_weight)
    for k in range(FREQ_SIZE):
        target = X[k]
        source = P[k]
        weight = bin_weight[k]
        target.r += weight * source.r
        target.i += weight * source.i

    # Rescale using the band energies of X recomputed after mixing.
    mixed_energy = np.zeros(NB_BANDS)
    compute_band_energy(mixed_energy, X)

    band_scale = np.zeros(NB_BANDS)
    for band in range(NB_BANDS):
        band_scale[band] = math.sqrt(Ex[band] / (1e-8 + mixed_energy[band]))

    bin_scale = np.zeros(FREQ_SIZE)
    interp_band_gain(bin_scale, band_scale)
    for k in range(FREQ_SIZE):
        target = X[k]
        scale = bin_scale[k]
        target.r *= scale
        target.i *= scale
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
# ======================
|
| 374 |
+
# Main functions
|
| 375 |
+
# ======================
|
| 376 |
+
|
| 377 |
+
def preprocess(st, data):
    """Filter one frame of samples and compute its per-frame analysis data.

    The first ``FRAME_SIZE`` samples of ``data`` go through ``biquad`` with
    fixed coefficients (named ``*_hp`` — presumably a high-pass; confirm
    against the reference implementation), using ``st.mem_hp_x`` as the
    persistent filter state.  The filtered frame is then handed to
    ``compute_frame_features``, which fills the buffers allocated here.

    Returns the tuple ``(X, P, Ex, Ep, Exp, features)``.
    """
    # Pre-filter the raw samples; the filter state lives on ``st``.
    a_hp = (-1.99599, 0.99600)
    b_hp = (-2., 1.)
    filtered = np.zeros(FRAME_SIZE)
    biquad(filtered, st.mem_hp_x, data, b_hp, a_hp, FRAME_SIZE)

    # Output buffers, filled in place by compute_frame_features.
    X = [Complex() for _ in range(FREQ_SIZE)]
    P = [Complex() for _ in range(WINDOW_SIZE)]
    Ex = np.zeros(NB_BANDS)
    Ep = np.zeros(NB_BANDS)
    Exp = np.zeros(NB_BANDS)
    features = np.zeros(NB_FEATURES)
    compute_frame_features(st, X, P, Ex, Ep, Exp, features, filtered)

    return X, P, Ex, Ep, Exp, features
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def postprocess(st, pp, gains, vad_prob):
    """Apply the predicted band gains to each buffered frame and synthesize it.

    ``pp`` is a sequence of dicts with the keys ``"X"``, ``"P"``, ``"Ex"``,
    ``"Ep"`` and ``"Exp"``.  Frames are paired with the rows of ``gains`` and
    ``vad_prob``; iteration stops at the shortest of the three.  The
    ``vad_prob`` values themselves are not used.

    For every frame the spectrum ``X`` is pitch-filtered, multiplied bin by
    bin with the interpolated gains, and converted back to ``FRAME_SIZE``
    samples via ``frame_synthesis`` (which updates ``st``).

    Returns a list with one ``FRAME_SIZE`` array per processed frame.
    """
    frames_out = []
    for frame, band_gains, _vad in zip(pp, gains, vad_prob):
        spectrum = frame["X"]
        pitch_filter(
            spectrum, frame["P"], frame["Ex"], frame["Ep"], frame["Exp"],
            band_gains,
        )

        bin_gains = np.ones(FREQ_SIZE)
        interp_band_gain(bin_gains, band_gains)
        for k in range(FREQ_SIZE):
            cell = spectrum[k]
            cell.r *= bin_gains[k]
            cell.i *= bin_gains[k]

        samples = np.zeros(FRAME_SIZE)
        frame_synthesis(st, samples, spectrum)
        frames_out.append(samples)

    return frames_out
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def rnnoise_process_frame(net, x):
    """Run the network on a batch of per-frame feature vectors.

    ``x`` is converted to a float32 array; when it has fewer than 100 rows it
    is padded with zero rows of width ``NB_FEATURES`` up to 100.  A leading
    batch axis is added before inference.

    The backend is selected by the module-level ``args.onnx`` flag:
    ``net.run`` with the input name ``'main_input:0'`` when set, otherwise
    ``net.predict``.

    Returns ``(gains, vad_prob)`` for the single batch element.
    """
    feats = np.array(x, dtype=np.float32)
    n_rows = feats.shape[0]
    if n_rows < 100:
        filler = np.zeros((100 - n_rows, NB_FEATURES), dtype=np.float32)
        feats = np.concatenate([feats, filler])

    batch = np.expand_dims(feats, axis=0)

    # feedforward
    if args.onnx:
        output = net.run(None, {'main_input:0': batch})
    else:
        output = net.predict([batch])
    gains, vad_prob = output

    return gains[0], vad_prob[0]
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def _denoise_and_write(net, st, pp, wf_out):
    """Denoise the buffered frames in ``pp`` and append them to ``wf_out``.

    Runs the network on the buffered feature vectors, post-processes every
    frame, truncates the samples to integers, clips them to the signed
    16-bit range and writes them as native-order 16-bit PCM.
    """
    feats = [p["feat"] for p in pp]
    gains, vad_prob = rnnoise_process_frame(net, feats)
    for out in postprocess(st, pp, gains, vad_prob):
        pcm = np.array(out, dtype=int)
        pcm = np.clip(pcm, (-0x7fff - 1), 0x7fff)
        wf_out.writeframes(struct.pack("h" * len(pcm), *pcm))


def recognize_from_audio(net):
    """Denoise the WAV file given on the command line and save the result.

    The input (``args.input[0]``) is read ``FRAME_SIZE`` frames at a time,
    each chunk is pre-processed, and the frames are pushed through the
    network in batches of 100.  The output is written as mono, 16-bit,
    48 kHz PCM to the path returned by ``get_savepath``.

    A final chunk shorter than ``FRAME_SIZE`` is zero-padded so that it can be
    processed like any other frame (otherwise the pre-filter would index past
    the end of the buffer); the output is therefore rounded up to a whole
    number of frames.

    NOTE(review): the bytes are interpreted as 16-bit samples and only the
    first ``FRAME_SIZE`` values of each chunk are used, so the input is
    presumably expected to be mono 16-bit — confirm before feeding other
    formats.
    """
    wav_path = args.input[0]
    logger.info(wav_path)

    logger.info('Start inference...')
    # Context managers guarantee both files and the progress bar are closed
    # even if inference fails half-way through.
    with wave.open(wav_path, "rb") as wf:
        save_path = get_savepath(args.savepath, wav_path, ext='.wav')
        with wave.open(save_path, "wb") as wf_out, \
                tqdm(total=wf.getnframes()) as bar:
            wf_out.setnchannels(1)
            wf_out.setsampwidth(16 // 8)
            wf_out.setframerate(48000)

            pp = []
            st = DenoiseState()
            while True:
                buf = wf.readframes(FRAME_SIZE)
                if not buf:
                    break
                data = np.frombuffer(buf, dtype=np.int16)
                n_read = len(data)
                if n_read < FRAME_SIZE:
                    # Last chunk of the file: pad with silence to a full frame.
                    data = np.concatenate([
                        data,
                        np.zeros(FRAME_SIZE - n_read, dtype=np.int16),
                    ])

                X, P, Ex, Ep, Exp, feat = preprocess(st, data)
                pp.append(dict(
                    X=X,
                    P=P,
                    Ex=Ex,
                    Ep=Ep,
                    Exp=Exp,
                    feat=feat
                ))

                if len(pp) == 100:
                    _denoise_and_write(net, st, pp, wf_out)
                    pp.clear()

                bar.update(n_read)

            # Flush whatever is left in a partially filled batch.
            if pp:
                _denoise_and_write(net, st, pp, wf_out)

    logger.info(f'saved at : {save_path}')

    logger.info('Script finished successfully.')
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def main():
    """Fetch the model files, build the inference backend and run denoising.

    With ``args.onnx`` set, an ``onnxruntime.InferenceSession`` is created
    from ``WEIGHT_PATH`` (the CUDA provider is listed first when
    ``ailia.get_gpu_environment_id()`` returns a positive id); otherwise an
    ``ailia.Net`` is created with the environment id from ``args.env_id``.
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    env_id = args.env_id

    # initialize
    if args.onnx:
        import onnxruntime
        use_cuda = 0 < ailia.get_gpu_environment_id()
        providers = ['CPUExecutionProvider']
        if use_cuda:
            providers.insert(0, 'CUDAExecutionProvider')
        net = onnxruntime.InferenceSession(WEIGHT_PATH, providers=providers)
    else:
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    recognize_from_audio(net)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
models/ailia-models/rnn_model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22123b85825b413e7c97de8334c6b609f0c6aa1cd5290ab672bb1b85bae20403
|
| 3 |
+
size 1020606
|
models/ailia-models/rnn_model.onnx.prototxt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/ailia-models/source.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/rnnoise
|
| 2 |
+
|
| 3 |
+
https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx
|
| 4 |
+
https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt
|