niobures commited on Oct 25, 2025

Commit

d0b4949

verified ·

1 Parent(s): 0e49683

RNNoise (libs, models)

Browse files

Files changed (23) hide show

.gitattributes +11 -0
libs/rnnoise-bin/.github/workflows/build.yml +256 -0
libs/rnnoise-bin/README.md +19 -0
libs/rnnoise-bin/releases/7f449bf8/7f449bf8.zip +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll +3 -0
libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll +3 -0
libs/rnnoise-bin/releases/7f449bf8/rnnoise-bin-7f449bf8.zip +3 -0
models/ailia-models/code/README.md +51 -0
models/ailia-models/code/babble_15dB.wav +3 -0
models/ailia-models/code/denoised.wav +3 -0
models/ailia-models/code/kiss_fft.py +345 -0
models/ailia-models/code/pitch.py +406 -0
models/ailia-models/code/rnnoise.py +521 -0
models/ailia-models/rnn_model.onnx +3 -0
models/ailia-models/rnn_model.onnx.prototxt +0 -0
models/ailia-models/source.txt +4 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
 RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
 RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text

 A[[:space:]]Hybrid[[:space:]]DSP_Deep[[:space:]]Learning[[:space:]]Approach[[:space:]]to[[:space:]]Real-Time[[:space:]]Full-Band[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
 RNNoise-Ex.[[:space:]]Hybrid[[:space:]]Speech[[:space:]]Enhancement[[:space:]]System[[:space:]]based[[:space:]]on[[:space:]]RNN[[:space:]]and[[:space:]]Spectral[[:space:]]Features.pdf filter=lfs diff=lfs merge=lfs -text
 RNNoise.[[:space:]]Learning[[:space:]]Noise[[:space:]]Suppression.pdf filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll filter=lfs diff=lfs merge=lfs -text
+libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll filter=lfs diff=lfs merge=lfs -text
+models/ailia-models/code/babble_15dB.wav filter=lfs diff=lfs merge=lfs -text
+models/ailia-models/code/denoised.wav filter=lfs diff=lfs merge=lfs -text

libs/rnnoise-bin/.github/workflows/build.yml ADDED Viewed

	@@ -0,0 +1,256 @@

+# Workflow "build": each job below clones RNNoise, builds it for one target
+# platform and uploads the resulting shared library as a release asset.
+name: build
+on:
+  # Triggered when a release is created ...
+  release:
+    types:
+      - created
+  # ... or when the workflow is started manually.
+  workflow_dispatch:
+env:
+  # Exposed to every step; presumably consumed by the get-release and
+  # upload-release-asset actions used in the jobs — TODO confirm.
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+jobs:
+  # Native Linux x86-64 build: plain autogen/configure/make, then upload
+  # .libs/librnnoise.so as librnnoise-linux-x86-64.so.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-linux-x86-64:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.so
+          asset_name: librnnoise-linux-x86-64.so
+          asset_content_type: application/octet-stream
+  # 32-bit Linux x86 build: installs the i686 cross compiler and configures
+  # with --host=i686-linux-gnu plus -m32 flags, then uploads
+  # .libs/librnnoise.so as librnnoise-linux-x86.so.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-linux-x86:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install  gcc-i686-linux-gnu
+        run: sudo apt-get install gcc-i686-linux-gnu -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=i686-linux-gnu "CFLAGS=-m32" "CXXFLAGS=-m32" "LDFLAGS=-m32"
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.so
+          asset_name: librnnoise-linux-x86.so
+          asset_content_type: application/octet-stream
+  # Linux aarch64 cross build: installs gcc-aarch64-linux-gnu, configures with
+  # --host=aarch64-linux-gnu, then uploads .libs/librnnoise.so as
+  # librnnoise-linux-aarch64.so.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-linux-aarch64:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install  gcc-aarch64-linux-gnu
+        run: sudo apt-get install gcc-aarch64-linux-gnu -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=aarch64-linux-gnu
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.so
+          asset_name: librnnoise-linux-aarch64.so
+          asset_content_type: application/octet-stream
+  # Linux armel cross build: installs gcc-arm-linux-gnueabi, configures with
+  # --host=arm-linux-gnueabi, then uploads .libs/librnnoise.so as
+  # librnnoise-linux-armel.so.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-linux-armel:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install  gcc-arm-linux-gnueabi
+        run: sudo apt-get install gcc-arm-linux-gnueabi -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=arm-linux-gnueabi
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.so
+          asset_name: librnnoise-linux-armel.so
+          asset_content_type: application/octet-stream
+  # Linux armhf cross build: installs gcc-arm-linux-gnueabihf, configures with
+  # --host=arm-linux-gnueabihf, then uploads .libs/librnnoise.so as
+  # librnnoise-linux-armhf.so.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-linux-armhf:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install  gcc-arm-linux-gnueabihf
+        run: sudo apt-get install gcc-arm-linux-gnueabihf -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=arm-linux-gnueabihf
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.so
+          asset_name: librnnoise-linux-armhf.so
+          asset_content_type: application/octet-stream
+  # Windows x86-64 build, cross-compiled on Linux with mingw-w64
+  # (--host=x86_64-w64-mingw32); uploads .libs/librnnoise-0.dll as
+  # librnnoise-windows-x86-64.dll.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-windows-x86-64:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install mingw-w64
+        run: sudo apt-get install mingw-w64 -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=x86_64-w64-mingw32
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise-0.dll
+          asset_name: librnnoise-windows-x86-64.dll
+          asset_content_type: application/octet-stream
+  # Windows 32-bit x86 build, cross-compiled on Linux with mingw-w64
+  # (--host=i686-w64-mingw32); uploads .libs/librnnoise-0.dll as
+  # librnnoise-windows-x86.dll.
+  # NOTE(review): the ubuntu-20.04 hosted runner image appears to have been
+  # retired by GitHub — confirm and bump the image if so.
+  build-windows-x86:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: apt update
+        run: sudo apt-get update -y
+      - name: install mingw-w64
+        run: sudo apt-get install mingw-w64 -y
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=i686-w64-mingw32
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise-0.dll
+          asset_name: librnnoise-windows-x86.dll
+          asset_content_type: application/octet-stream
+  # macOS x86-64 build: installs automake via Homebrew, runs a plain
+  # autogen/configure/make and uploads .libs/librnnoise.dylib as
+  # librnnoise-macos-x86-64.dylib.
+  # NOTE(review): the macos-11 hosted runner image appears to have been retired
+  # by GitHub; newer macOS images may be arm64 hosts, in which case a plain
+  # ./configure would no longer yield an x86-64 library — confirm before bumping.
+  build-macos-x86-64:
+    runs-on: macos-11
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: install automake
+        run: brew install automake
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.dylib
+          asset_name: librnnoise-macos-x86-64.dylib
+          asset_content_type: application/octet-stream
+  # macOS arm64 build: same steps as the x86-64 macOS job, but configured with
+  # --host=aarch64-apple-darwin and CFLAGS="-arch arm64"; uploads
+  # .libs/librnnoise.dylib as librnnoise-macos-aarch64.dylib.
+  # NOTE(review): the macos-11 hosted runner image appears to have been retired
+  # by GitHub — confirm and bump the image if so.
+  build-macos-aarch64:
+    runs-on: macos-11
+    steps:
+      - name: Clone RNNoise
+        uses: sudosubin/git-clone-action@v1.0.1
+        with:
+          repository: xiph/rnnoise
+          platform: gitlab.xiph.org
+      - name: install automake
+        run: brew install automake
+      - name: autogen
+        run: ./autogen.sh
+      - name: configure
+        run: ./configure --host=aarch64-apple-darwin CFLAGS="-arch arm64"
+      - name: build
+        run: make
+      # Looks up the release whose upload URL is used by the next step.
+      - name: Get release
+        id: get_release
+        uses: bruceadams/get-release@v1.2.2
+      - name: Upload
+        uses: actions/upload-release-asset@v1.0.2
+        with:
+          upload_url: ${{ steps.get_release.outputs.upload_url }}
+          asset_path: .libs/librnnoise.dylib
+          asset_name: librnnoise-macos-aarch64.dylib
+          asset_content_type: application/octet-stream

libs/rnnoise-bin/README.md ADDED Viewed

	@@ -0,0 +1,19 @@

+# rnnoise-bin
+This repository contains builds of RNNoise for the following platforms:
+    linux/x86-64
+    linux/x86
+    linux/aarch64
+    linux/armel
+    linux/armhf
+    windows/x86-64
+    windows/x86
+    macos/x86-64
+    macos/aarch64
+The builds can be found in the [release section](https://github.com/mjwells2002/rnnoise-bin/releases).
+Each build is tagged with the git commit of the [RNNoise repo](https://gitlab.xiph.org/xiph/rnnoise) it was built from.
+These builds are produced with GitHub Actions; you can see the workflow file [here](https://github.com/mjwells2002/rnnoise-bin/blob/main/.github/workflows/build.yml).

libs/rnnoise-bin/releases/7f449bf8/7f449bf8.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa329b91913c0d3b2d2ebde40df022d618b65a251f7aa9554ac9cf6cbcbe4837
+size 189611

libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-aarch64.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05712a1801a3fd60af61abf03de08819955d88880fef6303d7f78a653f4230c0
+size 242184

libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armel.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:285c4f4bcbdb66f3e8b2031b666c7a2109284e16dde187c09239bc7e63d23ec5
+size 235536

libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-armhf.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d69847b6f4ecb4b4a976dcec43138390c1ced37dd543a90341a4a7649195632
+size 210068

libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86-64.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53b823a1545ee9a9734ed3255ccfe5bed9069790cb95ee7fe7797a55eea7e3c0
+size 253000

libs/rnnoise-bin/releases/7f449bf8/librnnoise-linux-x86.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61ee37a51356d8fcc15b12ffa1ff3e286f8b5055454f2ff8ef0657e8e9ea5990
+size 141456

libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-aarch64.dylib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73c1d80f521daf7a6103aeffa6c7ed365c85276c75c72ea086c6c5a5f270b0f2
+size 169167

libs/rnnoise-bin/releases/7f449bf8/librnnoise-macos-x86-64.dylib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b24d9cd6ebc840e3ef7edbd92f5ae046c5c59439252d5702202c3b3996d60f4b
+size 171504

libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86-64.dll ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0be864c6f8f16e854a3d7a35e5b9d133aea50dac4ec80600df78dc834686570f
+size 551036

libs/rnnoise-bin/releases/7f449bf8/librnnoise-windows-x86.dll ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c299f873df269f1798d66a6fb3797c696bffcd71e719f65e460b663685dcc94
+size 471627

libs/rnnoise-bin/releases/7f449bf8/rnnoise-bin-7f449bf8.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4abc4f144495a6f509dfaff0120ddfae523c37bf1afe252597b63e9773761727
+size 1959

models/ailia-models/code/README.md ADDED Viewed

	@@ -0,0 +1,51 @@

+# rnnoise
+## Input
+Audio file
+- Sample rate: 48 kHz
+- Bits per sample: 16-bit
+- Bit rate: 768 kbps
+https://github.com/axinc-ai/ailia-models/assets/29946532/f1908958-d3be-44a7-9180-59c375bb488c
+(Audio from https://jmvalin.ca/demo/rnnoise/)
+## Output
+Audio file
+https://github.com/axinc-ai/ailia-models/assets/29946532/21eaf44d-bffd-428a-9637-f5d385364698
+## Usage
+The ONNX and prototxt files are downloaded automatically on the first run.
+An Internet connection is required while they are being downloaded.
+To process the bundled sample WAV file, run:
+```bash
+$ python3 rnnoise.py
+```
+If you want to specify the audio, put the file path after the `--input` option.
+You can use `--savepath` option to change the name of the output file to save.
+```bash
+$ python3 rnnoise.py --input AUDIO_FILE --savepath SAVE_AUDIO_FILE
+```
+## Reference
+- [rnnoise](https://github.com/xiph/rnnoise)
+- [xiph.org / moz://a](https://jmvalin.ca/demo/rnnoise/)
+## Framework
+Keras
+## Model Format
+ONNX opset=14
+## Netron
+[rnn_model.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt)

models/ailia-models/code/babble_15dB.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80ea7f570a750027c97ef86a2f9931d25ecc1886973bade22b06d774d71d1565
+size 259244

models/ailia-models/code/denoised.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d293e13ee78336fd497bba43eff580f48c1b30f95819caa8d2a249123c3ef84
+size 259244

models/ailia-models/code/kiss_fft.py ADDED Viewed

	@@ -0,0 +1,345 @@

+import math
+import numpy as np
+MAXFACTORS = 8
+class Complex:
+    def __init__(self):
+        self.r = 0.0
+        self.i = 0.0
+    def __repr__(self):
+        return '{:.6f}{}{:.6f}j'.format(self.r, '-' if 0 > self.i else '+', abs(self.i))
+class FFTState:
+    nfft = 0
+    scale = 0
+    shift = 0
+    factors = np.zeros(2 * MAXFACTORS, dtype=int)
+    bitrev = None
+    twiddles = None
+    arch_fft = None
+def C_ADD(res, a, b):
+    res.r = a.r + b.r
+    res.i = a.i + b.i
+def C_SUB(res, a, b):
+    res.r = a.r - b.r
+    res.i = a.i - b.i
+def C_ADDTO(res, a):
+    res.r = res.r + a.r
+    res.i = res.i + a.i
+def C_MUL(m, a, b):
+    m.r = a.r * b.r - a.i * b.i
+    m.i = a.r * b.i + a.i * b.r
+def C_MULBYSCALAR(c, s):
+    c.r *= s
+    c.i *= s
+def kf_bfly2(Fout, m, N):
+    tw = 0.7071067812
+    for i in range(N):
+        Fout2 = Fout[4:]
+        t = Fout2[0]
+        C_SUB(Fout2[0], Fout[0], t)
+        C_ADDTO(Fout[0], t)
+        t.r = (Fout2[1].r + Fout2[1].i) * tw
+        t.i = (Fout2[1].i - Fout2[1].r) * tw
+        C_SUB(Fout2[1], Fout[1], t)
+        C_ADDTO(Fout[1], t)
+        t.r = Fout2[2].i
+        t.i = -Fout2[2].r
+        C_SUB(Fout2[2], Fout[2], t)
+        C_ADDTO(Fout[2], t)
+        t.r = (Fout2[3].i - Fout2[3].r) * tw
+        t.i = -(Fout2[3].i + Fout2[3].r) * tw
+        C_SUB(Fout2[3], Fout[3], t)
+        C_ADDTO(Fout[3], t)
+        Fout = Fout[8:]
+def kf_bfly4(Fout, fstride, st, m, N, mm):
+    if m == 1:
+        # Degenerate case where all the twiddles are 1.
+        for i in range(N):
+            scratch0 = Complex()
+            scratch1 = Complex()
+            C_SUB(scratch0, Fout[0], Fout[2])
+            C_ADDTO(Fout[0], Fout[2])
+            C_ADD(scratch1, Fout[1], Fout[3])
+            C_SUB(Fout[2], Fout[0], scratch1)
+            C_ADDTO(Fout[0], scratch1)
+            C_SUB(scratch1, Fout[1], Fout[3])
+            Fout[1].r = scratch0.r + scratch1.i
+            Fout[1].i = scratch0.i - scratch1.r
+            Fout[3].r = scratch0.r - scratch1.i
+            Fout[3].i = scratch0.i + scratch1.r
+            Fout = Fout[4:]
+    else:
+        scratch = [Complex() for _ in range(6)]
+        m2 = 2 * m
+        m3 = 3 * m
+        Fout_beg = Fout
+        for i in range(N):
+            Fout = Fout_beg[i * mm:]
+            tw3 = tw2 = tw1 = st.twiddles
+            # m is guaranteed to be a multiple of 4.
+            for j in range(m):
+                C_MUL(scratch[0], Fout[m], tw1[0])
+                C_MUL(scratch[1], Fout[m2], tw2[0])
+                C_MUL(scratch[2], Fout[m3], tw3[0])
+                C_SUB(scratch[5], Fout[0], scratch[1])
+                C_ADDTO(Fout[0], scratch[1])
+                C_ADD(scratch[3], scratch[0], scratch[2])
+                C_SUB(scratch[4], scratch[0], scratch[2])
+                C_SUB(Fout[m2], Fout[0], scratch[3])
+                tw1 = tw1[fstride:]
+                tw2 = tw2[fstride * 2:]
+                tw3 = tw3[fstride * 3:]
+                C_ADDTO(Fout[0], scratch[3])
+                Fout[m].r = scratch[5].r + scratch[4].i
+                Fout[m].i = scratch[5].i - scratch[4].r
+                Fout[m3].r = scratch[5].r - scratch[4].i
+                Fout[m3].i = scratch[5].i + scratch[4].r
+                Fout = Fout[1:]
+def kf_bfly3(Fout, fstride, st, m, N, mm):
+    m2 = 2 * m
+    scratch = [Complex() for _ in range(5)]
+    Fout_beg = Fout
+    epi3 = st.twiddles[fstride * m]
+    for i in range(N):
+        Fout = Fout_beg[i * mm:]
+        tw1 = tw2 = st.twiddles
+        # For non-custom modes, m is guaranteed to be a multiple of 4.
+        k = m
+        while 0 < k:
+            C_MUL(scratch[1], Fout[m], tw1[0])
+            C_MUL(scratch[2], Fout[m2], tw2[0])
+            C_ADD(scratch[3], scratch[1], scratch[2])
+            C_SUB(scratch[0], scratch[1], scratch[2])
+            tw1 = tw1[fstride:]
+            tw2 = tw2[fstride * 2:]
+            Fout[m].r = Fout[0].r - scratch[3].r / 2
+            Fout[m].i = Fout[0].i - scratch[3].i / 2
+            C_MULBYSCALAR(scratch[0], epi3.i)
+            C_ADDTO(Fout[0], scratch[3])
+            Fout[m2].r = Fout[m].r + scratch[0].i
+            Fout[m2].i = Fout[m].i - scratch[0].r
+            Fout[m].r = Fout[m].r - scratch[0].i
+            Fout[m].i = Fout[m].i + scratch[0].r
+            Fout = Fout[1:]
+            k = k - 1
+def kf_bfly5(Fout, fstride, st, m, N, mm):
+    scratch = [Complex() for _ in range(13)]
+    Fout_beg = Fout
+    ya = st.twiddles[fstride * m]
+    yb = st.twiddles[fstride * 2 * m]
+    tw = st.twiddles
+    for i in range(N):
+        Fout = Fout_beg[i * mm:]
+        Fout0 = Fout
+        Fout1 = Fout0[m:]
+        Fout2 = Fout0[2 * m:]
+        Fout3 = Fout0[3 * m:]
+        Fout4 = Fout0[4 * m:]
+        # For non-custom modes, m is guaranteed to be a multiple of 4.
+        for u in range(m):
+            scratch[0].r = Fout0[0].r
+            scratch[0].i = Fout0[0].i
+            C_MUL(scratch[1], Fout1[0], tw[u * fstride])
+            C_MUL(scratch[2], Fout2[0], tw[2 * u * fstride])
+            C_MUL(scratch[3], Fout3[0], tw[3 * u * fstride])
+            C_MUL(scratch[4], Fout4[0], tw[4 * u * fstride])
+            C_ADD(scratch[7], scratch[1], scratch[4])
+            C_SUB(scratch[10], scratch[1], scratch[4])
+            C_ADD(scratch[8], scratch[2], scratch[3])
+            C_SUB(scratch[9], scratch[2], scratch[3])
+            Fout0[0].r = Fout0[0].r + (scratch[7].r + scratch[8].r)
+            Fout0[0].i = Fout0[0].i + (scratch[7].i + scratch[8].i)
+            scratch[5].r = scratch[0].r + ((scratch[7].r * ya.r) + (scratch[8].r * yb.r))
+            scratch[5].i = scratch[0].i + ((scratch[7].i * ya.r) + (scratch[8].i * yb.r))
+            scratch[6].r = (scratch[10].i * ya.i) + (scratch[9].i * yb.i)
+            scratch[6].i = -((scratch[10].r * ya.i) + (scratch[9].r * yb.i))
+            C_SUB(Fout1[0], scratch[5], scratch[6])
+            C_ADD(Fout4[0], scratch[5], scratch[6])
+            scratch[11].r = scratch[0].r + ((scratch[7].r * yb.r) + (scratch[8].r * ya.r))
+            scratch[11].i = scratch[0].i + ((scratch[7].i * yb.r) + (scratch[8].i * ya.r))
+            scratch[12].r = (scratch[9].i * ya.i) - (scratch[10].i * yb.i)
+            scratch[12].i = (scratch[10].r * yb.i) - (scratch[9].r * ya.i)
+            C_ADD(Fout2[0], scratch[11], scratch[12])
+            C_SUB(Fout3[0], scratch[11], scratch[12])
+            Fout0 = Fout0[1:]
+            Fout1 = Fout1[1:]
+            Fout2 = Fout2[1:]
+            Fout3 = Fout3[1:]
+            Fout4 = Fout4[1:]
+def compute_bitrev_table(Fout, f, fstride, in_stride, factors, st):
+    p = int(factors[0])  # the radix
+    m = int(factors[1])  # stage's fft length/p
+    if m == 1:
+        for j in range(p):
+            f[0] = Fout + j
+            f = f[fstride * in_stride:]
+    else:
+        for j in range(p):
+            compute_bitrev_table(Fout, f, fstride * p, in_stride, factors[2:], st)
+            f = f[fstride * in_stride:]
+            Fout += m
+def kf_factor(n, facbuf):
+    p = 4
+    stages = 0
+    nbak = n
+    while n > 1:
+        while n % p:
+            p = 2 if p == 4 else 3 if p == 2 else p + 2
+            if p > 32000 or p * p > n:
+                p = n
+        n /= p
+        if p > 5:
+            return 0
+        facbuf[2 * stages] = p
+        if p == 2 and stages > 1:
+            facbuf[2 * stages] = 4
+            facbuf[2] = 2
+        stages = stages + 1
+    n = nbak
+    for i in range(stages // 2):
+        tmp = facbuf[2 * i]
+        facbuf[2 * i] = facbuf[2 * (stages - i - 1)]
+        facbuf[2 * (stages - i - 1)] = tmp
+    for i in range(stages):
+        n /= facbuf[2 * i]
+        facbuf[2 * i + 1] = n
+    return 1
+def compute_twiddles(twiddles, nfft):
+    for i in range(nfft):
+        phase = (-2 * math.pi / nfft) * i
+        twiddles[i].r = math.cos(phase)
+        twiddles[i].i = math.sin(phase)
+def opus_fft_alloc_twiddles(nfft):
+    st = FFTState()
+    st.nfft = nfft
+    st.scale = 1. / nfft
+    st.twiddles = twiddles = [Complex() for _ in range(nfft)]
+    compute_twiddles(twiddles, nfft)
+    st.shift = -1
+    kf_factor(nfft, st.factors)
+    # bitrev
+    st.bitrev = bitrev = np.zeros(nfft, dtype=int)
+    compute_bitrev_table(0, bitrev, 1, 1, st.factors, st)
+    return st
+def opus_fft_impl(st, fout):
+    fstride = np.zeros(MAXFACTORS, dtype=int)
+    # shift can be -1
+    shift = st.shift if st.shift > 0 else 0
+    fstride[0] = 1
+    L = 0
+    while True:
+        p = st.factors[2 * L]
+        m = st.factors[2 * L + 1]
+        fstride[L + 1] = fstride[L] * p
+        L += 1
+        if m == 1:
+            break
+    m = st.factors[2 * L - 1]
+    for i in range(L - 1, -1, -1):
+        if i != 0:
+            m2 = st.factors[2 * i - 1]
+        else:
+            m2 = 1
+        x = st.factors[2 * i]
+        if x == 2:
+            kf_bfly2(fout, m, fstride[i])
+        elif x == 4:
+            kf_bfly4(fout, fstride[i] << shift, st, m, fstride[i], m2)
+        elif x == 3:
+            kf_bfly3(fout, fstride[i] << shift, st, m, fstride[i], m2)
+        elif x == 5:
+            kf_bfly5(fout, fstride[i] << shift, st, m, fstride[i], m2)
+        m = m2
+def opus_fft(st, fin, fout):
+    scale = st.scale
+    # Bit-reverse the input
+    for i in range(st.nfft):
+        x = fin[i]
+        fout[st.bitrev[i]].r = scale * x.r
+        fout[st.bitrev[i]].i = scale * x.i
+    opus_fft_impl(st, fout)

models/ailia-models/code/pitch.py ADDED Viewed

	@@ -0,0 +1,406 @@

+import math
+def find_best_pitch(xcorr, y, _len, max_pitch, best_pitch):
+    Syy = 1
+    best_num = [-1, -1]
+    best_den = [0, 0]
+    best_pitch[0] = 0
+    best_pitch[1] = 1
+    for j in range(_len):
+        Syy = Syy + (y[j] * y[j])
+    for i in range(max_pitch):
+        if xcorr[i] > 0:
+            num = xcorr[i] * xcorr[i]
+            if num * best_den[1] > best_num[1] * Syy:
+                if num * best_den[0] > best_num[0] * Syy:
+                    best_num[1] = best_num[0]
+                    best_den[1] = best_den[0]
+                    best_pitch[1] = best_pitch[0]
+                    best_num[0] = num
+                    best_den[0] = Syy
+                    best_pitch[0] = i
+                else:
+                    best_num[1] = num
+                    best_den[1] = Syy
+                    best_pitch[1] = i
+        Syy += (y[i + _len] * y[i + _len]) - (y[i] * y[i])
+        Syy = max(1, Syy)
+def _celt_lpc(lpc, ac, p):
+    """
+    lpc (out): [0...p-1] LPC coefficients
+    ac (in):  [0...p] autocorrelation values
+    """
+    error = ac[0]
+    for i in range(p):
+        lpc[i] = 0
+    if ac[0] != 0:
+        for i in range(p):
+            # Sum up this iteration's reflection coefficient
+            rr = 0
+            for j in range(i):
+                rr += lpc[j] * ac[i - j]
+            rr += ac[i + 1]
+            r = -rr / error
+            # Update LPC coefficients and total error
+            lpc[i] = r
+            for j in range((i + 1) >> 1):
+                tmp1 = lpc[j]
+                tmp2 = lpc[i - 1 - j]
+                lpc[j] = tmp1 + (r * tmp2)
+                lpc[i - 1 - j] = tmp2 + (r * tmp1)
+            error = error - ((r * r) * error)
+            # Bail out once we get 30 dB gain
+            if error < .001 * ac[0]:
+                break
+def _celt_autocorr(x, ac, window, overlap, lag, n):
+    """
+    x: (in) [0...n-1] samples x
+    ac: (out) [0...lag-1] ac values
+    """
+    fastN = n - lag
+    if overlap == 0:
+        xptr = x
+    else:
+        xx = [0] * n
+        for i in range(n):
+            xx[i] = x[i]
+        for i in range(overlap):
+            xx[i] = x[i] * window[i]
+            xx[n - i - 1] = x[n - i - 1] * window[i]
+        xptr = xx
+    shift = 0
+    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag + 1)
+    for k in range(lag + 1):
+        d = 0
+        for i in range(k + fastN, n):
+            d = d + (xptr[i] * xptr[i - k])
+        ac[k] += d
+    return shift
+def celt_fir5(x, num, y, N, mem):
+    num0 = num[0]
+    num1 = num[1]
+    num2 = num[2]
+    num3 = num[3]
+    num4 = num[4]
+    mem0 = mem[0]
+    mem1 = mem[1]
+    mem2 = mem[2]
+    mem3 = mem[3]
+    mem4 = mem[4]
+    for i in range(N):
+        _sum = x[i]
+        _sum = _sum + num0 * mem0
+        _sum = _sum + num1 * mem1
+        _sum = _sum + num2 * mem2
+        _sum = _sum + num3 * mem3
+        _sum = _sum + num4 * mem4
+        mem4 = mem3
+        mem3 = mem2
+        mem2 = mem1
+        mem1 = mem0
+        mem0 = x[i]
+        y[i] = _sum
+    mem[0] = mem0
+    mem[1] = mem1
+    mem[2] = mem2
+    mem[3] = mem3
+    mem[4] = mem4
+def pitch_downsample(x, x_lp, _len, C):
+    ac = [0] * 5
+    tmp = 1.
+    lpc = [0] * 4
+    mem = [0] * 5
+    lpc2 = [0] * 5
+    c1 = .8
+    for i in range(1, _len >> 1):
+        x_lp[i] = .5 * (.5 * (x[0][(2 * i - 1)] + x[0][(2 * i + 1)]) + x[0][2 * i])
+    x_lp[0] = .5 * (.5 * (x[0][1]) + x[0][0])
+    if C == 2:
+        for i in range(1, _len >> 2):
+            x_lp[i] += .5 * (.5 * (x[1][(2 * i - 1)] + x[1][(2 * i + 1)]) + x[1][2 * i])
+        x_lp[0] += .5 * (.5 * (x[1][1]) + x[1][0])
+    _celt_autocorr(x_lp, ac, None, 0, 4, _len >> 1)
+    # Noise floor -40 dB
+    ac[0] *= 1.0001
+    # Lag windowing
+    for i in range(1, 4 + 1):
+        ac[i] -= ac[i] * (.008 * i) * (.008 * i)
+    _celt_lpc(lpc, ac, 4)
+    for i in range(4):
+        tmp = .9 * tmp
+        lpc[i] = lpc[i] * tmp
+    # Add a zero
+    lpc2[0] = lpc[0] + .8
+    lpc2[1] = lpc[1] + c1 * lpc[0]
+    lpc2[2] = lpc[2] + c1 * lpc[1]
+    lpc2[3] = lpc[3] + c1 * lpc[2]
+    lpc2[4] = c1 * lpc[3]
+    celt_fir5(x_lp, lpc2, x_lp, _len >> 1, mem)
def xcorr_kernel(x, y, _sum, _len):
    """Accumulate four consecutive cross-correlation lags into ``_sum``.

    For ``k`` in 0..3 this adds ``sum(x[j] * y[j + k] for j < _len)`` to
    ``_sum[k]``.  ``y`` must therefore provide at least ``_len + 3`` elements
    whenever ``_len > 0``.

    The products are accumulated in increasing ``j`` for every lag, which is
    the same order as the hand-unrolled reference loop, so results are
    bit-identical for ``_len >= 4``.  Unlike the unrolled version this also
    works for ``_len < 4`` (the loop variable used to be unbound there) and
    does not re-slice ``x``/``y`` on every sample.

    Args:
        x: first sequence, read at indices ``0 .. _len - 1``.
        y: second sequence, read at indices ``0 .. _len + 2``.
        _sum: mutable sequence of four accumulators, updated in place.
        _len: number of samples of ``x`` to correlate.
    """
    acc0, acc1, acc2, acc3 = _sum[0], _sum[1], _sum[2], _sum[3]
    for j in range(_len):
        xj = x[j]
        acc0 = acc0 + xj * y[j]
        acc1 = acc1 + xj * y[j + 1]
        acc2 = acc2 + xj * y[j + 2]
        acc3 = acc3 + xj * y[j + 3]
    _sum[0], _sum[1], _sum[2], _sum[3] = acc0, acc1, acc2, acc3
def dual_inner_prod(x, y01, y02, N):
    """Return the inner products of ``x`` with ``y01`` and with ``y02``.

    Only the first ``N`` elements of each sequence are read.  Both sums are
    accumulated in index order during a single pass over ``x``.
    """
    acc_first = 0
    acc_second = 0
    for idx in range(N):
        sample = x[idx]
        acc_first = acc_first + sample * y01[idx]
        acc_second = acc_second + sample * y02[idx]
    return acc_first, acc_second
def celt_inner_prod(x, y, N):
    """Return the inner product of the first ``N`` elements of ``x`` and ``y``.

    The products are added in index order starting from integer zero.
    """
    total = 0
    for idx in range(N):
        total = total + x[idx] * y[idx]
    return total
def celt_pitch_xcorr(_x, _y, xcorr, _len, max_pitch):
    """Fill ``xcorr[i]`` with the correlation of ``_x`` and ``_y`` at lag ``i``.

    For every lag ``i`` in ``range(max_pitch)`` the value written is the inner
    product of ``_x[:_len]`` with ``_y[i:i + _len]``.  Lags are processed four
    at a time through ``xcorr_kernel``; the remaining ``max_pitch % 4`` lags
    use ``celt_inner_prod``.

    The split point is computed explicitly instead of being taken from the
    loop variable of the unrolled loop, so ``max_pitch < 4`` (where that loop
    never runs) no longer raises ``UnboundLocalError``.

    Args:
        _x: reference sequence, at least ``_len`` long.
        _y: delayed sequence; the four-lag kernel reads up to three elements
            past ``i + _len - 1``.
        xcorr: output sequence with at least ``max_pitch`` slots.
        _len: number of samples per correlation.
        max_pitch: number of lags to evaluate.
    """
    # First lag that is not covered by a complete group of four.
    unrolled_end = 4 * (max_pitch // 4) if max_pitch > 0 else 0
    for i in range(0, unrolled_end, 4):
        _sum = [0, 0, 0, 0]
        xcorr_kernel(_x, _y[i:], _sum, _len)
        xcorr[i] = _sum[0]
        xcorr[i + 1] = _sum[1]
        xcorr[i + 2] = _sum[2]
        xcorr[i + 3] = _sum[3]
    # In case max_pitch isn't a multiple of 4, do non-unrolled version.
    for i in range(unrolled_end, max_pitch):
        xcorr[i] = celt_inner_prod(_x, _y[i:], _len)
def pitch_search(x_lp, y, _len, max_pitch):
    """Search for the best pitch lag of ``x_lp`` within ``y``.

    The search is done in two passes: a coarse pass on copies of both inputs
    that keep every second sample, followed by a finer pass on the inputs
    themselves that only evaluates lags within 2 of twice either coarse
    candidate.  The winning lag is then nudged by -1, 0 or +1 depending on the
    correlation values of its two neighbours.

    Args:
        x_lp: target signal; ``_len >> 1`` samples are used.
        y: signal to search in; ``(_len + max_pitch) >> 1`` samples are used.
        _len: frame length, expressed at twice the resolution of ``x_lp``.
        max_pitch: lag range, expressed at twice the resolution of ``y``.

    Returns:
        ``2 * best_lag - offset`` where ``best_lag`` is the best lag of the
        finer pass and ``offset`` is the neighbour-based correction.
    """
    half_len = _len >> 1
    quarter_len = _len >> 2
    half_pitch = max_pitch >> 1
    quarter_pitch = max_pitch >> 2
    best_pitch = [0, 0]

    # Downsample by 2 again
    x_lp4 = [x_lp[2 * j] for j in range(quarter_len)]
    y_lp4 = [y[2 * j] for j in range((_len + max_pitch) >> 2)]
    xcorr = [0] * half_pitch

    # Coarse search with 4x decimation
    celt_pitch_xcorr(x_lp4, y_lp4, xcorr, quarter_len, quarter_pitch)
    find_best_pitch(xcorr, y_lp4, quarter_len, quarter_pitch, best_pitch)

    # Finer search with 2x decimation: only lags next to a coarse candidate
    # are evaluated, every other entry is reset to zero.
    for lag in range(half_pitch):
        near_first = abs(lag - 2 * best_pitch[0]) <= 2
        near_second = abs(lag - 2 * best_pitch[1]) <= 2
        if near_first or near_second:
            xcorr[lag] = max(-1, celt_inner_prod(x_lp, y[lag:], half_len))
        else:
            xcorr[lag] = 0
    find_best_pitch(xcorr, y, half_len, half_pitch, best_pitch)

    # Refine by pseudo-interpolation
    peak = best_pitch[0]
    offset = 0
    if 0 < peak < half_pitch - 1:
        a, b, c = xcorr[peak - 1], xcorr[peak], xcorr[peak + 1]
        if (c - a) > .7 * (b - a):
            offset = 1
        elif (a - c) > .7 * (b - c):
            offset = -1
    return 2 * peak - offset
def compute_pitch_gain(xy, xx, yy):
    """Return ``xy`` normalised by ``sqrt(1 + xx * yy)``.

    The ``1`` added under the square root keeps the denominator non-zero when
    either energy term is zero.
    """
    denominator = math.sqrt(1 + xx * yy)
    return xy / denominator
# Per-divisor multiplier used by remove_doubling to pick the second lag
# (T1b) that is checked together with T0/k; indexed by k.
second_check = [0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2]


def remove_doubling(x, maxperiod, minperiod, N, T0_, prev_period, prev_gain):
    """Check sub-multiples of a pitch period and keep the most convincing one.

    All period and length arguments are halved on entry, so the function works
    at half the resolution in which they are given.  Starting from the halved
    candidate ``T0``, every ``T0 / k`` for ``k`` in 2..15 is scored with
    ``compute_pitch_gain`` and accepted when its gain exceeds a threshold that
    depends on the initial gain, on how close the lag is to ``prev_period`` and
    on how short the lag is.

    Args:
        x: signal buffer; samples from index ``maxperiod // 2`` onward form the
            frame that is correlated against earlier positions of the buffer.
        maxperiod: largest period (halved internally).
        minperiod: smallest period (halved internally; the unhalved value is
            the lower bound of the returned period).
        N: frame length (halved internally).
        T0_: one-element list holding the initial period.  Updated in place
            with the refined period, expressed in the caller's resolution.
        prev_period: period selected for the previous frame (halved
            internally).
        prev_gain: gain returned for the previous frame.

    Returns:
        The gain associated with the selected period, never larger than the
        normalised gain of that period.
    """
    minperiod0 = minperiod
    maxperiod //= 2
    minperiod //= 2
    T0_[0] //= 2
    prev_period //= 2
    N //= 2

    x0 = x
    x = x0[maxperiod:]
    if T0_[0] >= maxperiod:
        T0_[0] = maxperiod - 1
    T = T0 = T0_[0]

    xx, xy = dual_inner_prod(x, x, x0[maxperiod - T0:], N)

    # yy_lookup[lag] is the energy of the N-sample window that starts ``lag``
    # samples before the frame, updated incrementally and floored at zero.
    yy_lookup = [0] * (maxperiod + 1)
    yy_lookup[0] = xx
    running = xx
    for lag in range(1, maxperiod + 1):
        entering = x0[maxperiod - lag]
        leaving = x[N - lag]
        running = running + (entering * entering) - (leaving * leaving)
        yy_lookup[lag] = max(0, running)

    yy = yy_lookup[T0]
    best_xy, best_yy = xy, yy
    g = g0 = compute_pitch_gain(xy, xx, yy)

    # Look for any pitch at T/k
    for k in range(2, 16):
        T1 = (2 * T0 + k) // (2 * k)
        if T1 < minperiod:
            break
        # Look for another strong correlation at T1b
        if k != 2:
            T1b = (2 * second_check[k] * T0 + k) // (2 * k)
        elif T1 + T0 > maxperiod:
            T1b = T0
        else:
            T1b = T0 + T1
        xy, xy2 = dual_inner_prod(x, x0[maxperiod - T1:], x0[maxperiod - T1b:], N)
        xy = .5 * (xy + xy2)
        yy = .5 * (yy_lookup[T1] + yy_lookup[T1b])
        g1 = compute_pitch_gain(xy, xx, yy)

        # Continuity bonus: lower the threshold near the previous period.
        period_gap = abs(T1 - prev_period)
        if period_gap <= 1:
            cont = prev_gain
        elif period_gap <= 2 and 5 * k * k < T0:
            cont = .5 * prev_gain
        else:
            cont = 0

        # Bias against very high pitch (very short period) to avoid
        # false-positives due to short-term correlation.
        # NOTE: the second branch can never be taken, because any T1 below
        # 2 * minperiod that reaches this point is also below 3 * minperiod.
        # It is kept as-is so the behaviour stays unchanged.
        if T1 < 3 * minperiod:
            thresh = max(.4, (.85 * g0) - cont)
        elif T1 < 2 * minperiod:
            thresh = max(.5, (.9 * g0) - cont)
        else:
            thresh = max(.3, (.7 * g0) - cont)

        if g1 > thresh:
            best_xy, best_yy = xy, yy
            T, g = T1, g1

    best_xy = max(0, best_xy)
    pg = 1. if best_yy <= best_xy else best_xy / (best_yy + 1)
    if pg > g:
        pg = g

    # Correlation one lag after, at, and one lag before T.
    xcorr = [celt_inner_prod(x, x0[maxperiod - (T + k - 1):], N) for k in range(3)]
    if xcorr[2] - xcorr[0] > .7 * (xcorr[1] - xcorr[0]):
        offset = 1
    elif xcorr[0] - xcorr[2] > .7 * (xcorr[1] - xcorr[2]):
        offset = -1
    else:
        offset = 0

    # Back to the caller's resolution, never below the original minimum.
    T0_[0] = max(2 * T + offset, minperiod0)
    return pg

models/ailia-models/code/rnnoise.py ADDED Viewed

	@@ -0,0 +1,521 @@

+import sys
+import math
+import wave
+import struct
+from logging import getLogger
+import numpy as np
+from tqdm import tqdm
+import ailia
+# import original modules
+sys.path.append('../../util')
+from arg_utils import get_base_parser, update_parser, get_savepath  # noqa
+from model_utils import check_and_download_models  # noqa
+from kiss_fft import Complex, opus_fft_alloc_twiddles, opus_fft
+from pitch import pitch_downsample, pitch_search, remove_doubling
+logger = getLogger(__name__)
+# ======================
+# Parameters
+# ======================
# Model files and the remote location they are fetched from when missing
# (all three are handed to check_and_download_models in main()).
WEIGHT_PATH = 'rnn_model.onnx'
MODEL_PATH = 'rnn_model.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/rnnoise/'
# Input / output file names passed to get_base_parser
# (presumably used as the command-line defaults).
AUDIO_PATH = 'babble_15dB.wav'
OUTPUT_PATH = 'denoised.wav'
# Pitch analysis: period search range and analysis frame, in samples.
PITCH_MIN_PERIOD = 60
PITCH_MAX_PERIOD = 768
PITCH_FRAME_SIZE = 960
# Length of the sliding pitch history buffer (1728 samples).
PITCH_BUF_SIZE = PITCH_MAX_PERIOD + PITCH_FRAME_SIZE
# Number of frequency bands used for energies, gains and the DCT.
NB_BANDS = 22
# Number of past band-cepstra kept in DenoiseState.cepstral_mem.
CEPS_MEM = 8
# Number of leading cepstral coefficients that get delta features.
NB_DELTA_CEPS = 6
# Length of the feature vector fed to the network (42).
NB_FEATURES = NB_BANDS + 3 * NB_DELTA_CEPS + 2
# Band edges in eband5ms are shifted left by this amount to get FFT bins.
FRAME_SIZE_SHIFT = 2
# Samples consumed per frame (480); the output file is written at 48000 Hz.
FRAME_SIZE = 120 << FRAME_SIZE_SHIFT
# Analysis/synthesis window: previous frame followed by current frame (960).
WINDOW_SIZE = 2 * FRAME_SIZE
# Number of spectrum bins kept from the WINDOW_SIZE-point FFT (481).
FREQ_SIZE = FRAME_SIZE + 1
+# ======================
+# Argument Parser Config
+# ======================
# Start from the shared base parser, then add the model-specific switch that
# selects onnxruntime instead of ailia for inference.
parser = get_base_parser('rnnoise', AUDIO_PATH, OUTPUT_PATH)
parser.add_argument('--onnx', action='store_true', help='execute onnxruntime version.')
# Parsed once at import time; read as a module-level global by the functions below.
args = update_parser(parser)
+# ======================
+# Secondary Functions
+# ======================
class CommonState:
    """Lazily built lookup tables shared by all transforms (see check_init)."""
    # Set to True by check_init() once the tables below have been filled.
    init = False
    # FFT state returned by opus_fft_alloc_twiddles(2 * FRAME_SIZE).
    kfft = None
    # Rising half of the analysis/synthesis window, one value per frame sample.
    half_window = np.zeros(FRAME_SIZE)
    # Flattened NB_BANDS x NB_BANDS cosine table, indexed [i * NB_BANDS + j].
    dct_table = np.zeros(NB_BANDS * NB_BANDS)
# Module-wide singleton used by check_init(), dct(), the FFT wrappers and
# apply_window().
common = CommonState()
class DenoiseState:
    """Per-stream state carried from one processed frame to the next.

    The buffers are created in ``__init__`` so that every instance owns its
    own arrays.  (As class attributes they were shared, so a second
    ``DenoiseState()`` would have started from the first one's history.)
    """

    def __init__(self):
        # Previous input frame; first half of the next analysis window.
        self.analysis_mem = np.zeros(FRAME_SIZE)
        # Ring buffer of the last CEPS_MEM band-cepstra, written at ``memid``.
        self.cepstral_mem = np.zeros((CEPS_MEM, NB_BANDS))
        # Next row of ``cepstral_mem`` to overwrite.
        self.memid = 0
        # Second half of the last synthesis window, kept for overlap-add.
        self.synthesis_mem = np.zeros(FRAME_SIZE)
        # Sliding history of the most recent PITCH_BUF_SIZE input samples.
        self.pitch_buf = np.zeros(PITCH_BUF_SIZE)
        # Not referenced by the code in this file.
        self.pitch_enh_buf = np.zeros(PITCH_BUF_SIZE)
        # Gain and period returned by remove_doubling for the previous frame.
        self.last_gain = 0.0
        self.last_period = 0
        # Two-tap memory of the input high-pass biquad.
        self.mem_hp_x = np.zeros(2)
def compute_band_energy(bandE, X):
    """Store the energy of spectrum ``X`` in each of the ``NB_BANDS`` bands.

    Every bin between two consecutive band edges contributes its squared
    magnitude to both neighbouring bands, weighted linearly by its position
    between the edges.  The first and last band only receive one such ramp
    and are doubled to compensate.

    The band edges come from the module-level ``eband5ms`` table, the same one
    ``compute_band_corr`` and ``interp_band_gain`` use, instead of a private
    copy rebuilt on every call.

    Args:
        bandE: output sequence with ``NB_BANDS`` slots, overwritten.
        X: spectrum bins exposing ``.r`` and ``.i``.
    """
    _sum = [0] * NB_BANDS
    for band in range(NB_BANDS - 1):
        first_bin = eband5ms[band] << FRAME_SIZE_SHIFT
        band_size = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for j in range(band_size):
            frac = j / band_size
            cell = X[first_bin + j]
            tmp = cell.r ** 2
            tmp += cell.i ** 2
            _sum[band] += (1 - frac) * tmp
            _sum[band + 1] += frac * tmp
    _sum[0] *= 2
    _sum[NB_BANDS - 1] *= 2
    for band in range(NB_BANDS):
        bandE[band] = _sum[band]
# Band edges, one entry per band (NB_BANDS values).  Each value is shifted
# left by FRAME_SIZE_SHIFT to obtain the index of the FFT bin where the band
# starts.
eband5ms = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
]
def compute_band_corr(bandE, X, P):
    """Store the per-band correlation between spectra ``X`` and ``P``.

    For each bin the real part of ``X * conj(P)`` is split between the two
    neighbouring bands with a linear weight, exactly like the energy
    computation; the first and last band are doubled.

    Args:
        bandE: output sequence with ``NB_BANDS`` slots, overwritten.
        X: first spectrum, bins exposing ``.r`` and ``.i``.
        P: second spectrum, bins exposing ``.r`` and ``.i``.
    """
    acc = [0] * NB_BANDS
    for band in range(NB_BANDS - 1):
        start = eband5ms[band] << FRAME_SIZE_SHIFT
        width = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for offset in range(width):
            weight = offset / width
            dot = X[start + offset].r * P[start + offset].r
            dot += X[start + offset].i * P[start + offset].i
            acc[band] += (1 - weight) * dot
            acc[band + 1] += weight * dot
    acc[0] *= 2
    acc[NB_BANDS - 1] *= 2
    for band, value in enumerate(acc):
        bandE[band] = value
def interp_band_gain(g, bandE):
    """Expand per-band values ``bandE`` into per-bin values ``g``.

    ``g`` is cleared first, then every bin between two band edges receives the
    linear interpolation of the two neighbouring band values.  Bins at or
    beyond the last band edge stay at zero.

    Args:
        g: numpy array of per-bin values, overwritten in place.
        bandE: sequence of ``NB_BANDS`` per-band values.
    """
    g[...] = 0
    for band in range(NB_BANDS - 1):
        start = eband5ms[band] << FRAME_SIZE_SHIFT
        width = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT
        for offset in range(width):
            weight = offset / width
            g[start + offset] = (1 - weight) * bandE[band] + weight * bandE[band + 1]
def check_init():
    """Build the shared FFT state, window and DCT table on first use.

    Subsequent calls return immediately once ``common.init`` is set.
    """
    if common.init:
        return

    common.kfft = opus_fft_alloc_twiddles(2 * FRAME_SIZE)

    # Window: sin(pi/2 * sin^2(pi/2 * (n + 0.5) / FRAME_SIZE))
    for n in range(FRAME_SIZE):
        inner = math.sin(.5 * math.pi * (n + .5) / FRAME_SIZE)
        common.half_window[n] = math.sin(.5 * math.pi * inner * inner)

    # Cosine table; the j == 0 column is additionally scaled by sqrt(1/2).
    for row in range(NB_BANDS):
        base = row * NB_BANDS
        for col in range(NB_BANDS):
            coeff = math.cos((row + .5) * col * math.pi / NB_BANDS)
            if col == 0:
                coeff *= math.sqrt(.5)
            common.dct_table[base + col] = coeff

    common.init = True
def dct(out, in_data):
    """Write the DCT of the ``NB_BANDS`` values in ``in_data`` to ``out``.

    Uses the cosine table built by ``check_init``.  The output is scaled by
    ``sqrt(2 / NB_BANDS)``; the scale is derived from ``NB_BANDS`` rather than
    a literal ``22`` so it stays consistent with the table size.

    Args:
        out: output sequence; its first ``NB_BANDS`` entries are overwritten.
        in_data: input sequence with at least ``NB_BANDS`` entries.

    Returns:
        ``out``.
    """
    check_init()
    scale = math.sqrt(2. / NB_BANDS)
    for i in range(NB_BANDS):
        _sum = 0
        for j in range(NB_BANDS):
            _sum += in_data[j] * common.dct_table[j * NB_BANDS + i]
        out[i] = _sum * scale
    return out
def forward_transform(out, in_data):
    """Transform ``WINDOW_SIZE`` real samples and keep ``FREQ_SIZE`` bins.

    Args:
        out: output sequence; entries ``0 .. FREQ_SIZE - 1`` are replaced by
            the corresponding FFT output bins.
        in_data: ``WINDOW_SIZE`` real samples.
    """
    check_init()
    samples = [Complex() for _ in range(WINDOW_SIZE)]
    spectrum = [Complex() for _ in range(WINDOW_SIZE)]
    for n in range(WINDOW_SIZE):
        cell = samples[n]
        cell.r = in_data[n]
        cell.i = 0
    opus_fft(common.kfft, samples, spectrum)
    for k in range(FREQ_SIZE):
        out[k] = spectrum[k]
def inverse_transform(out, in_data):
    """Turn ``FREQ_SIZE`` spectrum bins back into ``WINDOW_SIZE`` samples.

    The upper bins are rebuilt as the complex conjugate of the mirrored lower
    bins, the forward FFT is applied, and its output is read in reverse order
    and multiplied by ``WINDOW_SIZE``.

    Args:
        out: output sequence with ``WINDOW_SIZE`` slots, overwritten.
        in_data: ``FREQ_SIZE`` spectrum bins exposing ``.r`` and ``.i``.
    """
    check_init()
    spectrum = [Complex() for _ in range(WINDOW_SIZE)]
    transformed = [Complex() for _ in range(WINDOW_SIZE)]
    for k in range(FREQ_SIZE):
        spectrum[k] = in_data[k]
    for k in range(FREQ_SIZE, WINDOW_SIZE):
        mirror = spectrum[WINDOW_SIZE - k]
        spectrum[k].r = mirror.r
        spectrum[k].i = -mirror.i
    opus_fft(common.kfft, spectrum, transformed)
    # output in reverse order for IFFT.
    for n in range(WINDOW_SIZE):
        out[n] = WINDOW_SIZE * transformed[(WINDOW_SIZE - n) % WINDOW_SIZE].r
def apply_window(x):
    """Multiply ``x`` in place by the symmetric ``WINDOW_SIZE``-point window.

    The stored half window is applied to the first ``FRAME_SIZE`` samples and,
    mirrored, to the last ``FRAME_SIZE`` samples.
    """
    check_init()
    last = WINDOW_SIZE - 1
    for n in range(FRAME_SIZE):
        x[n] *= common.half_window[n]
        x[last - n] *= common.half_window[n]
def frame_analysis(st, X, Ex, in_data):
    """Window and transform one frame, then compute its band energies.

    The analysis window is the previous frame (from ``st.analysis_mem``)
    followed by ``in_data``; ``in_data`` then becomes the stored previous
    frame.

    Args:
        st: state object providing ``analysis_mem``.
        X: output spectrum, filled by ``forward_transform``.
        Ex: output band energies, filled by ``compute_band_energy``.
        in_data: the ``FRAME_SIZE`` new samples.
    """
    window = np.zeros(WINDOW_SIZE)
    window[:FRAME_SIZE] = st.analysis_mem
    window[FRAME_SIZE:] = in_data
    st.analysis_mem[...] = in_data
    apply_window(window)
    forward_transform(X, window)
    compute_band_energy(Ex, X)
def compute_frame_features(st, X, P, Ex, Ep, Exp, features, x):
    """Analyse one frame and fill the network input vector ``features``.

    Outputs are written into the caller-provided containers: ``X``/``Ex`` get
    the spectrum and band energies of the frame, ``P``/``Ep`` those of the
    pitch-delayed window, ``Exp`` the normalised band correlation between the
    two, and ``features`` the NB_FEATURES inputs of the model.  ``st`` is
    updated (analysis memory, pitch buffer, last period/gain, cepstral ring).

    Returns ``1`` when the total band energy is below 0.04 (``features`` is
    zeroed and the cepstral history is left untouched), otherwise the boolean
    ``E < 0.1``.  The only caller in this file ignores the return value.
    """
    E = 0
    spec_variability = 0
    Ly = np.zeros(NB_BANDS)
    p = np.zeros(WINDOW_SIZE)
    pitch_buf = np.zeros(PITCH_BUF_SIZE >> 1)
    tmp = np.zeros(NB_BANDS)
    frame_analysis(st, X, Ex, x)
    # Slide the pitch history left by one frame and append the new samples.
    st.pitch_buf[:PITCH_BUF_SIZE - FRAME_SIZE] = st.pitch_buf[FRAME_SIZE:]
    st.pitch_buf[PITCH_BUF_SIZE - FRAME_SIZE:] = x
    # pitch_downsample takes a list of channels; there is a single one here.
    pre = [st.pitch_buf]
    pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1)
    pitch_index = pitch_search(
        pitch_buf[PITCH_MAX_PERIOD >> 1:], pitch_buf, PITCH_FRAME_SIZE,
        PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD)
    # pitch_search returns an offset from the start of the buffer; convert it
    # to a period counted back from the current frame.
    pitch_index = PITCH_MAX_PERIOD - pitch_index
    # One-element list so remove_doubling can update the period in place.
    p_pitch_index = [pitch_index]
    gain = remove_doubling(
        pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
        PITCH_FRAME_SIZE, p_pitch_index, st.last_period, st.last_gain)
    st.last_period = pitch_index = p_pitch_index[0]
    st.last_gain = gain
    # Window of history delayed by one pitch period relative to the current
    # analysis window.
    for i in range(WINDOW_SIZE):
        p[i] = st.pitch_buf[PITCH_BUF_SIZE - WINDOW_SIZE - pitch_index + i]
    apply_window(p)
    forward_transform(P, p)
    compute_band_energy(Ep, P)
    compute_band_corr(Exp, X, P)
    # Normalise the band correlation by the geometric mean of both energies.
    for i in range(NB_BANDS):
        Exp[i] = Exp[i] / math.sqrt(.001 + Ex[i] * Ep[i])
    dct(tmp, Exp)
    # First NB_DELTA_CEPS DCT coefficients of the pitch correlation.
    for i in range(NB_DELTA_CEPS):
        features[NB_BANDS + 2 * NB_DELTA_CEPS + i] = tmp[i]
    features[NB_BANDS + 2 * NB_DELTA_CEPS] -= 1.3
    features[NB_BANDS + 2 * NB_DELTA_CEPS + 1] -= 0.9
    # Pitch period feature, offset by 300 and scaled by 0.01.
    features[NB_BANDS + 3 * NB_DELTA_CEPS] = .01 * (pitch_index - 300)
    logMax = -2
    follow = -2
    # Log band energies, floored 7 below the running maximum and limited to
    # dropping by at most 1.5 from one band to the next.
    for i in range(NB_BANDS):
        Ly[i] = math.log10(1e-2 + Ex[i])
        Ly[i] = max(logMax - 7, max(follow - 1.5, Ly[i]))
        logMax = max(logMax, Ly[i])
        follow = max(follow - 1.5, Ly[i])
        E += Ex[i]
    if E < 0.04:
        # If there's no audio, avoid messing up the state.
        features[...] = 0
        return 1
    dct(features, Ly)
    features[0] -= 12
    features[1] -= 4
    # ceps_0 is the ring-buffer row for this frame; ceps_1 / ceps_2 are the
    # rows written one and two frames ago (indices wrap around CEPS_MEM).
    ceps_0 = st.cepstral_mem[st.memid]
    ceps_1 = st.cepstral_mem[CEPS_MEM + st.memid - 1] \
        if st.memid < 1 else st.cepstral_mem[st.memid - 1]
    ceps_2 = st.cepstral_mem[CEPS_MEM + st.memid - 2] \
        if st.memid < 2 else st.cepstral_mem[st.memid - 2]
    # Writing through the row stores the new cepstrum in st.cepstral_mem.
    for i in range(NB_BANDS):
        ceps_0[i] = features[i]
    st.memid += 1
    # Replace the first NB_DELTA_CEPS coefficients by their three-frame sum
    # and append first and second differences over the same three frames.
    for i in range(NB_DELTA_CEPS):
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i]
        features[NB_BANDS + i] = ceps_0[i] - ceps_2[i]
        features[NB_BANDS + NB_DELTA_CEPS + i] = ceps_0[i] - 2 * ceps_1[i] + ceps_2[i]
    # Spectral variability features.
    if st.memid == CEPS_MEM:
        st.memid = 0
    # For every stored cepstrum, squared distance to its nearest other stored
    # cepstrum; the feature is the mean of those distances minus 2.1.
    for i in range(CEPS_MEM):
        mindist = 1e15
        for j in range(CEPS_MEM):
            dist = 0.
            for k in range(NB_BANDS):
                # NOTE: ``tmp`` is reused here as a scalar difference.
                tmp = st.cepstral_mem[i][k] - st.cepstral_mem[j][k]
                dist += tmp * tmp
            if j != i:
                mindist = min(mindist, dist)
        spec_variability += mindist
    features[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = spec_variability / CEPS_MEM - 2.1
    return E < 0.1
def frame_synthesis(st, out, y):
    """Synthesise one output frame from spectrum ``y`` by overlap-add.

    The inverse-transformed, windowed signal is split in two halves: the first
    half is added to the half saved from the previous call and written to
    ``out``; the second half is saved in ``st.synthesis_mem`` for the next
    call.

    Args:
        st: state object providing ``synthesis_mem``.
        out: output sequence with ``FRAME_SIZE`` slots, overwritten.
        y: spectrum bins of the frame.
    """
    window = np.zeros(WINDOW_SIZE)
    inverse_transform(window, y)
    apply_window(window)
    for n in range(FRAME_SIZE):
        out[n] = window[n] + st.synthesis_mem[n]
    st.synthesis_mem[...] = window[FRAME_SIZE:]
def biquad(y, mem, x, b, a, N):
    """Filter ``N`` samples of ``x`` into ``y`` with a second-order section.

    Args:
        y: output sequence with at least ``N`` slots.
        mem: two-element filter memory, updated in place so consecutive calls
            continue the same filter.
        x: input samples.
        b: two feed-forward coefficients.
        a: two feedback coefficients.
        N: number of samples to process.
    """
    for n in range(N):
        sample = x[n]
        filtered = sample + mem[0]
        mem[0] = mem[1] + (b[0] * sample - a[0] * filtered)
        mem[1] = b[1] * sample - a[1] * filtered
        y[n] = filtered
def pitch_filter(X, P, Ex, Ep, Exp, g):
    """Mix the pitch-delayed spectrum ``P`` into ``X`` and restore band energy.

    A per-band mixing weight is derived from the band correlation ``Exp`` and
    the band gain ``g``, interpolated to per-bin resolution and used to add
    ``P`` to ``X`` in place.  ``X`` is then rescaled per band so that its band
    energies match ``Ex`` again.

    Args:
        X: spectrum of the frame, modified in place.
        P: spectrum of the pitch-delayed window.
        Ex: band energies of ``X`` before filtering.
        Ep: band energies of ``P``.
        Exp: normalised band correlation between ``X`` and ``P``.
        g: per-band gains.
    """
    band_mix = np.zeros(NB_BANDS)
    for band in range(NB_BANDS):
        if Exp[band] > g[band]:
            ratio = 1
        else:
            corr_sq = Exp[band] ** 2
            gain_sq = g[band] ** 2
            ratio = corr_sq * (1 - gain_sq) / (.001 + gain_sq * (1 - corr_sq))
        # Clamp to [0, 1] before the square root, then scale P to X's level.
        strength = math.sqrt(min(1, max(0, ratio)))
        band_mix[band] = strength * math.sqrt(Ex[band] / (1e-8 + Ep[band]))

    bin_mix = np.zeros(FREQ_SIZE)
    interp_band_gain(bin_mix, band_mix)
    for k in range(FREQ_SIZE):
        X[k].r += bin_mix[k] * P[k].r
        X[k].i += bin_mix[k] * P[k].i

    # Renormalise so each band keeps the energy it had before mixing.
    mixed_energy = np.zeros(NB_BANDS)
    compute_band_energy(mixed_energy, X)
    band_norm = np.zeros(NB_BANDS)
    for band in range(NB_BANDS):
        band_norm[band] = math.sqrt(Ex[band] / (1e-8 + mixed_energy[band]))
    bin_norm = np.zeros(FREQ_SIZE)
    interp_band_gain(bin_norm, band_norm)
    for k in range(FREQ_SIZE):
        X[k].r *= bin_norm[k]
        X[k].i *= bin_norm[k]
+# ======================
+# Main functions
+# ======================
def preprocess(st, data):
    """High-pass filter one input frame and extract its features.

    Args:
        st: ``DenoiseState`` of the stream, updated in place.
        data: the ``FRAME_SIZE`` input samples of the frame.

    Returns:
        Tuple ``(X, P, Ex, Ep, Exp, features)``: frame spectrum, pitch-delayed
        spectrum, their band energies, their band correlation and the feature
        vector for the network.
    """
    X = [Complex() for _ in range(FREQ_SIZE)]
    P = [Complex() for _ in range(WINDOW_SIZE)]
    Ex, Ep, Exp = (np.zeros(NB_BANDS) for _ in range(3))
    features = np.zeros(NB_FEATURES)

    # Input high-pass filter; its memory lives in the stream state.
    filtered = np.zeros(FRAME_SIZE)
    biquad(filtered, st.mem_hp_x, data, (-2., 1.), (-1.99599, 0.99600), FRAME_SIZE)

    compute_frame_features(st, X, P, Ex, Ep, Exp, features, filtered)
    return X, P, Ex, Ep, Exp, features
def postprocess(st, pp, gains, vad_prob):
    """Apply the predicted gains to buffered frames and synthesise audio.

    Args:
        st: ``DenoiseState`` of the stream (synthesis overlap is updated).
        pp: list of per-frame dicts with keys ``"X"``, ``"P"``, ``"Ex"``,
            ``"Ep"`` and ``"Exp"`` as produced by ``preprocess``.
        gains: per-frame band gains predicted by the model.
        vad_prob: per-frame model output; only used to bound the iteration.

    Returns:
        List with one ``FRAME_SIZE``-sample array per processed frame.
    """
    outputs = []
    for frame, band_gains, _ in zip(pp, gains, vad_prob):
        X = frame["X"]
        pitch_filter(X, frame["P"], frame["Ex"], frame["Ep"], frame["Exp"], band_gains)

        # Per-bin gain obtained by interpolating the band gains.
        bin_gains = np.ones(FREQ_SIZE)
        interp_band_gain(bin_gains, band_gains)
        for k in range(FREQ_SIZE):
            X[k].r *= bin_gains[k]
            X[k].i *= bin_gains[k]

        samples = np.zeros(FRAME_SIZE)
        frame_synthesis(st, samples, X)
        outputs.append(samples)
    return outputs
def rnnoise_process_frame(net, x):
    """Run the network on a batch of up to 100 feature vectors.

    Shorter batches are padded with zero rows to 100 frames before inference.

    Args:
        net: ailia network, or onnxruntime session when ``--onnx`` is set.
        x: sequence of per-frame feature vectors.

    Returns:
        Tuple ``(gains, vad_prob)`` holding the first batch entry of the two
        model outputs.
    """
    feats = np.array(x, dtype=np.float32)
    missing = 100 - feats.shape[0]
    if missing > 0:
        padding = np.zeros((missing, NB_FEATURES), dtype=np.float32)
        feats = np.concatenate([feats, padding])
    batch = np.expand_dims(feats, axis=0)

    # feedforward
    if args.onnx:
        output = net.run(None, {'main_input:0': batch})
    else:
        output = net.predict([batch])
    gains, vad_prob = output
    return gains[0], vad_prob[0]
def _denoise_and_write(net, st, pp, wf_out):
    """Run the model on the buffered frames ``pp`` and append the audio to ``wf_out``."""
    feats = [p["feat"] for p in pp]
    gains, vad_prob = rnnoise_process_frame(net, feats)
    for out in postprocess(st, pp, gains, vad_prob):
        # Saturate to the 16-bit range, then truncate to integers.
        pcm = np.clip(out, (-0x7fff - 1), 0x7fff).astype(np.int16)
        wf_out.writeframes(pcm.tobytes())


def recognize_from_audio(net):
    """Denoise the input wave file and write the result next to it.

    The input is read in ``FRAME_SIZE``-sample frames, processed in batches of
    100 frames, and written as mono 16-bit audio at 48000 Hz.

    Compared with a plain read loop this version
      * closes both wave files and the progress bar even when an error occurs,
      * zero-pads a final short frame instead of failing on it, so the output
        length is always a multiple of ``FRAME_SIZE``.

    NOTE(review): the samples are interpreted as mono 16-bit PCM; other input
    layouts are not converted.

    Args:
        net: ailia network, or onnxruntime session when ``--onnx`` is set.
    """
    wav_path = args.input[0]
    logger.info(wav_path)
    logger.info('Start inference...')
    save_path = get_savepath(args.savepath, wav_path, ext='.wav')

    pp = []
    st = DenoiseState()
    with wave.open(wav_path, "rb") as wf, wave.open(save_path, "wb") as wf_out:
        wf_out.setnchannels(1)
        wf_out.setsampwidth(16 // 8)
        wf_out.setframerate(48000)
        with tqdm(total=wf.getnframes()) as bar:
            while True:
                buf = wf.readframes(FRAME_SIZE)
                if not buf:
                    break
                data = np.frombuffer(buf, dtype=np.int16)
                n_read = len(data)
                if n_read < FRAME_SIZE:
                    # Last frame of the file: pad with silence to a full frame.
                    data = np.concatenate(
                        [data, np.zeros(FRAME_SIZE - n_read, dtype=np.int16)])
                X, P, Ex, Ep, Exp, feat = preprocess(st, data)
                pp.append(dict(X=X, P=P, Ex=Ex, Ep=Ep, Exp=Exp, feat=feat))
                if len(pp) == 100:
                    _denoise_and_write(net, st, pp, wf_out)
                    pp.clear()
                bar.update(n_read)
            # Frames left over after the last full batch.
            if pp:
                _denoise_and_write(net, st, pp, wf_out)

    logger.info(f'saved at : {save_path}')
    logger.info('Script finished successfully.')
def main():
    """Fetch the model if needed, create the inference backend and run."""
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    env_id = args.env_id

    # initialize
    if args.onnx:
        import onnxruntime
        providers = ['CPUExecutionProvider']
        if 0 < ailia.get_gpu_environment_id():
            # Prefer CUDA when ailia reports a GPU environment.
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        net = onnxruntime.InferenceSession(WEIGHT_PATH, providers=providers)
    else:
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    recognize_from_audio(net)


if __name__ == '__main__':
    main()

models/ailia-models/rnn_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22123b85825b413e7c97de8334c6b609f0c6aa1cd5290ab672bb1b85bae20403
+size 1020606

models/ailia-models/rnn_model.onnx.prototxt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/source.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/rnnoise
+https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx
+https://storage.googleapis.com/ailia-models/rnnoise/rnn_model.onnx.prototxt