Spaces:

karim23657
/

resemble-denoise-onnx

Runtime error

App Files Files Community

karim23657 committed on Dec 12, 2025

Commit

8c8f185

verified ·

1 Parent(s): f2166a4

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.gitattributes +1 -0
.gitignore +9 -0
LICENSE +21 -0
README.md +7 -5
app.py +92 -0
denoiser.onnx +3 -0
denoiser.py +75 -0
denoiser_output.wav +3 -0
packages.txt +1 -0
pyproject.toml +6 -0
requirements.txt +6 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+denoiser_output.wav filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+/data
+/runs
+/scripts
+/dist
+/build
+/*.egg-info
+/flagged
+version.py
+__pycache__

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Resemble AI
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1,15 @@
 ---
-title: Resemble Denoise Onnx
 emoji: 🚀
 colorFrom: red
-colorTo: gray
 sdk: gradio
-sdk_version: 6.1.0
 app_file: app.py
 pinned: false
-short_description: Denoise audio files on cpu
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Resemble Enhance
 emoji: 🚀
 colorFrom: red
+colorTo: pink
 sdk: gradio
+sdk_version: 4.8.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Resemble Enhance
+Resemble Enhance is an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement. It consists of two modules: a denoiser, which separates speech from noisy audio, and an enhancer, which further boosts the perceptual audio quality by restoring audio distortions and extending the audio bandwidth. Both models are trained on high-quality 44.1kHz speech data, which ensures high-quality enhancement of your speech.

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import argparse
+from functools import partial
+import gradio as gr
+import time
+import numpy as np
+from denoiser import run
+import onnxruntime
+import librosa
+import scipy
+# Options for the single ONNX Runtime session that every request shares.
+opts = onnxruntime.SessionOptions()
+# Both ONNX Runtime thread-count settings are fixed at 4.
+opts.inter_op_num_threads = 4
+opts.intra_op_num_threads = 4
+# NOTE(review): 4 should be ONNX Runtime's "fatal" severity, i.e. the quietest
+# logging level -- confirm against the ONNX Runtime documentation.
+opts.log_severity_level = 4
+# The model is loaded once, at import time, from a path relative to the
+# current working directory.
+session = onnxruntime.InferenceSession(
+    'denoiser.onnx',
+    providers=["CPUExecutionProvider"],
+    #providers=["ROCMExecutionProvider"],
+    #providers=["DnnlExecutionProvider"],
+    sess_options=opts,
+)
+def _fn(path, solver, nfe, tau, denoising, unlimited):
+    if path is None:
+        gr.Warning("Please upload an audio file.")
+        return None, None
+    wav, sr = librosa.load(path, mono=True)
+    start = time.time()
+    wav_onnx, new_sr = run(session, wav, sr, batch_process_chunks=False)
+    print(f'Ran in {time.time() - start}s')
+    # scipy.io.wavfile.write('denoiser_output.wav', new_sr, wav_onnx)
+    wav1 = wav1.cpu().numpy()
+    wav2 = wav2.cpu().numpy()
+    return (new_sr, wav_onnx)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--unlimited", action="store_true")
+    args = parser.parse_args()
+    inputs: list = [
+        gr.Audio(type="filepath", label="Input Audio"),
+        gr.Dropdown(
+            choices=["Midpoint", "RK4", "Euler"],
+            value="Midpoint",
+            label="CFM ODE Solver (Midpoint is recommended)",
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=256,
+            value=64,
+            step=1,
+            label="CFM Number of Function Evaluations (higher values in general yield better quality but may be slower)",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=1,
+            value=0.5,
+            step=0.01,
+            label="CFM Prior Temperature (higher values can improve quality but can reduce stability)",
+        ),
+        gr.Checkbox(
+            value=False,
+            label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
+        ),
+    ]
+    outputs: list = [
+        gr.Audio(label="Output Denoised Audio"),
+        # gr.Audio(label="Output Enhanced Audio"),
+    ]
+    interface = gr.Interface(
+        fn=partial(_fn, unlimited=args.unlimited),
+        title="Resemble Enhance",
+        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
+        inputs=inputs,
+        outputs=outputs,
+    )
+    interface.launch()
+if __name__ == "__main__":
+    main()

denoiser.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:490640369540d1b0948352b75f880e215863a0de0b95a4b621ef590ee0e04e77
+size 42661638

denoiser.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import numpy as np
+from librosa import stft, istft
+from resampy.core import resample
+# STFT framing shared by _stft and _istft: the hop is 420 samples, and both the
+# window length and the FFT size are four hops (1680 samples).
+stft_hop_length = 420
+win_length = n_fft = 4 * stft_hop_length
+def _stft(x):
+    s = stft(x, window='hann', win_length=win_length, n_fft=n_fft, hop_length=stft_hop_length,
+             center=True, pad_mode='reflect')
+    s = s[..., :-1]
+    mag = np.abs(s)
+    phi = np.angle(s)
+    cos = np.cos(phi)
+    sin = np.sin(phi)
+    return mag, cos, sin
+def _istft(mag: np.array, cos: np.array, sin: np.array):
+    real = mag * cos
+    imag = mag * sin
+    s = real + imag * 1.0j
+    s = np.pad(s, ((0, 0), (0, 0), (0, 1)), mode='edge')
+    x = istft(s, window='hann', win_length=win_length, hop_length=stft_hop_length, n_fft=n_fft)
+    return x
+def model(onnx_session, wav: np.array) -> np.array:
+    padded_wav = np.pad(wav, ((0,0), (0, 441)))
+    mag, cos, sin = _stft(padded_wav)  # (b nfft/2 t)
+    ort_inputs = {
+        "mag": mag,
+        "cos": cos,
+        "sin": sin,
+    }
+    sep_mag, sep_cos, sep_sin = onnx_session.run(None, ort_inputs)
+    o = _istft(sep_mag, sep_cos, sep_sin)
+    o = o[:wav.shape[-1]]
+    return o
+def run(onnx_session, wav: np.array, sample_rate: int, batch_process_chunks = False) -> np.array:
+    assert wav.ndim == 1, 'Input should be 1D (mono) wav'
+    if sample_rate != 44_100:
+        wav = resample(wav, sample_rate, 44_100, filter='kaiser_best', parallel=True)
+    chunk_length = int(sample_rate * 30)
+    #overlap_length = int(sr * overlap_seconds)
+    hop_length = chunk_length # - overlap_length
+    num_chunks = 1 + (wav.shape[-1] - 1) // hop_length
+    n_pad = (num_chunks - wav.shape[-1] % num_chunks) % num_chunks
+    wav = np.pad(wav, (0, n_pad))
+    chunks = np.reshape(wav, (num_chunks, -1))
+    abs_max = np.clip(np.max(np.abs(chunks), axis = -1, keepdims = True), a_min=1e-7, a_max=None)
+    chunks /= abs_max
+    if batch_process_chunks:
+        res_chunks = model(onnx_session, chunks)
+    else:
+        res_chunks = np.array([model(onnx_session, c[None]) for c in chunks]).squeeze(axis=1)
+    res_chunks *= abs_max
+    res = np.reshape(res_chunks, (-1))
+    return res[:wav.shape[-1]], 44_100

denoiser_output.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0124974f9ddd0d806bf78647c2101b1a205684288154829642146069cb069367
+size 3496474

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ libsox-dev

pyproject.toml ADDED Viewed

	@@ -0,0 +1,6 @@

+[tool.black]
+line-length = 120
+target-version = ['py310']
+[tool.isort]
+line_length = 120

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+numpy
+scipy
+librosa
+resampy
+onnxruntime