SaoYear
committed on
Commit
·
651ebfd
1
Parent(s):
8521c95
+Small models
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ def install_mamba():
|
|
| 14 |
subprocess.run(shlex.split("pip install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.4.0/causal_conv1d-1.4.0+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"))
|
| 15 |
subprocess.run(shlex.split("pip install https://github.com/state-spaces/mamba/releases/download/v1.2.0.post1/mamba_ssm-1.2.0.post1+cu122torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl"))
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
import torch
|
| 20 |
import spaces
|
|
@@ -30,11 +30,11 @@ from model.vocos.pretrained import Vocos
|
|
| 30 |
from model.stft import InputSTFT, TargetMel
|
| 31 |
|
| 32 |
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 33 |
-
|
| 34 |
def read_audio(file_path):
|
| 35 |
audio, sample_rate = sf.read(file_path)
|
| 36 |
if audio.ndim > 1:
|
| 37 |
-
|
|
|
|
| 38 |
if sample_rate != 16000:
|
| 39 |
audio = lb.resample(audio, orig_sr=sample_rate, target_sr=16000)
|
| 40 |
sample_rate = 16000
|
|
@@ -178,7 +178,9 @@ def reset_everything():
|
|
| 178 |
demo = gr.Blocks()
|
| 179 |
with gr.Blocks(title="CleanMel Demo") as demo:
|
| 180 |
gr.Markdown("## CleanMel Demo")
|
| 181 |
-
gr.Markdown("This demo showcases the CleanMel model for speech enhancement.")
|
|
|
|
|
|
|
| 182 |
|
| 183 |
with gr.Row():
|
| 184 |
audio_input = gr.Audio(label="Input Audio", type="filepath", sources="upload")
|
|
|
|
| 14 |
subprocess.run(shlex.split("pip install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.4.0/causal_conv1d-1.4.0+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"))
|
| 15 |
subprocess.run(shlex.split("pip install https://github.com/state-spaces/mamba/releases/download/v1.2.0.post1/mamba_ssm-1.2.0.post1+cu122torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl"))
|
| 16 |
|
| 17 |
+
install_mamba()
|
| 18 |
|
| 19 |
import torch
|
| 20 |
import spaces
|
|
|
|
| 30 |
from model.stft import InputSTFT, TargetMel
|
| 31 |
|
| 32 |
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|
|
|
| 33 |
def read_audio(file_path):
|
| 34 |
audio, sample_rate = sf.read(file_path)
|
| 35 |
if audio.ndim > 1:
|
| 36 |
+
# select the loudest channel if stereo
|
| 37 |
+
audio = audio[:, np.argmax(np.abs(audio).mean(axis=0))]
|
| 38 |
if sample_rate != 16000:
|
| 39 |
audio = lb.resample(audio, orig_sr=sample_rate, target_sr=16000)
|
| 40 |
sample_rate = 16000
|
|
|
|
| 178 |
demo = gr.Blocks()
|
| 179 |
with gr.Blocks(title="CleanMel Demo") as demo:
|
| 180 |
gr.Markdown("## CleanMel Demo")
|
| 181 |
+
gr.Markdown("This demo showcases the CleanMel model for speech enhancement. \n \
|
| 182 |
+
Since the model is running on CPU, it may take a while to process the audio. \n \
|
| 183 |
+
Please be patient and wait for the result. \n")
|
| 184 |
|
| 185 |
with gr.Row():
|
| 186 |
audio_input = gr.Audio(label="Input Audio", type="filepath", sources="upload")
|
ckpts/CleanMel/offline_CleanMel_S_map.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b76eab8ff6944d7a3402901d57ef0bf1b7ef8e22e34849457ceaee20c37d35e4
|
| 3 |
+
size 10101102
|
ckpts/CleanMel/offline_CleanMel_S_mask.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2209e32bc37cba6c901ec3f04a31b0d12f60074b34816edca4bdb5d58ce33a72
|
| 3 |
+
size 10101704
|