SaoYear committed on
Commit
651ebfd
·
1 Parent(s): 8521c95

+Small models

Browse files
app.py CHANGED
@@ -14,7 +14,7 @@ def install_mamba():
14
  subprocess.run(shlex.split("pip install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.4.0/causal_conv1d-1.4.0+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"))
15
  subprocess.run(shlex.split("pip install https://github.com/state-spaces/mamba/releases/download/v1.2.0.post1/mamba_ssm-1.2.0.post1+cu122torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl"))
16
 
17
- # install_mamba()
18
 
19
  import torch
20
  import spaces
@@ -30,11 +30,11 @@ from model.vocos.pretrained import Vocos
30
  from model.stft import InputSTFT, TargetMel
31
 
32
  DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
33
-
34
  def read_audio(file_path):
35
  audio, sample_rate = sf.read(file_path)
36
  if audio.ndim > 1:
37
- audio = audio[:, 0]
 
38
  if sample_rate != 16000:
39
  audio = lb.resample(audio, orig_sr=sample_rate, target_sr=16000)
40
  sample_rate = 16000
@@ -178,7 +178,9 @@ def reset_everything():
178
  demo = gr.Blocks()
179
  with gr.Blocks(title="CleanMel Demo") as demo:
180
  gr.Markdown("## CleanMel Demo")
181
- gr.Markdown("This demo showcases the CleanMel model for speech enhancement.")
 
 
182
 
183
  with gr.Row():
184
  audio_input = gr.Audio(label="Input Audio", type="filepath", sources="upload")
 
14
  subprocess.run(shlex.split("pip install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.4.0/causal_conv1d-1.4.0+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"))
15
  subprocess.run(shlex.split("pip install https://github.com/state-spaces/mamba/releases/download/v1.2.0.post1/mamba_ssm-1.2.0.post1+cu122torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl"))
16
 
17
+ install_mamba()
18
 
19
  import torch
20
  import spaces
 
30
  from model.stft import InputSTFT, TargetMel
31
 
32
  DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
33
  def read_audio(file_path):
34
  audio, sample_rate = sf.read(file_path)
35
  if audio.ndim > 1:
36
+ # select the loudest channel if stereo
37
+ audio = audio[:, np.argmax(np.abs(audio).mean(axis=0))]
38
  if sample_rate != 16000:
39
  audio = lb.resample(audio, orig_sr=sample_rate, target_sr=16000)
40
  sample_rate = 16000
 
178
  demo = gr.Blocks()
179
  with gr.Blocks(title="CleanMel Demo") as demo:
180
  gr.Markdown("## CleanMel Demo")
181
+ gr.Markdown("This demo showcases the CleanMel model for speech enhancement. \n \
182
+ Since the model is running on CPU, it may take a while to process the audio. \n \
183
+ Please be patient and wait for the result. \n")
184
 
185
  with gr.Row():
186
  audio_input = gr.Audio(label="Input Audio", type="filepath", sources="upload")
ckpts/CleanMel/offline_CleanMel_S_map.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76eab8ff6944d7a3402901d57ef0bf1b7ef8e22e34849457ceaee20c37d35e4
3
+ size 10101102
ckpts/CleanMel/offline_CleanMel_S_mask.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2209e32bc37cba6c901ec3f04a31b0d12f60074b34816edca4bdb5d58ce33a72
3
+ size 10101704