Spaces:
Runtime error
Runtime error
| import torch | |
| import soundfile as sf | |
| import gradio as gr | |
| import spaces | |
| from clearvoice import ClearVoice | |
| import os | |
| import random | |
def fn_clearvoice_sr(input_wav, apply_se):
    """Run ClearVoice speech super-resolution on an audio file.

    When ``apply_se`` is true, the input is first passed through the
    ``MossFormer2_SE_48K`` speech-enhancement model; the enhanced audio is
    written to a scratch file in the working directory and that file is
    then fed to the ``MossFormer2_SR_48K`` super-resolution model.

    Args:
        input_wav: Path of the audio file to process (Gradio passes a
            filepath string, or ``None`` when nothing was provided).
        apply_se: Whether to run speech enhancement before super resolution.

    Returns:
        The path ``'enhanced_high_res.wav'``, written with a 48000 Hz
        sample rate.

    Raises:
        gr.Error: If no input audio path was supplied.
    """
    if not input_wav:
        # Submitting the form without audio yields None; report it in the UI
        # instead of failing with an AttributeError further down.
        raise gr.Error("Please provide an input audio file.")

    fs = 48000  # sample rate used when writing the result
    sr_model = ClearVoice(task='speech_super_resolution', model_names=['MossFormer2_SR_48K'])

    scratch_wav = None
    try:
        if apply_se:
            # Build the scratch name from the real extension so the unique
            # suffix is applied regardless of extension or letter case, and
            # use 64 random bits so concurrent requests do not collide.
            stem, ext = os.path.splitext(os.path.basename(input_wav))
            scratch_wav = f"{stem}_{random.getrandbits(64):016x}{ext}"
            se_model = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
            se_model(input_path=input_wav, online_write=True, output_path=scratch_wav)
            input_wav = scratch_wav
        result = sr_model(input_path=input_wav, online_write=False)
    finally:
        # The enhanced intermediate is only needed as input to the SR model;
        # remove it so scratch files do not pile up in the working directory.
        if scratch_wav is not None and os.path.isfile(scratch_wav):
            os.remove(scratch_wav)

    # The model may return a dict; in that case take the first entry.
    if isinstance(result, dict):
        output_wav = result[next(iter(result))]
    else:
        output_wav = result

    # Only the first row is written — presumably channel 0 of a
    # (channels, samples) array; TODO confirm against ClearVoice's output.
    sf.write('enhanced_high_res.wav', output_wav[0, :], fs)
    return 'enhanced_high_res.wav'
# Top-level Blocks container that hosts the tabbed UI and is launched below.
demo = gr.Blocks()

# Single-function interface around fn_clearvoice_sr: one audio input plus a
# checkbox toggling the enhancement pre-pass, and one audio output.
sr_demo = gr.Interface(
    fn=fn_clearvoice_sr,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
        gr.Checkbox(label="Apply Speech Enhancement", value=True),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    # The anchor is closed with </a> (the original had a malformed <a/>).
    title="<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice</a>: Speech Super Resolution",
    description=("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
                 "To try it, simply upload your audio, or click one of the examples. "),
    article=("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
             "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
    # Each example row supplies (input audio path, apply_se flag).
    examples=[
        ["examples/mandarin_speech_16kHz.wav", True],
        ["examples/LJSpeech-001-0001-22k.wav", True],
        ["examples/LibriTTS_986_129388_24k.wav", True],
        ["examples/english_speech_48kHz.wav", True],
    ],
    cache_examples=True,
)

# Mount the interface as the only tab inside the Blocks container.
with demo:
    gr.TabbedInterface([sr_demo], ["Task 4: Speech Super Resolution"])

demo.launch()