ClearVoice-SR

Runtime error

App Files Community

ClearVoice-SR / app.py

alibabasglab

Update app.py

ed2aa07 verified 11 months ago

raw

history blame

2.67 kB

	import torch
	import soundfile as sf
	import gradio as gr
	import spaces
	from clearvoice import ClearVoice
	import os
	import random

	@spaces.GPU
	def fn_clearvoice_sr(input_wav, apply_se):
	    """Run speech super-resolution on an audio file, optionally enhancing it first.

	    Args:
	        input_wav: Path of the audio file to process. May be ``None`` or
	            empty when the caller supplies no audio.
	        apply_se: When truthy, the ``MossFormer2_SE_48K`` speech-enhancement
	            model is run first and the super-resolution model is then
	            pointed at the path that was handed to it as ``output_path``.

	    Returns:
	        The string ``'enhanced_high_res.wav'``, the path the result is
	        written to at 48000 Hz.

	    Raises:
	        gr.Error: If ``input_wav`` is ``None`` or empty.
	    """
	    if not input_wav:
	        # Without this guard a missing upload fails further down with an
	        # opaque AttributeError on None; gr.Error carries a readable message.
	        raise gr.Error("Please provide an input audio file.")

	    sr_model = ClearVoice(task='speech_super_resolution', model_names=['MossFormer2_SR_48K'])
	    sample_rate = 48000

	    if apply_se:
	        # os.path.basename rather than split('/') so the platform's path
	        # separator is honoured.
	        wavname = os.path.basename(input_wav)
	        # Insert a random number in front of '.wav' to derive the intermediate
	        # name. A name that does not contain '.wav' is left unchanged by
	        # str.replace.
	        new_wavname = wavname.replace('.wav', str(random.randint(0, 1000)) + '.wav')
	        se_model = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
	        # Called for its on-disk output only (online_write=True); the return
	        # value is not needed.
	        # NOTE(review): how ClearVoice interprets output_path (file vs.
	        # directory) is not visible here -- confirm against the library.
	        se_model(input_path=input_wav, online_write=True, output_path=new_wavname)
	        input_wav = new_wavname

	    sr_result = sr_model(input_path=input_wav, online_write=False)
	    # The call may hand back either a dict or the audio itself; for a dict,
	    # take the value stored under its first key.
	    if isinstance(sr_result, dict):
	        output_wav = sr_result[next(iter(sr_result))]
	    else:
	        output_wav = sr_result

	    # NOTE(review): the [0, :] indexing assumes a 2-D array, presumably
	    # (channels, samples) -- confirm against ClearVoice's return shape.
	    # NOTE(review): the output filename is fixed, so overlapping requests
	    # would write to the same file -- confirm this is acceptable.
	    sf.write('enhanced_high_res.wav', output_wav[0, :], sample_rate)
	    return 'enhanced_high_res.wav'

	# Outer Blocks container; the tabbed UI is attached to it further down.
	demo = gr.Blocks()

	# Interface for the super-resolution task: an audio file path and an
	# "apply enhancement" checkbox go in, an audio file path comes out.
	sr_demo = gr.Interface(
	    fn=fn_clearvoice_sr,
	    inputs=[
	        gr.Audio(label="Input Audio", type="filepath"),
	        gr.Checkbox(label="Apply Speech Enhancement", value=True),
	    ],
	    outputs=[
	        gr.Audio(label="Output Audio", type="filepath"),
	    ],
	    # The anchor is closed with </a>; the previous <a/> left the link
	    # element unclosed in the rendered title.
	    title="<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice</a>: Speech Super Resolution",
	    description=("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
	                 "To try it, simply upload your audio, or click one of the examples. "),
	    article=("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
	             "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
	    # Each example row supplies (audio path, apply-enhancement flag).
	    examples=[
	        ["examples/mandarin_speech_16kHz.wav", True],
	        ["examples/LJSpeech-001-0001-22k.wav", True],
	        ["examples/LibriTTS_986_129388_24k.wav", True],
	        ["examples/english_speech_48kHz.wav", True],
	    ],
	    cache_examples=True,
	)

	# Mount the single interface as one labelled tab inside the Blocks container.
	with demo:
	    gr.TabbedInterface([sr_demo], ["Task 4: Speech Super Resolution"])

	# Start the app as soon as the module is executed.
	demo.launch()