Spaces:

breadlicker45
/

WavTokenizer-demo

Build error

WavTokenizer-demo / app.py

Update app.py

6b29d56 verified over 1 year ago

1.34 kB

	import gradio as gr
	import torch
	import torchaudio
	from encoder.utils import convert_audio
	from decoder.pretrained import WavTokenizer

	# Pick the compute device: CUDA when torch reports it available, else CPU.
	if torch.cuda.is_available():
	    device = torch.device('cuda')
	else:
	    device = torch.device('cpu')

	# Paths handed to the WavTokenizer loader below.
	# NOTE(review): the config name says "frame75" while the checkpoint name
	# says "600" and the UI description advertises 40 tokens per second --
	# confirm that this config/checkpoint pair actually belongs together.
	config_path = "wavtokenizer_smalldata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
	model_path = "WavTokenizer_small_600_24k_4096.ckpt"

	# Build the model from config + checkpoint and move it onto the device.
	wavtokenizer = WavTokenizer.from_pretrained0802(config_path, model_path).to(device)

	def encode_audio(audio_file):
	    """Encode an audio file into a space-separated string of discrete codes.

	    Args:
	        audio_file: Filesystem path of the audio to encode, as supplied by
	            the Gradio ``Audio(type="filepath")`` input. ``None`` (no file
	            supplied) is rejected.

	    Returns:
	        The flattened discrete codes produced by the module-level
	        ``wavtokenizer``, joined with single spaces.

	    Raises:
	        gr.Error: If no audio file was provided.
	    """
	    # Without this guard a missing file would reach torchaudio.load and
	    # fail with an unrelated error; gr.Error shows the message in the UI.
	    if audio_file is None:
	        raise gr.Error("Please upload an audio file before submitting.")

	    # Load and preprocess the audio
	    wav, sr = torchaudio.load(audio_file)
	    # presumably resamples to 24 kHz / 1 channel -- confirm against
	    # encoder.utils.convert_audio
	    wav = convert_audio(wav, sr, 24000, 1)
	    wav = wav.to(device)

	    # Encode the audio. Gradients are never needed here, so skip autograd
	    # bookkeeping during the forward pass.
	    bandwidth_id = torch.tensor([0]).to(device)
	    with torch.no_grad():
	        _, discrete_code = wavtokenizer.encode_infer(wav, bandwidth_id=bandwidth_id)

	    # Convert the discrete code to a string representation
	    code_str = ' '.join(map(str, discrete_code.cpu().numpy().flatten()))

	    return code_str

	# Assemble the Gradio UI: one audio input (passed to the callback as a
	# file path) and one text output holding the codes.
	audio_input = gr.Audio(type="filepath")
	codes_output = gr.Textbox(label="Discrete Codes")

	# NOTE(review): the description advertises 40 tokens per second while the
	# config file name loaded by this script says "frame75" -- confirm which
	# rate is correct.
	iface = gr.Interface(
	    fn=encode_audio,
	    inputs=audio_input,
	    outputs=codes_output,
	    title="WavTokenizer Encoder Demo",
	    description="Upload an audio file to see its WavTokenizer discrete codes. The output shows 40 tokens per second of audio.",
	)

	# Start serving the demo.
	iface.launch()