Spaces:

breadlicker45
/

WavTokenizer-demo

Build error

breadlicker45 committed on Sep 9, 2024

Commit

0fca126

verified ·

1 Parent(s): 17cf200

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import torch
+import torchaudio
+from encoder.utils import convert_audio
+from decoder.pretrained import WavTokenizer
+# Load the WavTokenizer model once at import time; it runs on CUDA when available, otherwise on the CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+config_path = "./configs/wavtokenizer_config.yaml"  # Placeholder path to the model's YAML config; update for your setup
+model_path = "./wavtokenizer_model.ckpt"  # Placeholder path to the model checkpoint; update for your setup
+wavtokenizer = WavTokenizer.from_pretrained0802(config_path, model_path)
+wavtokenizer = wavtokenizer.to(device)
+def encode_audio(audio_file):
+    # Load and preprocess the audio
+    wav, sr = torchaudio.load(audio_file)
+    wav = convert_audio(wav, sr, 24000, 1)
+    wav = wav.to(device)
+    # Encode the audio
+    bandwidth_id = torch.tensor([0]).to(device)
+    _, discrete_code = wavtokenizer.encode_infer(wav, bandwidth_id=bandwidth_id)
+    # Convert the discrete code to a string representation
+    code_str = ' '.join(map(str, discrete_code.cpu().numpy().flatten()))
+    return code_str
+# Build the Gradio UI: the audio input is handed to encode_audio as a file path, and the result is shown in a text box.
+iface = gr.Interface(
+    fn=encode_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs=gr.Textbox(label="Discrete Codes"),
+    title="WavTokenizer Encoder Demo",
+    description="Upload an audio file to see its WavTokenizer discrete codes. The output shows 40 tokens per second of audio."
+)
+# Start the Gradio server; this runs whenever the module is executed because there is no __main__ guard.
+iface.launch()