breadlicker45 committed on
Commit
0fca126
·
verified ·
1 Parent(s): 17cf200

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ from encoder.utils import convert_audio
5
+ from decoder.pretrained import WavTokenizer
6
+
7
# Set up the WavTokenizer model once at import time.
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Locations of the model config and checkpoint -- update these for your setup.
config_path = "./configs/wavtokenizer_config.yaml"
model_path = "./wavtokenizer_model.ckpt"

# Load the pretrained model, then place it on the selected device.
wavtokenizer = WavTokenizer.from_pretrained0802(config_path, model_path).to(device)
14
+
15
def encode_audio(audio_file):
    """Encode an audio file into a space-separated string of WavTokenizer codes.

    Args:
        audio_file: Path of the audio file to encode, as supplied by the
            Gradio ``Audio(type="filepath")`` input. ``None`` or an empty
            path (nothing uploaded or recorded) is accepted.

    Returns:
        The discrete codes, flattened and joined with single spaces, or an
        empty string when no audio file was supplied.
    """
    # The Audio input may deliver None when the user submits without
    # providing audio; return early rather than crash in torchaudio.load.
    if not audio_file:
        return ""

    # Load the waveform and normalise it with the project's helper
    # (24000 and 1 are presumably the target sample rate and channel
    # count -- confirm against encoder.utils.convert_audio).
    waveform, sample_rate = torchaudio.load(audio_file)
    waveform = convert_audio(waveform, sample_rate, 24000, 1).to(device)

    # Build the bandwidth id directly on the model's device.
    bandwidth_id = torch.tensor([0], device=device)
    _, discrete_code = wavtokenizer.encode_infer(waveform, bandwidth_id=bandwidth_id)

    # Flatten the codes and render them as one space-separated string.
    return " ".join(map(str, discrete_code.cpu().numpy().flatten()))
29
+
30
# Assemble the Gradio demo: one audio-file input wired to a text output.
audio_input = gr.Audio(type="filepath")
codes_output = gr.Textbox(label="Discrete Codes")

iface = gr.Interface(
    fn=encode_audio,
    inputs=audio_input,
    outputs=codes_output,
    title="WavTokenizer Encoder Demo",
    description="Upload an audio file to see its WavTokenizer discrete codes. The output shows 40 tokens per second of audio.",
)

# Start serving the demo.
iface.launch()