artificialguybr committed on
Commit
90c9262
·
verified ·
1 Parent(s): 53b45ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -9
app.py CHANGED
@@ -4,7 +4,7 @@ import subprocess
4
  import traceback
5
  import gradio as gr
6
  import numpy as np
7
- import torchaudio
8
  import spaces
9
  import torch
10
  from pathlib import Path
@@ -69,15 +69,11 @@ codec_model = load_codec(os.path.join(checkpoint_dir, "codec.pth"), device, prec
69
 
70
  @torch.no_grad()
71
  def encode_reference_audio(audio_path):
72
- wav, sr = torchaudio.load(audio_path)
73
- if wav.shape[0] > 1:
74
- wav = wav.mean(dim=0, keepdim=True)
75
- if sr != codec_model.sample_rate:
76
- wav = torchaudio.functional.resample(wav, sr, codec_model.sample_rate)
77
- wav = wav.to(device)
78
  model_dtype = next(codec_model.parameters()).dtype
79
- audios = wav[None].to(dtype=model_dtype)
80
- audio_lengths = torch.tensor([wav.shape[-1]], device=device, dtype=torch.long)
81
  indices, feature_lengths = codec_model.encode(audios, audio_lengths)
82
  return indices[0, :, : feature_lengths[0]]
83
 
 
4
  import traceback
5
  import gradio as gr
6
  import numpy as np
7
+ import librosa
8
  import spaces
9
  import torch
10
  from pathlib import Path
 
69
 
70
@torch.no_grad()
def encode_reference_audio(audio_path):
    """Encode a reference audio file into codec index tensors.

    The file is loaded as mono at the codec's native sample rate, cast to
    the codec's parameter dtype, and passed through ``codec_model.encode``.

    Parameters
    ----------
    audio_path : str
        Path to the reference audio file.

    Returns
    -------
    torch.Tensor
        2-D tensor of encoded indices for the single input, trimmed along
        the last axis to the valid length reported by the encoder.
    """
    # librosa performs mono down-mixing and resampling in a single call.
    samples, _sr = librosa.load(audio_path, sr=codec_model.sample_rate, mono=True)
    waveform = torch.from_numpy(samples).to(device)

    # The codec may run in reduced precision; match its parameter dtype.
    target_dtype = next(codec_model.parameters()).dtype
    batch = waveform.unsqueeze(0).unsqueeze(0).to(dtype=target_dtype)  # (1, 1, T)
    lengths = torch.tensor([waveform.numel()], device=device, dtype=torch.long)

    indices, feature_lengths = codec_model.encode(batch, lengths)
    return indices[0, :, : feature_lengths[0]]
79