BoxOfColors committed on
Commit
00e0606
·
1 Parent(s): 00ee17c

Fix TARO GPU worker crash: use local_files_only=True for audioldm2 VAE/vocoder

Browse files

from_pretrained('cvssp/audioldm2') was contacting the HF network inside the
ZeroGPU worker to resolve the cache; the request timed out and crashed the
worker. local_files_only=True loads the pre-downloaded cache directly, with no
network calls.

Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -322,8 +322,10 @@ def _load_taro_models(device, weight_dtype):
322
  model_net = MMDiT(adm_in_channels=120, z_dims=[768], encoder_depth=4).to(device)
323
  model_net.load_state_dict(torch.load(taro_ckpt_path, map_location=device, weights_only=False)["ema"])
324
  model_net.eval().to(weight_dtype)
325
- vae = AutoencoderKL.from_pretrained("cvssp/audioldm2", subfolder="vae").to(device).eval()
326
- vocoder = SpeechT5HifiGan.from_pretrained("cvssp/audioldm2", subfolder="vocoder").to(device)
 
 
327
  latents_scale = torch.tensor([0.18215] * 8).view(1, 8, 1, 1).to(device)
328
  return model_net, vae, vocoder, latents_scale
329
 
 
322
  model_net = MMDiT(adm_in_channels=120, z_dims=[768], encoder_depth=4).to(device)
323
  model_net.load_state_dict(torch.load(taro_ckpt_path, map_location=device, weights_only=False)["ema"])
324
  model_net.eval().to(weight_dtype)
325
+ vae = AutoencoderKL.from_pretrained("cvssp/audioldm2", subfolder="vae",
326
+ local_files_only=True).to(device).eval()
327
+ vocoder = SpeechT5HifiGan.from_pretrained("cvssp/audioldm2", subfolder="vocoder",
328
+ local_files_only=True).to(device)
329
  latents_scale = torch.tensor([0.18215] * 8).view(1, 8, 1, 1).to(device)
330
  return model_net, vae, vocoder, latents_scale
331