stephenhoang committed on
Commit
a3e843d
·
verified ·
1 Parent(s): c921121

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +12 -4
inference.py CHANGED
@@ -402,19 +402,27 @@ class StyleTTS2(torch.nn.Module):
402
  )
403
  print("[DBG] wav shape:", wav.shape)
404
  print("[DBG] wav min/max:", wav.min().item(), wav.max().item())
405
- print("[DBG] wav mean abs:", wav.abs().mean().item())
406
 
407
  # trim an toàn
408
- trim = 4000
409
- if wav.shape[0] > 2 * trim:
 
 
 
410
  wav = wav[trim:-trim]
411
 
412
  if wav.size > 0:
413
  list_wav.append(wav)
414
 
 
 
415
  if len(list_wav) == 0:
416
  return np.zeros((2400,), dtype=np.float32) # 0.1s silence để không crash
417
 
418
  final_wav = np.concatenate(list_wav)
419
- final_wav = np.concatenate([np.zeros((4000,), dtype=np.float32), final_wav, np.zeros((4000,), dtype=np.float32)])
 
 
 
420
  return final_wav
 
402
  )
403
  print("[DBG] wav shape:", wav.shape)
404
  print("[DBG] wav min/max:", wav.min().item(), wav.max().item())
405
+ print("[DBG] wav mean abs:", np.abs(wav).mean())
406
 
407
  # trim an toàn
408
+
409
+
410
+ # trim only if long enough
411
+ trim = int(0.05 * 24000) # 50ms instead of 4000
412
+ if wav.shape[0] > 4 * trim:
413
  wav = wav[trim:-trim]
414
 
415
  if wav.size > 0:
416
  list_wav.append(wav)
417
 
418
+
419
+
420
  if len(list_wav) == 0:
421
  return np.zeros((2400,), dtype=np.float32) # 0.1s silence để không crash
422
 
423
  final_wav = np.concatenate(list_wav)
424
+ # final_wav = np.concatenate([np.zeros((4000,), dtype=np.float32), final_wav, np.zeros((4000,), dtype=np.float32)])
425
+ pad = int(0.05 * 24000) # 50ms
426
+ final_wav = np.concatenate([np.zeros((pad,), dtype=np.float32), final_wav, np.zeros((pad,), dtype=np.float32)])
427
+
428
  return final_wav