liumaolin committed on
Commit
7b86866
·
1 Parent(s): 2a5dcf2

Update TTS inference to validate audio duration using soundfile.

Browse files
third_party/moyoyo_tts/TTS_infer_pack/TTS.py CHANGED
@@ -10,6 +10,7 @@ from typing import List, Tuple, Union
10
 
11
  import ffmpeg
12
  import librosa
 
13
  import numpy as np
14
  import torch
15
  import yaml
@@ -469,7 +470,9 @@ class TTS:
469
  )
470
  with torch.no_grad():
471
  wav16k, sr = librosa.load(ref_wav_path, sr=16000)
472
- if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000):
 
 
473
  raise OSError(i18n("参考音频在3~10秒范围外,请更换!"))
474
  wav16k = torch.from_numpy(wav16k)
475
  zero_wav_torch = torch.from_numpy(zero_wav)
 
10
 
11
  import ffmpeg
12
  import librosa
13
+ import soundfile
14
  import numpy as np
15
  import torch
16
  import yaml
 
470
  )
471
  with torch.no_grad():
472
  wav16k, sr = librosa.load(ref_wav_path, sr=16000)
473
+ y, sr, = soundfile.read(ref_wav_path)
474
+ duration = librosa.get_duration(y=y, sr=sr)
475
+ if (duration < 3 or duration > 10):
476
  raise OSError(i18n("参考音频在3~10秒范围外,请更换!"))
477
  wav16k = torch.from_numpy(wav16k)
478
  zero_wav_torch = torch.from_numpy(zero_wav)