cronos3k's picture
Upload utils.py with huggingface_hub
91ea9d8 verified
raw
history blame contribute delete
881 Bytes
import re
import librosa
import torch
def load_audio(wavpath, sr):
audio, _ = librosa.load(wavpath, sr=sr, mono=True)
return torch.from_numpy(audio).unsqueeze(0)
def normalize_text(text):
text = text.lower()
text = re.sub(r'["β€œβ€β€˜β€™]', ' ', text)
text = re.sub(r'\s+', ' ', text)
return text
def approx_duration_from_text(text, max_duration=30.0):
EN_DUR_PER_CHAR = 0.082
ZH_DUR_PER_CHAR = 0.21
text = re.sub(r"\s+", "", text)
num_zh = num_en = num_other = 0
for c in text:
if "\u4e00" <= c <= "\u9fff":
num_zh += 1
elif c.isalpha():
num_en += 1
else:
num_other += 1
if num_zh > num_en:
num_zh += num_other
else:
num_en += num_other
return min(max_duration, num_zh * ZH_DUR_PER_CHAR + num_en * EN_DUR_PER_CHAR)