Spaces:
Running on Zero
Running on Zero
| import re | |
| import librosa | |
| import torch | |
def load_audio(wavpath, sr):
    """Read an audio file and return it as a torch tensor with a leading axis.

    Args:
        wavpath: Location of the audio file, passed straight to
            ``librosa.load``.
        sr: Sample rate forwarded to ``librosa.load`` as its ``sr`` argument.

    Returns:
        A tensor built from the loaded samples with one extra dimension
        inserted at position 0. Because the file is loaded with
        ``mono=True``, this is presumably shaped ``(1, num_samples)``
        (verify against the librosa version in use).
    """
    # librosa also returns the sample rate; it is not needed here.
    waveform, _unused_rate = librosa.load(wavpath, sr=sr, mono=True)
    samples = torch.from_numpy(waveform)
    return samples.unsqueeze(0)
def normalize_text(text):
    """Lowercase *text*, blank out quotation marks, and collapse whitespace.

    Args:
        text: The input string.

    Returns:
        The lowercased string in which every straight double quote and every
        typographic quote is replaced by a space, and each run of whitespace
        is then reduced to a single space. Leading and trailing whitespace
        is collapsed but not stripped.
    """
    text = text.lower()
    # NOTE(review): the previous character class held four identical
    # mis-encoded characters where the typographic quotes are assumed to have
    # been (left/right double, left/right single) - confirm against upstream.
    # Escapes are used so the pattern cannot be corrupted by re-encoding.
    text = re.sub(r'["\u201c\u201d\u2018\u2019]', ' ', text)
    # Quote removal can leave doubled spaces, so collapse whitespace last.
    text = re.sub(r'\s+', ' ', text)
    return text
def approx_duration_from_text(text, max_duration=30.0):
    """Estimate a duration for *text* from per-character rates.

    Whitespace is discarded. Each remaining character is counted as Chinese
    (code points U+4E00..U+9FFF), alphabetic, or "other". The "other"
    characters are then charged at the rate of whichever of the first two
    groups is larger; a tie goes to the alphabetic group.

    Args:
        text: The text to estimate.
        max_duration: Upper bound on the returned value.

    Returns:
        The estimate, capped at ``max_duration``. Units are presumably
        seconds (not stated in the code; confirm with callers).
    """
    per_en_char = 0.082
    per_zh_char = 0.21

    compact = re.sub(r"\s+", "", text)

    zh_count = 0
    en_count = 0
    leftover = 0
    for ch in compact:
        if "\u4e00" <= ch <= "\u9fff":
            zh_count += 1
            continue
        if ch.isalpha():
            en_count += 1
            continue
        leftover += 1

    # Digits, punctuation, etc. are billed at the dominant script's rate.
    if zh_count > en_count:
        zh_total, en_total = zh_count + leftover, en_count
    else:
        zh_total, en_total = zh_count, en_count + leftover

    estimate = zh_total * per_zh_char + en_total * per_en_char
    return min(max_duration, estimate)