File size: 2,362 Bytes
6ea5c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import traceback

# import ffmpeg
import numpy as np
import soundfile as sf
import librosa

from moyoyo_tts.tools.i18n.i18n import I18nAuto

# Module-level translator used for the error messages raised in this file.
# The language is read from the `language` environment variable and falls
# back to 'Auto' when that variable is not set.
i18n = I18nAuto(language=os.environ.get('language', 'Auto'))

def load_audio(file, sr):
    """Load an audio file as a mono waveform at the requested sample rate.

    Args:
        file: Path to the audio file. It is passed through ``clean_path``
            first, so stray spaces, quotes and newlines left over from
            copy-pasting a path are tolerated.
        sr: Target sample rate. The audio is resampled with
            ``librosa.resample`` when the file's own rate differs.

    Returns:
        A flattened 1-D numpy array of samples. The file is decoded as
        float32, and multi-channel audio is down-mixed by averaging the
        channels.

    Raises:
        RuntimeError: If the path does not exist or the file cannot be read
            or resampled. The underlying error is printed to stderr and
            attached as ``__cause__`` of the raised exception.
    """
    try:
        # Guard against paths pasted with surrounding spaces, quotes or newlines.
        file = clean_path(file)
        if not os.path.exists(file):
            raise RuntimeError(
                "You input a wrong audio path that does not exists, please fix it!"
            )

        # Decode the file with soundfile.
        data, original_sr = sf.read(file, dtype='float32')

        # Multi-channel input: down-mix to mono by averaging across channels.
        if data.ndim > 1:
            data = np.mean(data, axis=1)

        # Resample only when the file's rate differs from the requested one.
        if original_sr != sr:
            data = librosa.resample(data, orig_sr=original_sr, target_sr=sr)

        return data.flatten()

    except Exception as e:
        # Every failure (including the missing-path error above) is reported
        # to the caller as one localized RuntimeError; the detailed reason is
        # printed here and kept as the explicit cause for debugging.
        traceback.print_exc()
        raise RuntimeError(i18n("音频加载失败")) from e
    
# def load_audio(file, sr):
#     try:
#         # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
#         # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
#         # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
#         file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
#         if os.path.exists(file) == False:
#             raise RuntimeError(
#                 "You input a wrong audio path that does not exists, please fix it!"
#             )
#         out, _ = (
#             ffmpeg.input(file, threads=0)
#             .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
#             .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
#         )
#     except Exception as e:
#         traceback.print_exc()
#         raise RuntimeError(i18n("音频加载失败"))

#     return np.frombuffer(out, np.float32).flatten()


def clean_path(path_str: str) -> str:
    """Normalize a user-supplied path that may carry copy-paste artifacts.

    Leading and trailing spaces, single/double quotes, newlines, carriage
    returns and the invisible U+202A mark are removed in any order or
    combination. Trailing path separators are dropped as well, and every
    ``/`` or ``\\`` is converted to ``os.sep``.

    Args:
        path_str: The raw path string.

    Returns:
        The cleaned path. A string consisting only of separators and/or the
        stripped characters yields an empty string.
    """
    # Strip by character set rather than chaining single-character strips:
    # a chain is order-dependent and leaves e.g. the quote in '\u202a"path"'.
    junk = " '\"\n\r\u202a"
    # On the right-hand side separators are part of the set, so a trailing
    # slash hidden behind a quote or space is removed too, and no recursion
    # is needed for long runs of separators.
    cleaned = path_str.lstrip(junk).rstrip(junk + "\\/")
    return cleaned.replace('/', os.sep).replace('\\', os.sep)