Commit ·
28d51c4
1
Parent(s): 92de584
Upload ref infer_main.py
Browse files- inference_main.py +56 -0
inference_main.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Batch inference driver for so-vits-svc voice conversion.

Reads source vocal files from ``raw/``, slices each one on silence, runs
every voiced segment through the loaded ``Svc`` model once per target
speaker, and writes the converted audio to ``results/``.
"""
import io
import logging
import time
from pathlib import Path

import librosa
import numpy as np
import soundfile

from inference import infer_tool
from inference import slicer
from inference.infer_tool import Svc

# numba emits noisy JIT-compilation messages below WARNING; silence them.
logging.getLogger('numba').setLevel(logging.WARNING)
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

model_path = "logs/32k/G_50000.pth"
config_path = "logs/32k/config.json"
svc_model = Svc(model_path, config_path)
infer_tool.mkdir(["raw", "results"])

# Multiple wav files are supported; place them under the raw/ folder.
clean_names = ["per1", "per2", "per3", "per4"]
trans = [0]  # pitch shift in semitones; positive and negative both supported
spk_list = ['azusa']  # every listed speaker timbre is synthesized per file
slice_db = -40  # default -40; noisy audio may need -30, dry vocals keeping breaths -50
wav_format = 'flac'  # output audio format

# Pad `trans` so each entry of clean_names has a matching transposition.
infer_tool.fill_a_to_b(trans, clean_names)
for clean_name, tran in zip(clean_names, trans):
    raw_audio_path = f"raw/{clean_name}"
    # Append a default extension only when the *filename* lacks one.
    # (Checking `"." in raw_audio_path` would be fooled by a dot anywhere
    # in a directory component of the path.)
    if not Path(raw_audio_path).suffix:
        raw_audio_path += ".wav"
    infer_tool.format_wav(raw_audio_path)
    wav_path = Path(raw_audio_path).with_suffix('.wav')
    # Slice on silence so long files are processed chunk-by-chunk and
    # quiet gaps never reach the model.
    chunks = slicer.cut(wav_path, db_thresh=slice_db)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)

    for spk in spk_list:
        audio = []
        for slice_tag, data in audio_data:
            print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
            # Expected sample count after resampling to the model's output rate.
            length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))
            # The model consumes a wav stream, so wrap the raw samples in an
            # in-memory file rather than touching disk per segment.
            raw_path = io.BytesIO()
            soundfile.write(raw_path, data, audio_sr, format="wav")
            raw_path.seek(0)
            if slice_tag:
                # Silent chunk: emit zeros of the right length instead of
                # running inference on it.
                print('jump empty segment')
                _audio = np.zeros(length)
            else:
                out_audio, out_sr = svc_model.infer(spk, tran, raw_path)
                _audio = out_audio.cpu().numpy()
            audio.extend(list(_audio))

        res_path = f'./results/{clean_name}_{tran}key_{spk}.{wav_format}'
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)