Sweetlemon committed on
Commit
28d51c4
·
1 Parent(s): 92de584

Upload ref infer_main.py

Browse files
Files changed (1) hide show
  1. inference_main.py +56 -0
inference_main.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Batch voice-conversion script.

For every clip named in ``clean_names`` (looked up under ``raw/``) and every
speaker in ``spk_list``, the clip is cut into segments with ``slicer``, each
non-silent segment is run through the ``Svc`` model, silent segments are
replaced by zeros of the matching length, and the joined result is written
to ``results/``.
"""
import io
import logging
import time
from pathlib import Path

import librosa
import numpy as np
import soundfile

from inference import infer_tool
from inference import slicer
from inference.infer_tool import Svc

logging.getLogger('numba').setLevel(logging.WARNING)
# NOTE(review): the returned value is never used below; the call is kept
# because read_temp may have side effects on the temp file -- TODO confirm.
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

model_path = "logs/32k/G_50000.pth"
config_path = "logs/32k/config.json"
svc_model = Svc(model_path, config_path)
infer_tool.mkdir(["raw", "results"])

# Multiple wav files are supported; put them in the raw folder.
clean_names = ["per1", "per2", "per3", "per4"]
trans = [0]  # Pitch shift in semitones; positive and negative values are supported.
spk_list = ['azusa']  # Speaker voices to synthesize for each input clip.
slice_db = -40  # Default -40; use -30 for noisy audio, -50 to keep breaths in dry vocals.
wav_format = 'flac'  # Output audio format.

# Presumably pads `trans` so it pairs up with every entry of `clean_names`
# in the zip below -- verify against infer_tool.fill_a_to_b.
infer_tool.fill_a_to_b(trans, clean_names)
for clean_name, tran in zip(clean_names, trans):
    raw_audio_path = f"raw/{clean_name}"
    if "." not in raw_audio_path:
        raw_audio_path += ".wav"
    infer_tool.format_wav(raw_audio_path)
    wav_path = Path(raw_audio_path).with_suffix('.wav')
    chunks = slicer.cut(wav_path, db_thresh=slice_db)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)

    for spk in spk_list:
        # Collect per-segment arrays and join them once at the end instead of
        # materializing every sample as a separate Python object.
        segments = []
        for (slice_tag, data) in audio_data:
            print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
            if slice_tag:
                # Tagged segments are not sent to the model; emit zeros whose
                # length is the segment duration at the model's sample rate.
                print('jump empty segment')
                length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))
                segments.append(np.zeros(length))
            else:
                # Encode the segment as an in-memory WAV only when it is
                # actually going to be read by the model.
                raw_path = io.BytesIO()
                soundfile.write(raw_path, data, audio_sr, format="wav")
                raw_path.seek(0)
                out_audio, _out_sr = svc_model.infer(spk, tran, raw_path)
                segments.append(out_audio.cpu().numpy())

        # np.concatenate rejects an empty sequence, so fall back to an empty
        # array when the slicer produced no segments.
        audio = np.concatenate(segments) if segments else np.zeros(0)

        res_path = f'./results/{clean_name}_{tran}key_{spk}.{wav_format}'
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)