VITS-TEST / app_server_infer.py
kushan1988's picture
Upload 231 files
0b65cde
import logging
import os
from spkmix import spk_mix_map
import soundfile
from inference import infer_tool
from inference.infer_tool import Svc
# Only let WARNING and above through from the "numba" logger.
logging.getLogger("numba").setLevel(logging.WARNING)

# Loaded once at import time from the inference temp file.
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

# --- Files and formats -------------------------------------------------------
sound_file_path = os.path.join(os.curdir, "recorded_data")
wav_format = "wav"  # used both as output file extension and soundfile format

# --- Values handed positionally to the Svc constructor -----------------------
device = "cpu"
cluster_model_path = "logs/44k/kmeans_10000.pt"
enhance = False
diffusion_model_path = "logs/44k/diffusion/model_0.pt"
diffusion_config_path = "logs/44k/diffusion/config.yaml"
shallow_diffusion = False
only_diffusion = False
use_spk_mix = False
feature_retrieval = False

# --- Values forwarded to svc_model.slice_inference by inference_wav_file -----
trans = [0]  # only the first entry is read ("tran")
slice_db = -40
cluster_infer_ratio = 0
auto_predict_f0 = False
noice_scale = 0.4
pad_seconds = 0.5
clip = 0  # forwarded as "clip_seconds"
lg = 0  # forwarded as "lg_num"
lgr = 0.75  # forwarded as "lgr_num"
f0p = "pm"  # forwarded as "f0_predictor"
enhancer_adaptive_key = 0
cr_threshold = 0.05
k_step = 100
second_encoding = False
loudness_envelope_adjustment = 1

# Default speaker list at module level.
spk_list = ["America"]

print(f"Rsn {wav_format}")
# Checkpoints that have been used with this script:
#   G_354400.pth - with 5000 epochs training
#   G_354400.pth - with 10000 epochs training 2023/07/22
#   G_709600.pth - with 10000 epochs training 2023/07/22
config_path = "configs/config.json"
model_path = "logs/44k/G_709600.pth"

# The model is built once at import time and shared by every request.
# All arguments are positional, so their order has to follow Svc's signature.
svc_model = Svc(
    model_path,
    config_path,
    device,
    cluster_model_path,
    enhance,
    diffusion_model_path,
    diffusion_config_path,
    shallow_diffusion,
    only_diffusion,
    use_spk_mix,
    feature_retrieval,
)
print("Rsn svc_model = ")

# Make sure the working directories used by the inference tooling exist.
infer_tool.mkdir(["raw", "results"])
print("ready to infer")
def inference_wav_file(file_name, i=0):
    """Run voice conversion on one file from ``server_temp`` and save the result.

    The file ``server_temp/<file_name>`` is handed to ``infer_tool.format_wav``
    and then to ``svc_model.slice_inference`` together with the module-level
    inference settings. The returned audio is written with ``soundfile`` into
    ``./server_results`` (created on demand).

    Args:
        file_name: Name of the input audio file inside ``server_temp``.
        i: Index embedded in the output file name; defaults to 0.

    Returns:
        Path of the written result file,
        ``./server_results/result_<i>_<spk><file_name>_<mode>.<wav_format>``.
    """
    # Speaker mixing is switched off for this entry point. The length check is
    # kept so the rule "a mix map with at most one entry disables mixing"
    # stays in place should the flag above ever be turned on.
    use_spk_mix = False
    if len(spk_mix_map) <= 1:
        use_spk_mix = False
    speakers = [spk_mix_map] if use_spk_mix else ["America"]

    file_path = f'server_temp/{file_name}'
    print("Rsn2 " + file_path)
    infer_tool.format_wav(file_path)

    # Tag describing which synthesis path produced the audio; it only depends
    # on module-level flags, so compute it once. only_diffusion wins over
    # shallow_diffusion when both are set.
    isdiffusion = "sovits"
    if shallow_diffusion:
        isdiffusion = "sovdiff"
    if only_diffusion:
        isdiffusion = "diff"

    # The module start-up code only creates "raw" and "results"; make sure the
    # directory this function writes to exists before spending time on inference.
    out_dir = os.path.join(os.curdir, "server_results")
    os.makedirs(out_dir, exist_ok=True)

    for spk in speakers:
        kwarg = {
            "raw_audio_path": file_path,
            "spk": spk,
            "tran": trans[0],
            "slice_db": slice_db,
            "cluster_infer_ratio": cluster_infer_ratio,
            "auto_predict_f0": auto_predict_f0,
            "noice_scale": noice_scale,
            "pad_seconds": pad_seconds,
            "clip_seconds": clip,
            "lg_num": lg,
            "lgr_num": lgr,
            "f0_predictor": f0p,
            "enhancer_adaptive_key": enhancer_adaptive_key,
            "cr_threshold": cr_threshold,
            "k_step": k_step,
            "use_spk_mix": use_spk_mix,
            "second_encoding": second_encoding,
            "loudness_envelope_adjustment": loudness_envelope_adjustment,
        }
        audio = svc_model.slice_inference(**kwarg)

        # A mix map is not a usable file-name component, so use a fixed label.
        spk_label = "spk_mix" if use_spk_mix else spk
        res_path = os.path.join(
            out_dir,
            f"result_{i}_{spk_label}{file_name}_{isdiffusion}.{wav_format}",
        )
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)
        svc_model.clear_empty()

    # `speakers` always holds exactly one entry, so res_path is always bound.
    return res_path