Spaces:

kushan1988
/

VITS-TEST

Sleeping

App Files Files Community

VITS-TEST / app_server_infer.py

kushan1988

Upload 231 files

0b65cde over 2 years ago

raw

history blame contribute delete

3.23 kB

	import logging
	import os

	from spkmix import spk_mix_map
	import soundfile
	from inference import infer_tool
	from inference.infer_tool import Svc

	# Raise the threshold of the "numba" logger so only WARNING and above pass.
	logging.getLogger("numba").setLevel(logging.WARNING)
	chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

	# clean_names = args.clean_names

	# --- Values handed to the Svc constructor --------------------------------
	device = "cpu"
	# model names:
	# G_354400.pth - with 5000 epochs training
	# G_354400.pth - with 10000 epochs training 2023/07/22
	# G_709600.pth - with 10000 epochs training 2023/07/22
	# NOTE(review): G_354400.pth is listed twice with different epoch counts;
	# one of the two entries is presumably a typo -- confirm.
	model_path = "logs/44k/G_709600.pth"
	config_path = "configs/config.json"
	cluster_model_path = "logs/44k/kmeans_10000.pt"
	diffusion_model_path = "logs/44k/diffusion/model_0.pt"
	diffusion_config_path = "logs/44k/diffusion/config.yaml"
	enhance = False
	shallow_diffusion = False
	only_diffusion = False
	use_spk_mix = False
	feature_retrieval = False

	# --- Values forwarded to svc_model.slice_inference ------------------------
	trans = [0]
	slice_db = -40
	cluster_infer_ratio = 0
	auto_predict_f0 = False
	noice_scale = 0.4
	pad_seconds = 0.5
	clip = 0
	lg = 0
	lgr = 0.75
	f0p = "pm"
	enhancer_adaptive_key = 0
	cr_threshold = 0.05
	k_step = 100
	second_encoding = False
	loudness_envelope_adjustment = 1

	# --- Remaining module-level settings --------------------------------------
	spk_list = ["America"]
	wav_format = "wav"
	sound_file_path = os.path.join(os.curdir, "recorded_data")

	print(f"Rsn {wav_format}")

	# Arguments are positional; their order must match the Svc signature
	# defined in inference.infer_tool.
	svc_model = Svc(
	    model_path,
	    config_path,
	    device,
	    cluster_model_path,
	    enhance,
	    diffusion_model_path,
	    diffusion_config_path,
	    shallow_diffusion,
	    only_diffusion,
	    use_spk_mix,
	    feature_retrieval,
	)

	print("Rsn svc_model = ")
	infer_tool.mkdir(["raw", "results"])

	print("ready to infer")


	def inference_wav_file(file_name, i=0):
	    """Run voice conversion on one uploaded file and return the result path.

	    The input is read from ``server_temp/<file_name>``. It is first passed to
	    ``infer_tool.format_wav`` and then to ``svc_model.slice_inference`` using
	    the module-level inference settings. The converted audio is written to
	    ``./server_results/result_{i}_{spk}{file_name}_{isdiffusion}.{wav_format}``.

	    Speaker mixing is always disabled here and the speaker is fixed to
	    ``"America"``; the module-level ``spk_list`` / ``use_spk_mix`` values are
	    not consulted by this function.

	    Args:
	        file_name: Name of the file inside ``server_temp``. It is embedded
	            verbatim (including any extension) in the output file name.
	        i: Index embedded in the output file name.

	    Returns:
	        Path of the audio file that was written.
	    """
	    speakers = ["America"]

	    # NOTE(review): file_name is interpolated into paths without validation;
	    # if it originates from a client request, confirm it is sanitised upstream.
	    file_path = f'server_temp/{file_name}'

	    print("Rsn2 " + file_path)
	    infer_tool.format_wav(file_path)

	    # The module only creates "raw" and "results" at import time, so make
	    # sure the directory this function writes into exists.
	    out_dir = os.path.join(os.curdir, "server_results")
	    os.makedirs(out_dir, exist_ok=True)

	    for spk in speakers:
	        kwarg = {
	            "raw_audio_path": file_path,
	            "spk": spk,
	            "tran": trans[0],
	            "slice_db": slice_db,
	            "cluster_infer_ratio": cluster_infer_ratio,
	            "auto_predict_f0": auto_predict_f0,
	            "noice_scale": noice_scale,
	            "pad_seconds": pad_seconds,
	            "clip_seconds": clip,
	            "lg_num": lg,
	            "lgr_num": lgr,
	            "f0_predictor": f0p,
	            "enhancer_adaptive_key": enhancer_adaptive_key,
	            "cr_threshold": cr_threshold,
	            "k_step": k_step,
	            # Speaker mixing is never enabled for this entry point.
	            "use_spk_mix": False,
	            "second_encoding": second_encoding,
	            "loudness_envelope_adjustment": loudness_envelope_adjustment,
	        }
	        audio = svc_model.slice_inference(**kwarg)

	        # Tag the output name with the synthesis mode that is configured.
	        isdiffusion = "sovits"
	        if shallow_diffusion:
	            isdiffusion = "sovdiff"
	        if only_diffusion:
	            isdiffusion = "diff"

	        res_path = os.path.join(
	            out_dir,
	            f"result_{i}_{spk}{file_name}_{isdiffusion}.{wav_format}",
	        )
	        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)
	        svc_model.clear_empty()

	    return res_path