Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| from spkmix import spk_mix_map | |
| import soundfile | |
| from inference import infer_tool | |
| from inference.infer_tool import Svc | |
# Raise the threshold of the "numba" logger so only WARNING and above get through.
logging.getLogger("numba").setLevel(logging.WARNING)

# Read at import time; nothing else in this part of the file uses the result.
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

# clean_names = args.clean_names  (leftover from a CLI-driven version of this script)

# --- Settings handed to the Svc(...) constructor below -----------------------
device = "cpu"
cluster_model_path = "logs/44k/kmeans_10000.pt"
enhance = False
diffusion_model_path = "logs/44k/diffusion/model_0.pt"
diffusion_config_path = "logs/44k/diffusion/config.yaml"
shallow_diffusion = False
only_diffusion = False
use_spk_mix = False
feature_retrieval = False

# --- Settings forwarded to svc_model.slice_inference(...) per request --------
# Only element 0 of `trans` is forwarded (as "tran").
# NOTE(review): the exact meaning of each knob is defined by
# Svc.slice_inference, which is not visible here -- confirm before tuning.
trans = [0]
slice_db = -40
cluster_infer_ratio = 0
auto_predict_f0 = False
noice_scale = 0.4  # spelling matches the keyword expected by slice_inference
pad_seconds = 0.5
clip = 0  # forwarded as "clip_seconds"
lg = 0  # forwarded as "lg_num"
lgr = 0.75  # forwarded as "lgr_num"
f0p = "pm"  # forwarded as "f0_predictor"
enhancer_adaptive_key = 0
cr_threshold = 0.05
k_step = 100
second_encoding = False
loudness_envelope_adjustment = 1

# --- Speakers, output format and local directories ---------------------------
spk_list = ["America"]
wav_format = "wav"  # used both as the file extension and the soundfile format
sound_file_path = os.path.join(os.curdir, "recorded_data")

print("Rsn " + wav_format)
# Available checkpoints (training notes):
#   G_354400.pth - with 5000 epochs training
#   G_354400.pth - with 10000 epochs training 2023/07/22
#   G_709600.pth - with 10000 epochs training 2023/07/22
# NOTE(review): the first two entries carry the same file name; one of them is
# probably a typo -- confirm against the checkpoints on disk.
model_path = "logs/44k/G_709600.pth"
config_path = "configs/config.json"

# Positional arguments for Svc, in the exact order the constructor is called with.
_svc_args = (
    model_path,
    config_path,
    device,
    cluster_model_path,
    enhance,
    diffusion_model_path,
    diffusion_config_path,
    shallow_diffusion,
    only_diffusion,
    use_spk_mix,
    feature_retrieval,
)

# The model is built once at import time and shared by every inference call.
svc_model = Svc(*_svc_args)
print("Rsn svc_model = ")

# Create the working directories via the project's helper.
infer_tool.mkdir(["raw", "results"])
print("ready to infer")
def inference_wav_file(file_name, i=0):
    """Run the loaded SVC model on one audio file and write the converted audio.

    The input is read from ``server_temp/<file_name>`` and passed through
    ``svc_model.slice_inference`` once per configured speaker, using the
    module-level inference settings. Each result is written into
    ``./server_results`` (created on demand).

    Args:
        file_name: Name of the audio file inside ``server_temp``. It is also
            embedded verbatim in the output file name.
        i: Index embedded in the output file name, so callers can keep
            several results apart. Defaults to 0.

    Returns:
        Path of the written result file. When several speakers are
        configured, the path of the last one written.

    Raises:
        ValueError: If no speaker is configured.
    """
    # Speaker mixing is honoured only when it is switched on in the module
    # configuration AND the mix map actually holds more than one entry.
    mix_enabled = bool(use_spk_mix) and len(spk_mix_map) > 1
    speakers = [spk_mix_map] if mix_enabled else list(spk_list)
    if not speakers:
        raise ValueError("spk_list is empty: no speaker to run inference for")

    file_path = f'server_temp/{file_name}'
    print("Rsn2 " + file_path)
    infer_tool.format_wav(file_path)

    # The pipeline tag depends only on module configuration, so compute it
    # once; only_diffusion takes precedence over shallow_diffusion.
    isdiffusion = "sovits"
    if shallow_diffusion:
        isdiffusion = "sovdiff"
    if only_diffusion:
        isdiffusion = "diff"

    # soundfile.write does not create missing directories, and start-up only
    # creates "raw" and "results".
    out_dir = os.path.join(os.curdir, "server_results")
    os.makedirs(out_dir, exist_ok=True)

    res_path = None
    for spk in speakers:
        kwarg = {
            "raw_audio_path": file_path,
            "spk": spk,
            "tran": trans[0],
            "slice_db": slice_db,
            "cluster_infer_ratio": cluster_infer_ratio,
            "auto_predict_f0": auto_predict_f0,
            "noice_scale": noice_scale,
            "pad_seconds": pad_seconds,
            "clip_seconds": clip,
            "lg_num": lg,
            "lgr_num": lgr,
            "f0_predictor": f0p,
            "enhancer_adaptive_key": enhancer_adaptive_key,
            "cr_threshold": cr_threshold,
            "k_step": k_step,
            "use_spk_mix": mix_enabled,
            "second_encoding": second_encoding,
            "loudness_envelope_adjustment": loudness_envelope_adjustment,
        }
        audio = svc_model.slice_inference(**kwarg)

        # A speaker mix is a mapping, not a name; use a fixed label in the
        # output file name instead.
        spk_label = "spk_mix" if mix_enabled else spk
        res_path = os.path.join(
            out_dir,
            f"result_{i}_{spk_label}{file_name}_{isdiffusion}.{wav_format}",
        )
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)
        svc_model.clear_empty()

    return res_path