"""Train a GMM voice-conversion model from two recordings and upload it.

Reads ``source.wav`` and ``target.wav`` from the working directory, fits a
joint Gaussian mixture over their coded spectral envelopes, saves the result
with joblib, and pushes the file to a private Hugging Face model repo.

Required environment variables (read at import time; a missing one raises
``KeyError``): ``model_name`` and ``Hf_Token``.
"""

import os
import subprocess
import sys
from pathlib import Path

import joblib
import numpy as np
import torch
from huggingface_hub import HfApi, create_repo

SOURCE_AUDIO = "source.wav"
TARGET_AUDIO = "target.wav"
MODEL_NAME = os.environ["model_name"]
HF_TOKEN = os.environ["Hf_Token"]
HF_REPO = "Neon-AI/voice-models"


def train() -> str:
    """Fit the conversion model and save it next to the script.

    Returns:
        Path of the saved ``{MODEL_NAME}.pkl`` file.

    Raises:
        subprocess.CalledProcessError: If installing the runtime
            dependencies fails.
        ValueError: If either recording has no voiced frames, so no F0
            ratio can be computed.
    """
    print(f"Training voice model: {MODEL_NAME}")

    # Install training deps at runtime (keeps the Dockerfile lean, only
    # installs when the Space runs). Invoke pip through the running
    # interpreter so the packages land in the environment that the imports
    # below actually use; a bare "pip" on PATH may belong to another Python.
    # NOTE(review): "joblib" is already imported at module top, so it must be
    # installed before this runs -- listing it here is redundant but harmless.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "-q",
            "praat-parselmouth",
            "pyworld",
            "librosa",
            "scikit-learn",
            "faiss-cpu",
            "joblib",
            "soundfile",
        ],
        check=True,
    )

    # Imported here because they are only available after the install above.
    import pyworld as pw
    import librosa
    from sklearn.mixture import GaussianMixture

    SR = 16000     # analysis sample rate; audio is resampled to this on load
    N_MCEP = 40    # coded-envelope dimensions (higher -> better accent capture)
    # NOTE(review): N_FFT is only stored in the model dict; it is not passed
    # to the WORLD analysis below -- confirm whether that is intended.
    N_FFT = 1024
    N_GMM = 64     # number of mixture components

    def extract(path):
        """Return (coded envelope, f0, spectral envelope, aperiodicity)."""
        audio, _ = librosa.load(path, sr=SR, mono=True)
        # pyworld expects float64 samples.
        f0, sp, ap = pw.wav2world(audio.astype(np.float64), SR)
        mcep = pw.code_spectral_envelope(sp, SR, N_MCEP)
        return mcep, f0, sp, ap

    print("Extracting source features...")
    src_mcep, src_f0, _, _ = extract(SOURCE_AUDIO)

    print("Extracting target features...")
    tgt_mcep, tgt_f0, _, _ = extract(TARGET_AUDIO)

    # Frames are paired by index after truncating to the shorter recording;
    # no time alignment is performed.
    min_len = min(len(src_mcep), len(tgt_mcep))
    src_mcep = src_mcep[:min_len]
    tgt_mcep = tgt_mcep[:min_len]

    print(f"Training GMM-64 on {min_len} frames...")
    gmm = GaussianMixture(
        n_components=N_GMM,
        covariance_type="full",
        max_iter=300,
        verbose=2,
        tol=1e-4,
    )
    # Joint source+target vectors: one row per paired frame.
    gmm.fit(np.hstack([src_mcep, tgt_mcep]))

    # Keep voiced frames only (f0 > 0), taken from the full-length F0 tracks.
    src_f0_v = src_f0[src_f0 > 0]
    tgt_f0_v = tgt_f0[tgt_f0 > 0]
    if src_f0_v.size == 0 or tgt_f0_v.size == 0:
        # Without this check np.mean() of an empty array yields NaN and a
        # NaN f0_ratio would be saved and uploaded silently.
        raise ValueError(
            "No voiced frames detected in "
            f"{SOURCE_AUDIO if src_f0_v.size == 0 else TARGET_AUDIO}; "
            "cannot compute f0_ratio"
        )

    model = {
        "gmm": gmm,
        "f0_ratio": float(np.mean(tgt_f0_v) / np.mean(src_f0_v)),
        "src_mean": src_mcep.mean(0),
        "src_std": src_mcep.std(0),
        "tgt_mean": tgt_mcep.mean(0),
        "tgt_std": tgt_mcep.std(0),
        "SR": SR,
        "N_MCEP": N_MCEP,
        "N_FFT": N_FFT,
    }

    out_path = f"{MODEL_NAME}.pkl"
    joblib.dump(model, out_path)
    print(f"Model saved locally: {out_path}")
    return out_path


def push(model_path: str) -> None:
    """Upload ``model_path`` to ``HF_REPO`` as ``{MODEL_NAME}.pkl``.

    Args:
        model_path: Local path of the model file to upload.
    """
    api = HfApi(token=HF_TOKEN)

    # create repo if it doesn't exist yet
    create_repo(
        HF_REPO,
        token=HF_TOKEN,
        repo_type="model",
        exist_ok=True,
        private=True,
    )

    api.upload_file(
        path_or_fileobj=model_path,
        path_in_repo=f"{MODEL_NAME}.pkl",
        repo_id=HF_REPO,
        repo_type="model",
        commit_message=f"Add voice model: {MODEL_NAME}",
    )
    print(f"Pushed to {HF_REPO}/{MODEL_NAME}.pkl")


if __name__ == "__main__":
    model_path = train()
    push(model_path)
    print("Done.")