| import os |
| import subprocess |
| import numpy as np |
| import torch |
| import joblib |
| from pathlib import Path |
| from huggingface_hub import HfApi, create_repo |
|
|
# Hugging Face repository that receives the trained model file.
HF_REPO = "Neon-AI/voice-models"

# Input recordings; both are relative paths, so they resolve against the
# current working directory.
SOURCE_AUDIO, TARGET_AUDIO = "source.wav", "target.wav"

# Required environment variables. A missing variable raises KeyError as soon
# as this module is imported or run.
MODEL_NAME = os.environ["model_name"]
HF_TOKEN = os.environ["Hf_Token"]
|
|
def train():
    """Fit a joint source/target GMM voice model and pickle it locally.

    Steps performed:

    1. Install the audio/ML dependencies with pip, then import them.
    2. Run WORLD analysis (``pw.wav2world``) on ``SOURCE_AUDIO`` and
       ``TARGET_AUDIO`` and code each spectral envelope down to ``N_MCEP``
       coefficients per frame.
    3. Truncate both feature sequences to the shorter length and fit a
       full-covariance ``GaussianMixture`` on the horizontally stacked
       ``[source | target]`` frames.
    4. Store the GMM, the target/source mean-F0 ratio (voiced frames only),
       per-dimension feature statistics and the analysis settings in a dict
       and dump it with joblib to ``f"{MODEL_NAME}.pkl"``.

    Frames are paired purely by index after truncation; no time alignment is
    performed here.

    Returns:
        str: Path of the pickle file that was written.

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
        ValueError: If there are fewer paired frames than GMM components, or
            if either recording contains no voiced (F0 > 0) frames.
    """
    import sys

    print(f"Training voice model: {MODEL_NAME}")

    # Run pip through the interpreter executing this script. A bare "pip" is
    # looked up on PATH and may install into a different environment than the
    # one that performs the imports below.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "-q",
            "praat-parselmouth",
            "pyworld",
            "librosa",
            "scikit-learn",
            "faiss-cpu",
            "joblib",
            "soundfile",
        ],
        check=True,
    )

    # Imported only after the install step so a fresh environment works.
    import pyworld as pw
    import librosa
    from sklearn.mixture import GaussianMixture

    SR = 16000     # sample rate every input is resampled to
    N_MCEP = 40    # coded spectral-envelope dimensions per frame
    N_FFT = 1024   # recorded in the model dict; not used during extraction here
    N_GMM = 64     # number of mixture components

    def extract(path):
        """Return (coded envelope, f0, spectral envelope, aperiodicity)."""
        audio, _ = librosa.load(path, sr=SR, mono=True)
        # pyworld is called with float64 samples.
        f0, sp, ap = pw.wav2world(audio.astype(np.float64), SR)
        mcep = pw.code_spectral_envelope(sp, SR, N_MCEP)
        return mcep, f0, sp, ap

    print("Extracting source features...")
    src_mcep, src_f0, _, _ = extract(SOURCE_AUDIO)

    print("Extracting target features...")
    tgt_mcep, tgt_f0, _, _ = extract(TARGET_AUDIO)

    # Pair frames by index: cut both sequences to the shorter one so they can
    # be stacked side by side into joint vectors.
    min_len = min(len(src_mcep), len(tgt_mcep))
    src_mcep = src_mcep[:min_len]
    tgt_mcep = tgt_mcep[:min_len]

    # Validate before the expensive fit so bad input fails fast and clearly.
    if min_len < N_GMM:
        raise ValueError(
            f"Need at least {N_GMM} paired frames to fit the GMM, "
            f"got {min_len}"
        )

    # F0 == 0 marks unvoiced frames; only voiced frames carry pitch.
    src_f0_v = src_f0[src_f0 > 0]
    tgt_f0_v = tgt_f0[tgt_f0 > 0]
    if src_f0_v.size == 0 or tgt_f0_v.size == 0:
        # Without this guard np.mean() of an empty array yields NaN and a
        # model with an unusable f0_ratio would be saved silently.
        raise ValueError(
            "No voiced frames found in the source or target audio; "
            "cannot compute the F0 ratio"
        )

    print(f"Training GMM-{N_GMM} on {min_len} frames...")
    gmm = GaussianMixture(
        n_components=N_GMM,
        covariance_type="full",
        max_iter=300,
        verbose=2,
        tol=1e-4,
    )
    gmm.fit(np.hstack([src_mcep, tgt_mcep]))

    model = {
        "gmm": gmm,
        "f0_ratio": float(np.mean(tgt_f0_v) / np.mean(src_f0_v)),
        "src_mean": src_mcep.mean(0),
        "src_std": src_mcep.std(0),
        "tgt_mean": tgt_mcep.mean(0),
        "tgt_std": tgt_mcep.std(0),
        "SR": SR,
        "N_MCEP": N_MCEP,
        "N_FFT": N_FFT,
    }

    out_path = f"{MODEL_NAME}.pkl"
    joblib.dump(model, out_path)
    print(f"Model saved locally: {out_path}")
    return out_path
|
|
def push(model_path):
    """Upload the model file at *model_path* to the ``HF_REPO`` model repo.

    The repository is created as a private model repo when it does not exist
    yet (``exist_ok=True`` means an existing repo is not treated as an error).
    The file is stored in the repo as ``f"{MODEL_NAME}.pkl"``, regardless of
    the local file name.

    Args:
        model_path: Local path of the file to upload.
    """
    client = HfApi(token=HF_TOKEN)

    create_repo(
        HF_REPO,
        token=HF_TOKEN,
        repo_type="model",
        exist_ok=True,
        private=True,
    )

    remote_name = f"{MODEL_NAME}.pkl"
    client.upload_file(
        path_or_fileobj=model_path,
        path_in_repo=remote_name,
        repo_id=HF_REPO,
        repo_type="model",
        commit_message=f"Add voice model: {MODEL_NAME}",
    )
    print(f"Pushed to {HF_REPO}/{remote_name}")
|
|
if __name__ == "__main__":
    # Script entry point: train first, then publish the file train() wrote.
    push(train())
    print("Done.")