import numpy as np
import pyworld as pw
import scipy.io.wavfile as wavfile
import torchaudio

# Gain applied by the placeholder "model" so the output is audibly different.
_PLACEHOLDER_GAIN = 0.8
# Scale factor used to map float samples onto 16-bit PCM.
_INT16_SCALE = 32767
_INT16_MIN = -32768
_INT16_MAX = 32767
_DEFAULT_OUTPUT_PATH = "/tmp/output_singing.wav"


def extract_pitch(audio, sr):
    """Estimate the F0 contour of *audio* with pyworld (DIO + StoneMask).

    Args:
        audio: One-dimensional array-like of samples; converted to float64
            before being handed to pyworld.
        sr: Sample rate of *audio*, passed through to pyworld unchanged.

    Returns:
        The refined F0 array returned by ``pw.stonemask``.
    """
    # Convert once (the original converted on every pyworld call) and make
    # the buffer C-contiguous float64 before handing it to pyworld.
    samples = np.ascontiguousarray(audio, dtype=np.float64)
    raw_f0, positions = pw.dio(samples, sr)
    return pw.stonemask(samples, raw_f0, positions, sr)


def run_diffsinger_inference(input_path, output_path=_DEFAULT_OUTPUT_PATH):
    """Run the placeholder inference pipeline and write a 16-bit WAV file.

    Loads *input_path*, takes its first channel, runs pitch extraction, applies
    a fixed gain as a stand-in for the real model, and saves the result.

    Args:
        input_path: Path of the audio file to load with ``torchaudio.load``.
        output_path: Destination WAV path. Defaults to the location the
            original implementation always wrote to.

    Returns:
        The path the WAV file was written to.
    """
    waveform, sr = torchaudio.load(input_path)
    # Only the first channel is processed; any others are ignored.
    audio = waveform[0].numpy()

    # Pitch extraction. The contour is not consumed yet: it is kept so the
    # pipeline already exercises this step until the DiffSinger model is added.
    f0 = extract_pitch(audio, sr)  # noqa: F841

    # Placeholder for pitch/vibrato modification: just reduce the volume.
    new_audio = audio * _PLACEHOLDER_GAIN

    # Clip before the cast so out-of-range float samples saturate instead of
    # wrapping around when converted to int16.
    pcm = np.clip(new_audio * _INT16_SCALE, _INT16_MIN, _INT16_MAX)
    wavfile.write(output_path, sr, pcm.astype(np.int16))
    return output_path