import torchaudio
import numpy as np
import pyworld as pw
import scipy.io.wavfile as wavfile
def extract_pitch(audio: np.ndarray, sr: int) -> np.ndarray:
    """Estimate the fundamental-frequency (F0) contour of a signal with WORLD.

    A first estimate is obtained from ``pw.dio`` and then passed through
    ``pw.stonemask`` together with the frame times ``pw.dio`` returned.

    Args:
        audio: Audio samples; converted to ``float64`` before analysis.
            Presumably a 1-D mono signal -- confirm against callers.
        sr: Sampling rate of ``audio``, forwarded unchanged to pyworld.

    Returns:
        The value returned by ``pw.stonemask`` (the refined F0 contour).
    """
    # Convert once and reuse: both pyworld calls need the same float64
    # buffer, so there is no reason to allocate two copies of the signal.
    samples = audio.astype(np.float64)
    coarse_f0, frame_times = pw.dio(samples, sr)
    return pw.stonemask(samples, coarse_f0, frame_times, sr)
def run_diffsinger_inference(input_path, output_path="/tmp/output_singing.wav"):
    """Run the (placeholder) singing pipeline on one audio file.

    The file is loaded, its first channel is analysed for pitch, the
    signal is attenuated as a stand-in for the real model, and the result
    is written out as a 16-bit PCM WAV file.

    Args:
        input_path: Path of the audio file to load with ``torchaudio.load``.
        output_path: Where to write the resulting WAV file. Defaults to the
            historical fixed location; pass a unique path when several
            inferences may run at once so they do not overwrite each other.

    Returns:
        The path the WAV file was written to (``output_path``).
    """
    # Load audio
    waveform, sr = torchaudio.load(input_path)
    # Only the first channel is used; any additional channels are dropped.
    audio = waveform[0].numpy()

    # Pitch extraction
    # The contour is not consumed yet; it is computed so the analysis step
    # is already in place for when the DiffSinger model is wired in.
    f0 = extract_pitch(audio, sr)  # noqa: F841

    # Simulate pitch & vibrato mod (placeholder until DiffSinger model added)
    new_audio = audio * 0.8  # just reduce volume for test

    # Save as WAV
    # NOTE(review): the 32767 scale assumes samples normalised to
    # [-1.0, 1.0] -- confirm against the torchaudio.load settings in use.
    # Clip before the cast so out-of-range samples saturate instead of
    # wrapping around to the opposite sign in int16.
    pcm = np.clip(new_audio * 32767, -32768, 32767).astype(np.int16)
    wavfile.write(output_path, sr, pcm)
    return output_path