File size: 741 Bytes
3306f2a
ee3dec5
3306f2a
ee3dec5
3306f2a
 
ee3dec5
 
3306f2a
 
ee3dec5
 
 
 
3306f2a
ee3dec5
 
3306f2a
ee3dec5
 
3306f2a
ee3dec5
 
 
3306f2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import torchaudio
import numpy as np
import pyworld as pw
import scipy.io.wavfile as wavfile

def extract_pitch(audio: np.ndarray, sr: int) -> np.ndarray:
    """Estimate the fundamental-frequency (F0) contour of *audio* with WORLD.

    A first-pass contour is obtained from ``pw.dio`` and then passed,
    together with the frame times it returned, to ``pw.stonemask``.

    Args:
        audio: Array of audio samples; it is cast to float64 before being
            handed to pyworld. Presumably a 1-D mono signal -- confirm
            against callers.
        sr: Sampling rate of ``audio``.

    Returns:
        The contour returned by ``pw.stonemask``.
    """
    # Convert once and reuse: the original cast the signal to float64
    # separately for each pyworld call, copying the whole buffer twice.
    samples = audio.astype(np.float64)
    raw_f0, frame_times = pw.dio(samples, sr)
    return pw.stonemask(samples, raw_f0, frame_times, sr)

def run_diffsinger_inference(input_path, output_path="/tmp/output_singing.wav"):
    """Run the (placeholder) singing pipeline on one audio file.

    The input is loaded with torchaudio, its pitch contour is extracted,
    and -- until a real DiffSinger model is wired in -- the signal is only
    attenuated before being written out as a 16-bit PCM WAV file.

    Args:
        input_path: Path of the audio file to load.
        output_path: Destination of the rendered WAV. Defaults to the
            original fixed location; pass a unique path when several
            calls may run concurrently so they do not overwrite each other.

    Returns:
        The path the WAV file was written to.
    """
    # Load audio
    waveform, sr = torchaudio.load(input_path)
    # Only the first row of the loaded tensor is processed (the first
    # channel under torchaudio's default layout -- TODO confirm).
    audio = waveform[0].numpy()

    # Pitch extraction. The contour is not consumed yet; the call is kept
    # so the pipeline shape (and its failure modes) match the final design.
    f0 = extract_pitch(audio, sr)  # noqa: F841

    # Simulate pitch & vibrato mod (placeholder until DiffSinger model added)
    new_audio = audio * 0.8  # just reduce volume for test

    # Save as WAV. Clip to the int16 range first: a bare astype() on
    # out-of-range floats overflows instead of saturating, which would turn
    # loud peaks into harsh artifacts. In-range samples are unaffected.
    pcm = np.clip(new_audio * 32767, -32768, 32767).astype(np.int16)
    wavfile.write(output_path, sr, pcm)
    return output_path