Spaces:

Vaishnavi0404
/

Text2Sing-DiffSinger

Running

File size: 741 Bytes

3306f2a
ee3dec5
3306f2a
ee3dec5
3306f2a
 
ee3dec5
 
3306f2a
 
ee3dec5
 
 
 
3306f2a
ee3dec5
 
3306f2a
ee3dec5
 
3306f2a
ee3dec5
 
 
3306f2a

import torchaudio
import numpy as np
import pyworld as pw
import scipy.io.wavfile as wavfile

def extract_pitch(audio, sr):
    """Estimate the fundamental-frequency (F0) contour of an audio signal.

    Runs pyworld's DIO estimator to get a coarse F0 track and then refines
    that track with StoneMask.

    Args:
        audio: Array-like of audio samples; converted to float64 before
            analysis. Presumably a 1-D mono signal -- TODO confirm against
            callers.
        sr: Sampling rate of ``audio``, passed straight through to pyworld.

    Returns:
        The refined F0 array returned by ``pw.stonemask``.
    """
    # Convert once and reuse the buffer for both pyworld calls instead of
    # allocating a fresh float64 copy per call. ascontiguousarray also
    # guarantees the C-contiguous layout pyworld's bindings expect.
    samples = np.ascontiguousarray(audio, dtype=np.float64)
    coarse_f0, frame_times = pw.dio(samples, sr)
    return pw.stonemask(samples, coarse_f0, frame_times, sr)

def run_diffsinger_inference(
    input_path, output_path="/tmp/output_singing.wav"
):
    """Run the placeholder singing pipeline on an audio file.

    Loads the file, extracts a pitch contour from its first channel, applies
    a stand-in transformation (attenuation by a factor of 0.8) and writes the
    result as a 16-bit PCM WAV file.

    Args:
        input_path: Path of the audio file to load with ``torchaudio.load``.
        output_path: Destination of the generated WAV file. Defaults to the
            fixed location this function has always written to; pass a
            unique path when calls may overlap, otherwise they overwrite
            each other's output.

    Returns:
        The path the WAV file was written to (``output_path``).
    """
    # Load audio; only the first channel is processed.
    waveform, sr = torchaudio.load(input_path)
    audio = waveform[0].numpy()

    # Pitch extraction. The contour is not consumed yet: it is kept as
    # scaffolding until the DiffSinger model is wired in.
    f0 = extract_pitch(audio, sr)

    # Simulate pitch & vibrato mod (placeholder until DiffSinger model added)
    new_audio = audio * 0.8  # just reduce volume for test

    # Clip to [-1, 1] before scaling: samples outside that range would
    # otherwise wrap around when cast to int16 and produce loud artifacts.
    pcm = (np.clip(new_audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Save as WAV
    wavfile.write(output_path, sr, pcm)
    return output_path