Spaces:

Vaishnavi0404
/

Text2Sing-DiffSinger

Running

Vaishnavi0404 committed on Apr 11, 2025

Commit

3306f2a

verified ·

1 Parent(s): b6e4f5d

Create diff_singer_infer.py

Files changed (1) hide show

diff_singer_infer.py ADDED Viewed

+import numpy as np
+import torchaudio
+import pyworld as pw
+import soundfile as sf
+from scipy.interpolate import interp1d
+def extract_pitch(audio, sr):
+    # Convert audio to double precision for pyworld
+    audio = audio.astype(np.float64)
+    _f0, t = pw.dio(audio, sr)
+    f0 = pw.stonemask(audio, _f0, t, sr)
+    return f0
+def run_diffsinger_inference(input_audio_path):
+    waveform, sr = torchaudio.load(input_audio_path)
+    audio = waveform[0].numpy()  # Mono
+    f0 = extract_pitch(audio, sr)
+    # Simple manipulation to simulate expressiveness
+    f0_smooth = interp1d(np.arange(len(f0)), f0, kind='cubic', fill_value="extrapolate")(np.linspace(0, len(f0)-1, len(f0)))
+    f0_smooth *= 1.15  # Slight pitch boost
+    # Pitch-shifted output
+    output_audio = pw.synthesize(f0_smooth, pw.cheaptrick(audio.astype(np.float64), f0, np.linspace(0, len(audio)/sr, len(f0)), sr), pw.d4c(audio.astype(np.float64), f0, np.linspace(0, len(audio)/sr, len(f0)), sr), sr)
+    output_path = "output_singing.wav"
+    sf.write(output_path, output_audio, sr)
+    return output_path