# utils.py - Combined audio utilities for 9year Anniversary App
"""
Audio processing utilities:
- Voice/instrumental separation (Demucs)
- Voice conversion (RVC)
- Audio mixing and synthesis
"""

from pathlib import Path
from pydub import AudioSegment
import subprocess
import sys
import os
import tempfile
import shutil
import traceback

# Output format shared by every exporter in this module.
_TARGET_SAMPLE_RATE = 44100
# Loudness (dBFS) that out-of-range audio is pulled to.
_TARGET_LOUDNESS_DBFS = -20.0
# How far below the instrumental the vocals are capped, in dB.
_VOCAL_OFFSET_DB = 4.0
# Upper bound on a single Demucs run (10 minutes).
_DEMUCS_TIMEOUT_SECONDS = 600
# pydub reports this level for silent or empty audio.
_SILENCE_DBFS = float("-inf")


# ============ Internal helpers ============

def _standardize_format(segment):
    """Return *segment* at 44.1 kHz, with mono input upmixed to stereo."""
    if segment.frame_rate != _TARGET_SAMPLE_RATE:
        segment = segment.set_frame_rate(_TARGET_SAMPLE_RATE)
    if segment.channels == 1:
        segment = segment.set_channels(2)
    return segment


def _normalize_loudness(segment, low_dbfs, high_dbfs):
    """Pull *segment* to the target loudness when it lies outside the band.

    Silent audio is returned untouched: its level is -inf, so the computed
    gain would be +inf and corrupt the samples.
    """
    level = segment.dBFS
    if level == _SILENCE_DBFS:
        return segment
    if level < low_dbfs or level > high_dbfs:
        segment = segment + (_TARGET_LOUDNESS_DBFS - level)
    return segment


def _export_wav(segment, output_file):
    """Write *segment* to *output_file* as WAV, creating parent directories."""
    output_file = Path(output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # export() hands back the open output file object; close it explicitly
    # instead of relying on garbage collection.
    segment.export(str(output_file), format="wav").close()


# ============ Audio Utils ============

def load_audio(file_path):
    """Load an audio file with librosa.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: (audio, sr) exactly as returned by ``librosa.load`` called
        with ``sr=None``.
    """
    # Imported lazily so the module can be used without librosa installed.
    import librosa
    audio, sr = librosa.load(file_path, sr=None)
    return audio, sr


def save_audio(file_path, audio, sr):
    """Write audio samples to a file with soundfile.

    Args:
        file_path: Destination path.
        audio: Sample data passed straight to ``soundfile.write``.
        sr: Sample rate of *audio*.
    """
    # Imported lazily so the module can be used without soundfile installed.
    import soundfile as sf
    sf.write(file_path, audio, sr)


# ============ Vocal Separation ============

def separate_vocals_and_instrumental(input_audio: Path, output_dir: Path):
    """
    Separate vocals and instrumental using Demucs

    Runs ``python -m demucs --two-stems vocals -n htdemucs`` in a subprocess,
    then moves the two resulting stems to ``<output_dir>/vocals.wav`` and
    ``<output_dir>/instrumental.wav`` and removes the Demucs working folder.

    Args:
        input_audio: Audio file to split.
        output_dir: Directory that receives the stems (created if missing).

    Returns:
        tuple: (vocals_path, instrumental_path) or (None, None) if failed
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    vocals_path = output_dir / "vocals.wav"
    instrumental_path = output_dir / "instrumental.wav"

    try:
        print("🎵 Starting vocal separation with Demucs...")

        cmd = [
            sys.executable, "-m", "demucs",
            "--two-stems", "vocals",
            "-n", "htdemucs",
            "-o", str(output_dir),
            str(input_audio),
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=_DEMUCS_TIMEOUT_SECONDS,
        )

        # A failed run must not be mistaken for success just because stems
        # from some earlier run are still lying around; surface the reason.
        if result.returncode != 0:
            print(f"⚠️ Demucs exited with code {result.returncode}")
            stderr_tail = (result.stderr or "").strip().splitlines()[-5:]
            if stderr_tail:
                print("\n".join(stderr_tail))
            return None, None

        # Demucs writes to <output_dir>/<model>/<input file stem>/
        input_stem = Path(input_audio).stem
        demucs_output = output_dir / "htdemucs" / input_stem

        if demucs_output.exists():
            vocals_file = demucs_output / "vocals.wav"
            no_vocals_file = demucs_output / "no_vocals.wav"

            if vocals_file.exists() and no_vocals_file.exists():
                shutil.move(str(vocals_file), str(vocals_path))
                shutil.move(str(no_vocals_file), str(instrumental_path))

                # Cleanup
                shutil.rmtree(output_dir / "htdemucs", ignore_errors=True)

                print("✅ Separation complete!")
                return vocals_path, instrumental_path

        print("⚠️ Demucs output not found")
        return None, None

    except subprocess.TimeoutExpired:
        print("⚠️ Demucs timeout, skipping separation")
        return None, None
    except Exception as e:
        print(f"⚠️ Separation failed: {e}")
        return None, None


# ============ Audio Merging ============

def merge_vocals_and_instrumental(converted_vocals: Path, instrumental: Path,
                                  output_file: Path) -> bool:
    """
    Merge converted vocals with instrumental

    Both tracks are brought to 44.1 kHz stereo and trimmed to the shorter
    one. The vocals are attenuated (never boosted) so they sit at most 4 dB
    below the instrumental, the tracks are overlaid, and the mix is pulled
    to -20 dBFS when it is quieter than -25 dBFS or louder than -10 dBFS.

    Args:
        converted_vocals: Vocal track to lay over the instrumental.
        instrumental: Backing track.
        output_file: Destination WAV file (parent directories are created).

    Returns:
        bool: Success status
    """
    try:
        vocals_audio = _standardize_format(
            AudioSegment.from_file(str(converted_vocals))
        )
        instrumental_audio = _standardize_format(
            AudioSegment.from_file(str(instrumental))
        )

        # Match lengths: trim both tracks to the shorter one
        common_len = min(len(vocals_audio), len(instrumental_audio))
        vocals_audio = vocals_audio[:common_len]
        instrumental_audio = instrumental_audio[:common_len]

        # Adjust vocal volume (slightly lower than instrumental)
        vocals_dBFS = vocals_audio.dBFS
        instrumental_dBFS = instrumental_audio.dBFS
        # A silent instrumental reports -inf; using it as the reference would
        # apply a -inf gain and mute the vocals entirely, so skip in that case.
        if instrumental_dBFS != _SILENCE_DBFS:
            target_vocals_dBFS = instrumental_dBFS - _VOCAL_OFFSET_DB
            if vocals_dBFS > target_vocals_dBFS:
                vocals_audio = vocals_audio + (target_vocals_dBFS - vocals_dBFS)

        # Mix
        mixed_audio = instrumental_audio.overlay(vocals_audio)

        # Normalize final volume
        mixed_audio = _normalize_loudness(mixed_audio, -25, -10)

        # Export
        _export_wav(mixed_audio, output_file)

        return True

    except Exception as e:
        print(f"❌ Merge failed: {e}")
        traceback.print_exc()
        return False


def optimize_audio(input_file: Path, output_file: Path) -> bool:
    """Optimize audio quality

    Converts the input to 44.1 kHz stereo, pulls its loudness to -20 dBFS
    when it is below -30 dBFS or above -10 dBFS, and writes a WAV file.
    If processing fails, the unmodified input is copied to *output_file* as
    a best-effort fallback.

    Args:
        input_file: Audio file to process.
        output_file: Destination WAV file (parent directories are created).

    Returns:
        bool: True when the optimized file was written, False when
        processing failed (whether or not the fallback copy succeeded).
    """
    try:
        audio = _standardize_format(AudioSegment.from_file(str(input_file)))

        # Normalize volume
        audio = _normalize_loudness(audio, -30, -10)

        _export_wav(audio, output_file)

        return True
    except Exception as e:
        print(f"Audio optimization failed: {e}")
        # The fallback itself can fail (missing parent directory, same file,
        # unreadable input); it must not raise out of a bool-returning API.
        try:
            fallback_target = Path(output_file)
            fallback_target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(input_file, fallback_target)
        except OSError as copy_error:
            print(f"Fallback copy failed: {copy_error}")
        return False


# ============ Simple Mix (Fallback) ============

def simple_mix_audio(voice_file: Path, background_music: Path, output_file: Path,
                     voice_volume_db: float = 0.0,
                     music_volume_db: float = -10.0) -> bool:
    """
    Simple mix mode: overlay voice on background music

    The output always has the length of the music: a longer voice track is
    truncated, a shorter one is repeated until it covers the music.

    Args:
        voice_file: Voice recording.
        background_music: Music bed that defines the output length.
        output_file: Destination WAV file (parent directories are created).
        voice_volume_db: Gain in dB applied to the voice.
        music_volume_db: Gain in dB applied to the music.

    Returns:
        bool: Success status
    """
    try:
        voice_audio = AudioSegment.from_file(str(voice_file))
        music_audio = AudioSegment.from_file(str(background_music))

        if voice_volume_db != 0.0:
            voice_audio = voice_audio + voice_volume_db
        if music_volume_db != 0.0:
            music_audio = music_audio + music_volume_db

        # Match lengths
        voice_len = len(voice_audio)
        music_len = len(music_audio)

        if voice_len > music_len:
            voice_audio = voice_audio[:music_len]
        elif music_len > voice_len:
            # An empty voice track cannot be looped; fail with a clear reason
            # rather than an opaque division-by-zero message.
            if voice_len == 0:
                raise ValueError("voice track is empty")
            num_loops = (music_len // voice_len) + 1
            voice_audio = (voice_audio * num_loops)[:music_len]

        mixed_audio = music_audio.overlay(voice_audio)

        _export_wav(mixed_audio, output_file)

        return True

    except Exception as e:
        print(f"Simple mix failed: {e}")
        return False