Xujia Yang committed on
Commit
579f9c3
·
verified ·
1 Parent(s): 83e8d73

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +215 -0
utils.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py - Combined audio utilities for 9year Anniversary App
2
+ """
3
+ Audio processing utilities:
4
+ - Voice/instrumental separation (Demucs)
5
+ - Voice conversion (RVC)
6
+ - Audio mixing and synthesis
7
+ """
8
+
9
+ from pathlib import Path
10
+ from pydub import AudioSegment
11
+ import subprocess
12
+ import sys
13
+ import os
14
+ import tempfile
15
+ import shutil
16
+ import traceback
17
+
18
+ # ============ Audio Utils ============
19
def load_audio(file_path):
    """Read an audio file with librosa, keeping the file's own sample rate.

    Args:
        file_path: Location of the audio file; handed unchanged to
            ``librosa.load``.

    Returns:
        tuple: ``(audio, sr)`` exactly as produced by ``librosa.load``.
    """
    # Imported inside the function, so librosa is only needed when this
    # helper is actually called.
    import librosa

    # sr=None tells librosa not to resample to its default rate.
    samples, sample_rate = librosa.load(file_path, sr=None)
    return samples, sample_rate
24
+
25
def save_audio(file_path, audio, sr):
    """Write audio samples to disk through ``soundfile.write``.

    Args:
        file_path: Destination path, handed unchanged to ``soundfile.write``.
        audio: Sample data to write.
        sr: Sample rate to store with the data.
    """
    # Imported inside the function, so soundfile is only needed when this
    # helper is actually called.
    import soundfile

    soundfile.write(file_path, audio, sr)
29
+
30
+ # ============ Vocal Separation ============
31
def separate_vocals_and_instrumental(input_audio: Path, output_dir: Path):
    """Split a track into vocals and instrumental by running Demucs.

    Demucs is launched as ``python -m demucs`` (using the current
    interpreter) in two-stem mode with the ``htdemucs`` model.  The code
    expects the stems at ``<output_dir>/htdemucs/<input stem>/vocals.wav``
    and ``no_vocals.wav``; on success they are moved to
    ``<output_dir>/vocals.wav`` and ``<output_dir>/instrumental.wav``.

    Args:
        input_audio: Path of the track to separate.
        output_dir: Directory that receives the two result files; created
            if it does not exist.

    Returns:
        tuple: ``(vocals_path, instrumental_path)`` on success, or
        ``(None, None)`` if Demucs timed out, exited with a non-zero status,
        did not produce both stems, or any other error occurred.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    vocals_path = output_dir / "vocals.wav"
    instrumental_path = output_dir / "instrumental.wav"
    # Demucs writes into a sub-directory named after the model.
    demucs_root = output_dir / "htdemucs"

    try:
        print("🎵 Starting vocal separation with Demucs...")

        cmd = [
            sys.executable, "-m", "demucs",
            "--two-stems", "vocals",
            "-n", "htdemucs",
            "-o", str(output_dir),
            str(input_audio),
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
        )

        # A non-zero exit means any files on disk may be partial; report the
        # last line of stderr instead of silently discarding it.
        if result.returncode != 0:
            stderr_text = (result.stderr or "").strip()
            reason = stderr_text.splitlines()[-1] if stderr_text else "no error output"
            print(f"⚠️ Demucs exited with code {result.returncode}: {reason}")
            return None, None

        demucs_output = demucs_root / Path(input_audio).stem
        vocals_file = demucs_output / "vocals.wav"
        no_vocals_file = demucs_output / "no_vocals.wav"

        if vocals_file.is_file() and no_vocals_file.is_file():
            shutil.move(str(vocals_file), str(vocals_path))
            shutil.move(str(no_vocals_file), str(instrumental_path))

            print(f"✅ Separation complete!")
            return vocals_path, instrumental_path

        print(f"⚠️ Demucs output not found")
        return None, None

    except subprocess.TimeoutExpired:
        print("⚠️ Demucs timeout, skipping separation")
        return None, None
    except Exception as e:
        print(f"⚠️ Separation failed: {e}")
        return None, None
    finally:
        # Remove the intermediate Demucs tree on every path, not just on
        # success, so failed runs do not leave stale stems behind.
        shutil.rmtree(demucs_root, ignore_errors=True)
89
+
90
+ # ============ Audio Merging ============
91
def merge_vocals_and_instrumental(converted_vocals: Path, instrumental: Path, output_file: Path):
    """Mix a converted vocal track over an instrumental and export it as WAV.

    Both inputs are brought to 44.1 kHz, mono inputs are widened to two
    channels, the longer track is trimmed to the shorter one, the vocals
    are attenuated if they are louder than 4 dB below the instrumental,
    and the mix is re-levelled to -20 dBFS when it falls outside the
    -25..-10 dBFS window.

    Args:
        converted_vocals: Path of the vocal track.
        instrumental: Path of the instrumental track.
        output_file: Destination WAV path; parent directories are created.

    Returns:
        bool: ``True`` if the mix was exported, ``False`` if any step raised
        (the error and traceback are printed).
    """
    # Local import keeps this block self-contained; only isfinite is needed.
    import math

    try:
        vocals_audio = AudioSegment.from_file(str(converted_vocals))
        instrumental_audio = AudioSegment.from_file(str(instrumental))

        # Normalize sample rates
        target_sr = 44100
        if vocals_audio.frame_rate != target_sr:
            vocals_audio = vocals_audio.set_frame_rate(target_sr)
        if instrumental_audio.frame_rate != target_sr:
            instrumental_audio = instrumental_audio.set_frame_rate(target_sr)

        # Stereo
        if vocals_audio.channels == 1:
            vocals_audio = vocals_audio.set_channels(2)
        if instrumental_audio.channels == 1:
            instrumental_audio = instrumental_audio.set_channels(2)

        # Match lengths (len() of an AudioSegment is in milliseconds)
        vocals_len = len(vocals_audio)
        instrumental_len = len(instrumental_audio)

        if vocals_len > instrumental_len:
            vocals_audio = vocals_audio[:instrumental_len]
        elif instrumental_len > vocals_len:
            instrumental_audio = instrumental_audio[:vocals_len]

        # Adjust vocal volume (only ever lowered, to 4 dB under the
        # instrumental).  dBFS is -inf for silent audio; skip the adjustment
        # then, otherwise a silent instrumental would mute the vocals with an
        # infinite negative gain.
        vocals_dBFS = vocals_audio.dBFS
        instrumental_dBFS = instrumental_audio.dBFS

        if math.isfinite(vocals_dBFS) and math.isfinite(instrumental_dBFS):
            target_vocals_dBFS = instrumental_dBFS - 4.0
            if vocals_dBFS > target_vocals_dBFS:
                vocals_audio = vocals_audio + (target_vocals_dBFS - vocals_dBFS)

        # Mix
        mixed_audio = instrumental_audio.overlay(vocals_audio)

        # Normalize final volume to -20 dBFS when it is too quiet or too
        # loud.  A silent mix (-inf dBFS) is left untouched: the required
        # gain would be infinite and make the whole merge fail.
        mixed_dBFS = mixed_audio.dBFS
        if math.isfinite(mixed_dBFS) and (mixed_dBFS < -25 or mixed_dBFS > -10):
            mixed_audio = mixed_audio + (-20.0 - mixed_dBFS)

        # Export
        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        mixed_audio.export(str(output_file), format="wav")

        return True

    except Exception as e:
        print(f"❌ Merge failed: {e}")
        traceback.print_exc()
        return False
153
+
154
def optimize_audio(input_file: Path, output_file: Path):
    """Re-export an audio file as 44.1 kHz, two-channel WAV at a sane level.

    The level is moved to -20 dBFS only when the measured level is below
    -30 dBFS or above -10 dBFS.  If processing fails for any reason, the
    input file is copied to ``output_file`` unchanged as a best-effort
    fallback.

    Args:
        input_file: Path of the audio file to process.
        output_file: Destination path; parent directories are created.

    Returns:
        bool: ``True`` if the processed audio was exported, ``False`` if
        processing failed (regardless of whether the fallback copy
        succeeded).
    """
    # Local import keeps this block self-contained; only isfinite is needed.
    import math

    try:
        audio = AudioSegment.from_file(str(input_file))

        if audio.frame_rate != 44100:
            audio = audio.set_frame_rate(44100)
        if audio.channels == 1:
            audio = audio.set_channels(2)

        # Normalize volume.  dBFS is -inf for silent audio; skip the gain in
        # that case because the required boost would be infinite.
        dBFS = audio.dBFS
        if math.isfinite(dBFS) and (dBFS < -30 or dBFS > -10):
            audio = audio + (-20.0 - dBFS)

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        audio.export(str(output_file), format="wav")

        return True
    except Exception as e:
        print(f"Audio optimization failed: {e}")
        # Best-effort fallback: hand the caller the unprocessed input.  The
        # copy itself may fail (missing input, same source and destination,
        # unwritable target); that must not escape as an exception from a
        # function that reports failure through its return value.
        try:
            fallback_target = Path(output_file)
            fallback_target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(input_file, fallback_target)
        except OSError as copy_error:
            print(f"Audio optimization fallback copy failed: {copy_error}")
        return False
178
+
179
+ # ============ Simple Mix (Fallback) ============
180
def simple_mix_audio(voice_file: Path, background_music: Path, output_file: Path,
                     voice_volume_db: float = 0.0, music_volume_db: float = -10.0):
    """Fallback mixer: lay a voice clip on top of a background track.

    The exported WAV always has the length of the background music.  A
    voice clip longer than the music is cut to the music's length; a
    shorter one is repeated until it covers the music and then cut.

    Args:
        voice_file: Path of the voice recording.
        background_music: Path of the backing track.
        output_file: Destination WAV path; parent directories are created.
        voice_volume_db: Gain in dB applied to the voice (skipped when 0).
        music_volume_db: Gain in dB applied to the music (skipped when 0).

    Returns:
        bool: ``True`` once the mix has been exported, ``False`` if anything
        raised along the way (the error is printed).
    """
    try:
        voice = AudioSegment.from_file(str(voice_file))
        backing = AudioSegment.from_file(str(background_music))

        # Adding a number to an AudioSegment applies that many dB of gain.
        if voice_volume_db != 0.0:
            voice += voice_volume_db
        if music_volume_db != 0.0:
            backing += music_volume_db

        voice_ms = len(voice)
        backing_ms = len(backing)

        # NOTE(review): the voice, not the music, is what gets looped when it
        # is the shorter of the two — confirm this is the intended behaviour.
        if voice_ms < backing_ms:
            repeats = (backing_ms // voice_ms) + 1
            voice = voice * repeats
        if voice_ms != backing_ms:
            # Either the voice was too long or it has just been over-extended
            # by looping; in both cases cut it to the music's length.
            voice = voice[:backing_ms]

        mixed = backing.overlay(voice)

        destination = Path(output_file)
        destination.parent.mkdir(parents=True, exist_ok=True)
        mixed.export(str(destination), format="wav")

        return True

    except Exception as e:
        print(f"Simple mix failed: {e}")
        return False