github-actions[bot] committed on
Commit
61f32ab
·
1 Parent(s): 207cecb

Sync from GitHub: d66c6dbf67237f862d611254033ad1302afdafd1

Browse files
Files changed (1) hide show
  1. services/audio_separator.py +67 -54
services/audio_separator.py CHANGED
@@ -142,69 +142,82 @@ class AudioSeparator:
142
  self._save_audio(source, sr, path)
143
  return {"guitar_rhythm": path, "guitar_lead": path}
144
 
145
- # MID-SIDE PROCESSING
146
- # Mid = (L + R) / 2 -> Center content (usually rhythm)
147
- # Side = (L - R) / 2 -> Stereo difference (usually lead)
148
- left = source[0:1, :] # (1, samples)
149
- right = source[1:2, :] # (1, samples)
150
-
151
- mid = (left + right) / 2.0 # Center content -> Rhythm
152
- side = (left - right) / 2.0 # Stereo diff -> Lead
153
-
154
- # Apply subtle frequency filtering for better separation
155
- # Rhythm: Emphasize low-mid (100-2000Hz) for chunky rhythm tones
156
- # Lead: Emphasize mid-high (800-8000Hz) for melodic clarity
157
- try:
158
- import scipy.signal as signal
 
159
 
160
- # Design filters
161
- nyquist = sr / 2
162
-
163
- # Rhythm: Low-pass + slight presence boost (keep fundamentals)
164
- rhythm_lowcut = 80 / nyquist
165
- rhythm_highcut = 4000 / nyquist
166
- b_rhythm, a_rhythm = signal.butter(4, [rhythm_lowcut, rhythm_highcut], btype='band')
167
-
168
- # Lead: Band-pass for melodic range
169
- lead_lowcut = 200 / nyquist
170
- lead_highcut = 8000 / nyquist
171
- b_lead, a_lead = signal.butter(4, [lead_lowcut, lead_highcut], btype='band')
 
172
 
173
- # Apply filters
174
- mid_filtered = signal.filtfilt(b_rhythm, a_rhythm, mid.numpy())
175
- side_filtered = signal.filtfilt(b_lead, a_lead, side.numpy())
176
 
177
- mid = torch.from_numpy(mid_filtered).float()
178
- side = torch.from_numpy(side_filtered).float()
179
- except Exception as e:
180
- print(f"Warning: Frequency filtering failed ({e}), using raw Mid-Side")
181
-
182
- # Make stereo for output (center both)
183
- # SWAPPED: Side = Rhythm (strumming often panned wide), Mid = Lead (melody often center)
184
- rhythm_stereo = torch.cat([side, side], dim=0)
185
- lead_stereo = torch.cat([mid, mid], dim=0)
186
-
187
- # If side is too quiet (song has no stereo separation), mix some mid into lead
188
- side_rms = torch.sqrt(torch.mean(side ** 2))
189
- mid_rms = torch.sqrt(torch.mean(mid ** 2))
190
-
191
- if side_rms < mid_rms * 0.1: # Side is <10% of mid -> almost mono mix
192
- print("Notice: Audio appears to be mostly mono. Rhythm separation may be limited.")
193
- # Create pseudo-separation using low frequencies for rhythm
194
  try:
195
- rhythm_lowpass = 2000 / nyquist
196
- b_lp, a_lp = signal.butter(4, rhythm_lowpass, btype='low')
197
- rhythm_from_mid = signal.filtfilt(b_lp, a_lp, mid.numpy())
198
- rhythm_stereo = torch.from_numpy(rhythm_from_mid).float()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  rhythm_stereo = torch.cat([rhythm_stereo, rhythm_stereo], dim=0)
200
- except:
201
- pass
 
 
 
 
 
 
202
 
203
- # Normalize to -3dB to prevent clipping
204
  def normalize(tensor):
205
  peak = tensor.abs().max()
206
  if peak > 0:
207
- target_peak = 0.707 # -3dB
208
  return tensor * (target_peak / peak)
209
  return tensor
210
 
 
142
  self._save_audio(source, sr, path)
143
  return {"guitar_rhythm": path, "guitar_lead": path}
144
 
145
+ # 1. Smart Spatial Split Check
146
+ # Calculate correlation between L and R to detect Hard Panning (Math Rock Style)
147
+ # If correlation is low, it means L and R are playing different things.
148
+ # We assume Left = Rhythm (often dropped D/lower), Right = Lead (often ornate/higher) - OR provide both as is.
149
+
150
+ # Calculate cross-correlation at lag 0
151
+ mean_l = left.mean()
152
+ mean_r = right.mean()
153
+ var_l = ((left - mean_l)**2).mean()
154
+ var_r = ((right - mean_r)**2).mean()
155
+ cov = ((left - mean_l) * (right - mean_r)).mean()
156
+
157
+ correlation = 0.0
158
+ if var_l > 0 and var_r > 0:
159
+ correlation = cov / torch.sqrt(var_l * var_r)
160
 
161
+ print(f"Guitar Stereo Correlation: {correlation:.4f}")
162
+
163
+ # Threshold for "Wide Stereo"
164
+ if abs(correlation) < 0.6:
165
+ print("Detected Wide Stereo Guitar (Math Rock Style). Using Spatial Split (L=Rhythm, R=Lead).")
166
+ # Force Hard Split
167
+ # Rhythm = Left Channel (Duplicated to Stereo)
168
+ # Lead = Right Channel (Duplicated to Stereo)
169
+ rhythm_stereo = torch.cat([left, left], dim=0)
170
+ lead_stereo = torch.cat([right, right], dim=0)
171
+ else:
172
+ print("Detected Narrow/Mono Guitar. Using Mid-Side Frequency Split.")
173
+ # Standard Mid-Side with Tighter Filters
174
 
175
+ mid = (left + right) / 2.0
176
+ side = (left - right) / 2.0
 
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  try:
179
+ import scipy.signal as signal
180
+ nyquist = sr / 2
181
+
182
+ # Rhythm: Low-Mid focus (80-1500Hz) - tighter top end
183
+ # To distinguish from lead which often occupies 800+
184
+ rhythm_low = 80 / nyquist
185
+ rhythm_high = 1200 / nyquist
186
+ b_r, a_r = signal.butter(4, [rhythm_low, rhythm_high], btype='band')
187
+
188
+ # Lead: High-Mid focus (1000-8000Hz)
189
+ lead_low = 1000 / nyquist
190
+ lead_high = 8000 / nyquist
191
+ b_l, a_l = signal.butter(4, [lead_low, lead_high], btype='band')
192
+
193
+ # Apply to Mid (Center info usually has both, but we try to separate by freq)
194
+ # We interpret 'Mid' as the main source.
195
+ rhythm_from_mid = signal.filtfilt(b_r, a_r, mid.numpy())
196
+ lead_from_mid = signal.filtfilt(b_l, a_l, mid.numpy())
197
+
198
+ # Reconstruct
199
+ # If Side exists (some stereo), add it to Lead (often spatial effects are on lead)
200
+ side_np = side.numpy()
201
+
202
+ rhythm_final = rhythm_from_mid
203
+ lead_final = lead_from_mid + (side_np * 1.5) # Boost side for lead
204
+
205
+ rhythm_stereo = torch.from_numpy(rhythm_final).float()
206
  rhythm_stereo = torch.cat([rhythm_stereo, rhythm_stereo], dim=0)
207
+
208
+ lead_stereo = torch.from_numpy(lead_final).float()
209
+ lead_stereo = torch.cat([lead_stereo, lead_stereo], dim=0)
210
+
211
+ except Exception as e:
212
+ print(f"Filter failed: {e}. Fallback to raw.")
213
+ rhythm_stereo = torch.cat([left, left], dim=0)
214
+ lead_stereo = torch.cat([right, right], dim=0)
215
 
216
+ # Normalize
217
  def normalize(tensor):
218
  peak = tensor.abs().max()
219
  if peak > 0:
220
+ target_peak = 0.89 # -1dB
221
  return tensor * (target_peak / peak)
222
  return tensor
223