latterworks committed on
Commit
802da7d
·
verified ·
1 Parent(s): ac57520

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +453 -1006
app.py CHANGED
@@ -1,1164 +1,611 @@
1
- #!/usr/bin/env python3
2
- """
3
- Live Audio Singing Helper - Production Grade
4
- Advanced audio processing tool for singers and musicians
5
- Author: Lead Developer
6
- Version: 2.0.0
7
- """
8
-
9
  import gradio as gr
10
  import librosa
11
  import numpy as np
12
  import soundfile as sf
13
- from spleeter.separator import Separator
14
  import os
15
- import sys
16
- import shutil
17
  import tempfile
18
- import scipy.signal
19
- import matplotlib.pyplot as plt
20
- import traceback
21
- import logging
22
- import gc
23
  from pathlib import Path
24
- from typing import Tuple, Optional, Dict, Any, List, Union
25
- from dataclasses import dataclass
26
- from contextlib import contextmanager
27
  import warnings
28
  warnings.filterwarnings("ignore")
29
 
30
- # Style coaching imports
31
- from scipy.spatial.distance import euclidean
32
- from dtw import dtw
33
-
34
- # Configure logging
35
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
36
- logger = logging.getLogger(__name__)
37
 
38
- # Constants
39
- MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB
40
- SUPPORTED_FORMATS = ['.mp3', '.wav', '.flac', '.m4a', '.ogg', '.aac']
41
- MAX_DURATION = 600 # 10 minutes
42
- TEMP_DIR_PREFIX = "audio_helper_"
43
- VERSION = "2.0.0"
 
 
44
 
45
- @dataclass
46
- class ProcessingResult:
47
- """Structured result container for audio processing operations"""
48
- success: bool
49
- message: str
50
- data: Optional[Dict[str, Any]] = None
51
- error: Optional[str] = None
52
-
53
- class AudioProcessorPro:
54
- """Professional-grade audio processor with comprehensive error handling and optimization"""
55
 
56
  def __init__(self):
57
- self.separator_2stems = None
58
- self.separator_4stems = None
59
- self.temp_dir = None
60
- self.session_id = None
61
- self._initialize_session()
62
-
63
- def _initialize_session(self):
64
- """Initialize processing session with proper cleanup"""
65
- try:
66
- self.session_id = f"{TEMP_DIR_PREFIX}{np.random.randint(100000)}"
67
- self.temp_dir = tempfile.mkdtemp(prefix=self.session_id)
68
- logger.info(f"Session initialized: {self.session_id}")
69
- except Exception as e:
70
- logger.error(f"Session initialization failed: {e}")
71
- raise
72
-
73
- @contextmanager
74
- def _safe_processing(self, operation_name: str):
75
- """Context manager for safe processing with automatic cleanup"""
76
- logger.info(f"Starting {operation_name}")
77
- try:
78
- yield
79
- logger.info(f"Completed {operation_name}")
80
- except Exception as e:
81
- logger.error(f"Error in {operation_name}: {e}")
82
- raise
83
- finally:
84
- gc.collect() # Force garbage collection
85
 
86
- def validate_audio_file(self, audio_path: str) -> ProcessingResult:
87
- """Comprehensive audio file validation"""
88
  try:
89
- if not audio_path or not os.path.exists(audio_path):
90
- return ProcessingResult(False, "Audio file not found")
91
 
92
- # Check file size
93
- file_size = os.path.getsize(audio_path)
94
- if file_size > MAX_FILE_SIZE:
95
- return ProcessingResult(False, f"File too large. Max size: {MAX_FILE_SIZE//1024//1024}MB")
96
 
97
- # Check file format
98
- file_ext = Path(audio_path).suffix.lower()
99
- if file_ext not in SUPPORTED_FORMATS:
100
- return ProcessingResult(False, f"Unsupported format. Supported: {', '.join(SUPPORTED_FORMATS)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- # Check audio properties
103
- try:
104
- y, sr = librosa.load(audio_path, duration=1.0) # Load first second for validation
105
- duration = librosa.get_duration(filename=audio_path)
106
-
107
- if duration > MAX_DURATION:
108
- return ProcessingResult(False, f"Audio too long. Max duration: {MAX_DURATION//60} minutes")
109
-
110
- if sr < 8000:
111
- return ProcessingResult(False, "Sample rate too low (minimum 8kHz)")
112
-
113
- return ProcessingResult(True, "File validation passed", {
114
- 'duration': duration,
115
- 'sample_rate': sr,
116
- 'file_size': file_size
117
- })
118
-
119
- except Exception as e:
120
- return ProcessingResult(False, f"Invalid audio file: {str(e)}")
121
-
122
  except Exception as e:
123
- return ProcessingResult(False, f"Validation error: {str(e)}")
124
 
125
- def get_separator(self, stems: int = 2) -> Separator:
126
- """Lazy load and cache Spleeter models"""
 
 
 
127
  try:
128
- if stems == 2:
129
- if self.separator_2stems is None:
130
- logger.info("Loading Spleeter 2-stem model...")
131
- self.separator_2stems = Separator("spleeter:2stems-16kHz")
132
- return self.separator_2stems
133
- elif stems == 4:
134
- if self.separator_4stems is None:
135
- logger.info("Loading Spleeter 4-stem model...")
136
- self.separator_4stems = Separator("spleeter:4stems-16kHz")
137
- return self.separator_4stems
138
- else:
139
- raise ValueError(f"Unsupported stem count: {stems}")
140
- except Exception as e:
141
- logger.error(f"Failed to load Spleeter model: {e}")
142
- raise
143
-
144
- def extract_comprehensive_features(self, audio_path: str) -> ProcessingResult:
145
- """Extract comprehensive audio features with proper error handling"""
146
- with self._safe_processing("feature_extraction"):
147
- try:
148
- validation = self.validate_audio_file(audio_path)
149
- if not validation.success:
150
- return validation
151
-
152
- y, sr = librosa.load(audio_path, sr=None)
153
- duration = librosa.get_duration(y=y, sr=sr)
154
-
155
- # Core features
156
- tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
157
-
158
- # Spectral features
159
- spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
160
- spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
161
- spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
162
- zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0]
163
-
164
- # Timbral features
165
- mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
166
- chroma = librosa.feature.chroma_stft(y=y, sr=sr)
167
-
168
- # Dynamic features
169
- rms = librosa.feature.rms(y=y)[0]
170
-
171
- # Pitch estimation
172
- pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
173
- pitch_values = []
174
- for t in range(pitches.shape[1]):
175
- index = magnitudes[:, t].argmax()
176
- pitch = pitches[index, t]
177
- if pitch > 0:
178
- pitch_values.append(pitch)
179
-
180
- features = {
181
- # Basic properties
182
- 'duration': round(duration, 2),
183
- 'sample_rate': sr,
184
- 'file_size': validation.data['file_size'],
185
-
186
- # Rhythm and tempo
187
- 'tempo': round(tempo, 1),
188
- 'num_beats': len(beats),
189
- 'rhythm_regularity': round(np.std(np.diff(beats)), 3),
190
-
191
- # Spectral characteristics
192
- 'spectral_centroid_mean': round(np.mean(spectral_centroids), 2),
193
- 'spectral_centroid_std': round(np.std(spectral_centroids), 2),
194
- 'spectral_rolloff_mean': round(np.mean(spectral_rolloff), 2),
195
- 'spectral_bandwidth_mean': round(np.mean(spectral_bandwidth), 2),
196
- 'zero_crossing_rate_mean': round(np.mean(zero_crossing_rate), 4),
197
-
198
- # Dynamic properties
199
- 'rms_energy_mean': round(np.mean(rms), 4),
200
- 'rms_energy_std': round(np.std(rms), 4),
201
- 'dynamic_range': round(np.max(rms) - np.min(rms), 4),
202
-
203
- # Pitch information
204
- 'pitch_mean': round(np.mean(pitch_values), 2) if pitch_values else 0,
205
- 'pitch_std': round(np.std(pitch_values), 2) if pitch_values else 0,
206
- 'pitch_range': round(max(pitch_values) - min(pitch_values), 2) if len(pitch_values) > 1 else 0,
207
-
208
- # Timbral features (for advanced analysis)
209
- 'mfcc_mean': np.round(np.mean(mfccs, axis=1), 3).tolist(),
210
- 'chroma_mean': np.round(np.mean(chroma, axis=1), 3).tolist(),
211
-
212
- # Quality metrics
213
- 'signal_to_noise_ratio': round(20 * np.log10(np.mean(rms) / (np.std(rms) + 1e-10)), 2)
214
  }
215
-
216
- return ProcessingResult(True, "Feature extraction successful", features)
217
-
218
- except Exception as e:
219
- logger.error(f"Feature extraction failed: {e}")
220
- return ProcessingResult(False, f"Feature extraction failed: {str(e)}")
221
-
222
- def separate_audio_sources(self, audio_path: str, stems: int = 2,
223
- progress_callback=None) -> ProcessingResult:
224
- """Professional audio source separation with progress tracking"""
225
- with self._safe_processing(f"{stems}-stem_separation"):
226
- try:
227
- validation = self.validate_audio_file(audio_path)
228
- if not validation.success:
229
- return validation
230
-
231
- if progress_callback:
232
- progress_callback(0.1, "Initializing separator...")
233
-
234
- separator = self.get_separator(stems)
235
-
236
- if progress_callback:
237
- progress_callback(0.3, "Loading audio...")
238
-
239
- # Create unique output directory
240
- output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}")
241
- os.makedirs(output_dir, exist_ok=True)
242
-
243
- if progress_callback:
244
- progress_callback(0.5, "Separating audio sources...")
245
-
246
- # Perform separation
247
- separator.separate_to_file(audio_path, output_dir)
248
-
249
- if progress_callback:
250
- progress_callback(0.8, "Processing results...")
251
-
252
- # Get separated files
253
- base_name = os.path.splitext(os.path.basename(audio_path))[0]
254
- result_dir = os.path.join(output_dir, base_name)
255
-
256
- separated_files = {}
257
- if stems == 2:
258
- separated_files = {
259
- 'vocals': os.path.join(result_dir, "vocals.wav"),
260
- 'accompaniment': os.path.join(result_dir, "accompaniment.wav")
261
- }
262
- else: # 4 stems
263
- separated_files = {
264
- 'vocals': os.path.join(result_dir, "vocals.wav"),
265
- 'drums': os.path.join(result_dir, "drums.wav"),
266
- 'bass': os.path.join(result_dir, "bass.wav"),
267
- 'other': os.path.join(result_dir, "other.wav")
268
- }
269
-
270
- # Verify all files exist
271
- missing_files = [k for k, v in separated_files.items() if not os.path.exists(v)]
272
- if missing_files:
273
- return ProcessingResult(False, f"Separation incomplete. Missing: {missing_files}")
274
-
275
- if progress_callback:
276
- progress_callback(1.0, "Separation complete!")
277
-
278
- return ProcessingResult(True, f"βœ… {stems}-stem separation successful!", separated_files)
279
-
280
- except Exception as e:
281
- logger.error(f"Audio separation failed: {e}")
282
- return ProcessingResult(False, f"Separation failed: {str(e)}")
283
-
284
- def apply_vocal_effects(self, audio_path: str, effects_config: Dict[str, float]) -> ProcessingResult:
285
- """Apply vocal effects with comprehensive options"""
286
- with self._safe_processing("vocal_effects"):
287
- try:
288
- validation = self.validate_audio_file(audio_path)
289
- if not validation.success:
290
- return validation
291
-
292
- y, sr = librosa.load(audio_path, sr=None)
293
- original_y = y.copy()
294
-
295
- # Apply pitch shifting
296
- pitch_shift = effects_config.get('pitch_shift', 0)
297
- if pitch_shift != 0:
298
- y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
299
-
300
- # Apply reverb
301
- reverb_amount = effects_config.get('reverb', 0)
302
- if reverb_amount > 0:
303
- reverb_length = int(0.5 * sr)
304
- impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
305
- impulse *= reverb_amount
306
- y = scipy.signal.convolve(y, impulse, mode='same')
307
-
308
- # Apply chorus effect
309
- chorus_amount = effects_config.get('chorus', 0)
310
- if chorus_amount > 0:
311
- delay_samples = int(0.02 * sr) # 20ms delay
312
- delayed = np.pad(original_y, (delay_samples, 0), mode='constant')[:len(y)]
313
- y = y + chorus_amount * delayed
314
-
315
- # Apply compression
316
- compression = effects_config.get('compression', 0)
317
- if compression > 0:
318
- threshold = 0.1
319
- ratio = 1 + compression * 9 # 1:1 to 10:1 ratio
320
- mask = np.abs(y) > threshold
321
- y[mask] = np.sign(y[mask]) * (threshold + (np.abs(y[mask]) - threshold) / ratio)
322
-
323
- # Normalize to prevent clipping
324
- if np.max(np.abs(y)) > 0:
325
- y = y / np.max(np.abs(y)) * 0.95
326
-
327
- # Save processed audio
328
- output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav")
329
- sf.write(output_path, y, sr)
330
-
331
- effects_applied = [k for k, v in effects_config.items() if v != 0]
332
-
333
- return ProcessingResult(True, f"Effects applied: {', '.join(effects_applied)}", {
334
- 'output_path': output_path,
335
- 'effects_applied': effects_applied
336
- })
337
-
338
- except Exception as e:
339
- logger.error(f"Effects processing failed: {e}")
340
- return ProcessingResult(False, f"Effects processing failed: {str(e)}")
341
-
342
- def cleanup_session(self):
343
- """Clean up temporary files and release resources"""
344
- try:
345
- if self.temp_dir and os.path.exists(self.temp_dir):
346
- shutil.rmtree(self.temp_dir)
347
- logger.info(f"Cleaned up session: {self.session_id}")
348
  except Exception as e:
349
- logger.warning(f"Cleanup warning: {e}")
350
-
351
- class StyleCoachingEngine:
352
- """Advanced vocal style coaching system"""
353
 
354
- def __init__(self, processor: AudioProcessorPro):
355
- self.processor = processor
356
-
357
- def extract_vocal_features(self, audio_path: str) -> ProcessingResult:
358
- """Extract detailed vocal-specific features"""
359
  try:
360
- with self.processor._safe_processing("vocal_feature_extraction"):
361
- validation = self.processor.validate_audio_file(audio_path)
362
- if not validation.success:
363
- return validation
364
-
365
- y, sr = librosa.load(audio_path, sr=None)
366
-
367
- # Advanced pitch analysis
368
- f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=librosa.note_to_hz('C2'),
369
- fmax=librosa.note_to_hz('C7'))
370
- f0_clean = f0[voiced_flag]
371
-
372
- # Formant analysis (simplified)
373
- mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
374
-
375
- # Vibrato analysis
376
- if len(f0_clean) > 10:
377
- f0_smooth = scipy.signal.medfilt(f0_clean, kernel_size=5)
378
- vibrato_extent = np.std(f0_clean - f0_smooth)
379
- else:
380
- vibrato_extent = 0
381
-
382
- # Vocal effort estimation
383
- spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
384
- vocal_effort = np.mean(spectral_centroid) / 1000 # Normalized measure
385
-
386
- features = {
387
- 'fundamental_frequency_mean': np.nanmean(f0_clean) if len(f0_clean) > 0 else 0,
388
- 'fundamental_frequency_std': np.nanstd(f0_clean) if len(f0_clean) > 0 else 0,
389
- 'pitch_range': np.nanmax(f0_clean) - np.nanmin(f0_clean) if len(f0_clean) > 0 else 0,
390
- 'vibrato_extent': vibrato_extent,
391
- 'vocal_effort': vocal_effort,
392
- 'voiced_percentage': np.mean(voiced_flag) * 100,
393
- 'mfccs': mfccs,
394
- 'duration': librosa.get_duration(y=y, sr=sr)
395
- }
396
-
397
- return ProcessingResult(True, "Vocal features extracted", features)
398
-
399
  except Exception as e:
400
- logger.error(f"Vocal feature extraction failed: {e}")
401
- return ProcessingResult(False, f"Vocal feature extraction failed: {str(e)}")
402
 
403
- def build_style_profile(self, reference_features: List[Dict]) -> ProcessingResult:
404
- """Build comprehensive style profile from reference tracks"""
405
  try:
406
- if len(reference_features) < 2:
407
- return ProcessingResult(False, "Need at least 2 reference tracks")
408
 
409
- valid_features = [f for f in reference_features if f is not None]
410
- if len(valid_features) < 2:
411
- return ProcessingResult(False, "Not enough valid reference features")
 
 
 
 
 
412
 
413
- # Aggregate features
414
- profile = {}
415
- for key in valid_features[0].keys():
416
- if key != 'mfccs': # Handle MFCCs separately
417
- values = [f[key] for f in valid_features if key in f and f[key] is not None]
418
- if values:
419
- profile[key] = np.mean(values)
420
- profile[f'{key}_std'] = np.std(values)
421
 
422
- # Handle MFCCs
423
- mfcc_arrays = [f['mfccs'] for f in valid_features if 'mfccs' in f]
424
- if mfcc_arrays:
425
- profile['mfccs'] = np.mean(mfcc_arrays, axis=0)
426
 
427
- profile['num_references'] = len(valid_features)
 
428
 
429
- return ProcessingResult(True, f"Style profile built from {len(valid_features)} references", profile)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
 
431
  except Exception as e:
432
- logger.error(f"Style profile creation failed: {e}")
433
- return ProcessingResult(False, f"Style profile creation failed: {str(e)}")
434
 
435
- def compare_performance(self, user_features: Dict, style_profile: Dict) -> ProcessingResult:
436
- """Compare user performance to style profile"""
 
 
 
437
  try:
438
- comparison = {}
 
 
 
 
439
 
440
- # Pitch comparison
441
- pitch_diff = abs(user_features.get('fundamental_frequency_mean', 0) -
442
- style_profile.get('fundamental_frequency_mean', 0))
443
- comparison['pitch_accuracy'] = max(0, 100 - (pitch_diff / 10)) # Score out of 100
 
444
 
445
- # Range comparison
446
- user_range = user_features.get('pitch_range', 0)
447
- target_range = style_profile.get('pitch_range', 0)
448
- range_ratio = min(user_range, target_range) / max(user_range, target_range) if max(user_range, target_range) > 0 else 0
449
- comparison['range_match'] = range_ratio * 100
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
- # Vibrato comparison
452
- user_vibrato = user_features.get('vibrato_extent', 0)
453
- target_vibrato = style_profile.get('vibrato_extent', 0)
454
- vibrato_diff = abs(user_vibrato - target_vibrato)
455
- comparison['vibrato_match'] = max(0, 100 - vibrato_diff * 50)
456
 
457
- # Overall style similarity
458
- comparison['overall_similarity'] = np.mean([
459
- comparison['pitch_accuracy'],
460
- comparison['range_match'],
461
- comparison['vibrato_match']
462
- ])
463
 
464
- return ProcessingResult(True, "Performance comparison complete", comparison)
 
 
 
 
 
 
 
 
 
 
465
 
466
  except Exception as e:
467
- logger.error(f"Performance comparison failed: {e}")
468
- return ProcessingResult(False, f"Performance comparison failed: {str(e)}")
 
 
 
 
 
 
 
469
 
470
- # Global processor instance
471
- processor = AudioProcessorPro()
472
- style_coach = StyleCoachingEngine(processor)
473
 
474
- def format_analysis_text(features: Dict[str, Any]) -> str:
475
  """Format analysis results for display"""
476
- if not features:
477
- return "No analysis data available"
478
 
479
- text = f"""πŸ“Š **Audio Analysis Results**
480
-
481
- 🎡 **Basic Properties**
482
- β€’ Duration: {features.get('duration', 'N/A')} seconds
483
- β€’ Sample Rate: {features.get('sample_rate', 'N/A')} Hz
484
- β€’ File Size: {features.get('file_size', 0) / 1024:.1f} KB
485
 
486
- 🎼 **Musical Characteristics**
487
- β€’ Tempo: {features.get('tempo', 'N/A')} BPM
488
- β€’ Beats Detected: {features.get('num_beats', 'N/A')}
489
- β€’ Rhythm Regularity: {features.get('rhythm_regularity', 'N/A')}
490
 
491
- πŸ”Š **Spectral Properties**
492
- β€’ Brightness (Spectral Centroid): {features.get('spectral_centroid_mean', 'N/A')} Hz
493
- β€’ Spectral Bandwidth: {features.get('spectral_bandwidth_mean', 'N/A')} Hz
494
- β€’ Zero Crossing Rate: {features.get('zero_crossing_rate_mean', 'N/A')}
 
495
 
496
- πŸ“ˆ **Dynamic Characteristics**
497
- β€’ Average Energy: {features.get('rms_energy_mean', 'N/A')}
498
- β€’ Dynamic Range: {features.get('dynamic_range', 'N/A')}
499
- β€’ Signal-to-Noise Ratio: {features.get('signal_to_noise_ratio', 'N/A')} dB
500
 
501
- 🎀 **Pitch Information**
502
- β€’ Average Pitch: {features.get('pitch_mean', 'N/A')} Hz
503
- β€’ Pitch Variation: {features.get('pitch_std', 'N/A')} Hz
504
- β€’ Pitch Range: {features.get('pitch_range', 'N/A')} Hz"""
505
-
506
- return text
507
-
508
- def process_audio_separation(audio_file, stems_mode, pitch_shift, reverb, chorus, compression):
509
- """Main audio separation processing function"""
510
  if not audio_file:
511
- return (
512
- "❌ Please upload an audio file",
513
- None, None, None, None,
514
- "No analysis available"
515
- )
516
 
517
  try:
518
- # Progress tracking
519
- progress_updates = []
520
-
521
- def progress_callback(progress, message):
522
- progress_updates.append(f"[{progress*100:.0f}%] {message}")
523
-
524
- # Analyze features first
525
- feature_result = processor.extract_comprehensive_features(audio_file)
526
- if not feature_result.success:
527
- return (
528
- f"❌ {feature_result.message}",
529
- None, None, None, None,
530
- feature_result.error or "Analysis failed"
531
- )
532
-
533
- analysis_text = format_analysis_text(feature_result.data)
534
 
535
  # Separate audio
536
- stems = 2 if stems_mode == "2-stem (Vocals + Instrumental)" else 4
537
- separation_result = processor.separate_audio_sources(audio_file, stems, progress_callback)
538
-
539
- if not separation_result.success:
540
- return (
541
- f"❌ {separation_result.message}",
542
- None, None, None, None,
543
- analysis_text
544
- )
545
-
546
- separated_files = separation_result.data
547
 
548
- # Apply effects to vocals if requested
549
- effects_config = {
550
- 'pitch_shift': pitch_shift,
551
- 'reverb': reverb,
552
- 'chorus': chorus,
553
- 'compression': compression
554
- }
555
 
556
- vocals_path = separated_files.get('vocals')
557
- if vocals_path and any(v != 0 for v in effects_config.values()):
558
- effects_result = processor.apply_vocal_effects(vocals_path, effects_config)
559
- if effects_result.success:
560
- vocals_path = effects_result.data['output_path']
561
- separation_result.message += f" | {effects_result.message}"
562
-
563
- # Prepare outputs based on stems
564
- if stems == 2:
565
  return (
566
- f"βœ… {separation_result.message}",
567
- vocals_path,
568
- separated_files.get('accompaniment'),
569
- None, None,
 
570
  analysis_text
571
  )
572
  else:
573
  return (
574
- f"βœ… {separation_result.message}",
575
- vocals_path,
576
- separated_files.get('drums'),
577
- separated_files.get('bass'),
578
- separated_files.get('other'),
579
  analysis_text
580
  )
581
 
582
  except Exception as e:
583
- logger.error(f"Audio separation processing failed: {e}")
584
- return (
585
- f"❌ Processing failed: {str(e)}",
586
- None, None, None, None,
587
- "Analysis failed due to processing error"
588
- )
589
 
590
- def process_live_recording(audio_file, pitch_shift, reverb, chorus, compression):
591
- """Process live recording with effects"""
592
  if not audio_file:
593
- return (
594
- "❌ Please record audio first",
595
- None,
596
- "No analysis available"
597
- )
598
 
599
  try:
600
- # Analyze features
601
- feature_result = processor.extract_comprehensive_features(audio_file)
602
- if not feature_result.success:
603
- return (
604
- f"❌ {feature_result.message}",
605
- None,
606
- feature_result.error or "Analysis failed"
607
- )
608
-
609
- analysis_text = format_analysis_text(feature_result.data)
610
 
611
  # Apply effects
612
- effects_config = {
613
- 'pitch_shift': pitch_shift,
614
- 'reverb': reverb,
615
- 'chorus': chorus,
616
- 'compression': compression
617
- }
618
 
619
- effects_result = processor.apply_vocal_effects(audio_file, effects_config)
620
- if not effects_result.success:
621
- return (
622
- f"❌ {effects_result.message}",
623
- None,
624
- analysis_text
625
- )
626
 
627
- return (
628
- f"βœ… {effects_result.message}",
629
- effects_result.data['output_path'],
630
- analysis_text
631
- )
 
 
 
 
632
 
633
  except Exception as e:
634
- logger.error(f"Live recording processing failed: {e}")
635
- return (
636
- f"❌ Processing failed: {str(e)}",
637
- None,
638
- "Analysis failed due to processing error"
639
- )
640
 
641
  def process_style_coaching(reference_files, user_audio):
642
- """Advanced style coaching analysis"""
643
  if not reference_files or len(reference_files) < 2:
644
- return (
645
- "❌ Please upload at least 2 reference tracks",
646
- "No references processed",
647
- "Upload reference tracks to get personalized coaching feedback"
648
- )
649
 
650
  if not user_audio:
651
- return (
652
- "❌ Please upload or record your performance",
653
- "No references processed",
654
- "Record your performance to compare with references"
655
- )
656
 
657
  try:
658
  # Process reference tracks
659
  ref_features = []
660
  ref_status = []
661
 
662
- for i, ref_file in enumerate(reference_files[:5]): # Limit to 5 references
663
- try:
664
- # Separate vocals from reference
665
- separation_result = processor.separate_audio_sources(ref_file.name, stems=2)
666
- if separation_result.success:
667
- vocals_path = separation_result.data.get('vocals')
668
- if vocals_path:
669
- # Extract vocal features
670
- vocal_result = style_coach.extract_vocal_features(vocals_path)
671
- if vocal_result.success:
672
- ref_features.append(vocal_result.data)
673
- ref_status.append(f"βœ… Reference {i+1}: Processed successfully")
674
- else:
675
- ref_status.append(f"❌ Reference {i+1}: Feature extraction failed")
676
- else:
677
- ref_status.append(f"❌ Reference {i+1}: Vocal separation failed")
678
  else:
679
- ref_status.append(f"❌ Reference {i+1}: {separation_result.message}")
680
- except Exception as e:
681
- ref_status.append(f"❌ Reference {i+1}: {str(e)}")
682
 
683
  if len(ref_features) < 2:
684
- return (
685
- "❌ Failed to process enough reference tracks",
686
- "\n".join(ref_status),
687
- "Need at least 2 valid reference tracks for style analysis"
688
- )
689
-
690
- # Build style profile
691
- profile_result = style_coach.build_style_profile(ref_features)
692
- if not profile_result.success:
693
- return (
694
- f"❌ {profile_result.message}",
695
- "\n".join(ref_status),
696
- "Style profile creation failed"
697
- )
698
 
699
  # Process user audio
700
- user_separation = processor.separate_audio_sources(user_audio, stems=2)
701
- if not user_separation.success:
702
- return (
703
- f"❌ Failed to process your audio: {user_separation.message}",
704
- "\n".join(ref_status),
705
- "Could not separate vocals from your performance"
706
- )
707
 
708
- user_vocals_path = user_separation.data.get('vocals')
709
- if not user_vocals_path:
710
- return (
711
- "❌ Could not extract vocals from your performance",
712
- "\n".join(ref_status),
713
- "Vocal separation failed"
714
- )
715
 
716
- # Extract user vocal features
717
- user_vocal_result = style_coach.extract_vocal_features(user_vocals_path)
718
- if not user_vocal_result.success:
719
- return (
720
- f"❌ {user_vocal_result.message}",
721
- "\n".join(ref_status),
722
- "Could not analyze your vocal characteristics"
723
- )
724
-
725
- # Compare performance
726
- comparison_result = style_coach.compare_performance(
727
- user_vocal_result.data,
728
- profile_result.data
729
- )
730
 
731
- if not comparison_result.success:
732
- return (
733
- f"❌ {comparison_result.message}",
734
- "\n".join(ref_status),
735
- "Performance comparison failed"
736
- )
737
-
738
- # Generate feedback
739
- comparison = comparison_result.data
740
- feedback = f"""🎯 **Style Coaching Analysis**
741
-
742
- πŸ“Š **Performance Scores**
743
- β€’ Pitch Accuracy: {comparison['pitch_accuracy']:.1f}/100
744
- β€’ Range Match: {comparison['range_match']:.1f}/100
745
- β€’ Vibrato Control: {comparison['vibrato_match']:.1f}/100
746
- β€’ **Overall Similarity: {comparison['overall_similarity']:.1f}/100**
747
 
748
- 🎡 **Detailed Feedback**
749
 
750
- **Pitch Control:**
751
- {"βœ… Excellent pitch accuracy!" if comparison['pitch_accuracy'] > 80 else
752
- "⚠️ Work on pitch accuracy. Practice scales and interval training." if comparison['pitch_accuracy'] > 60 else
753
- "❌ Significant pitch issues. Focus on basic pitch matching exercises."}
754
 
755
- **Vocal Range:**
756
- {"βœ… Great range utilization!" if comparison['range_match'] > 80 else
757
- "⚠️ Expand your vocal range to match the style." if comparison['range_match'] > 60 else
758
- "❌ Limited range usage. Practice range extension exercises."}
 
759
 
760
- **Vibrato Technique:**
761
- {"βœ… Vibrato control matches the style well!" if comparison['vibrato_match'] > 80 else
762
- "⚠️ Adjust vibrato technique to match reference style." if comparison['vibrato_match'] > 60 else
763
- "❌ Vibrato needs work. Practice vibrato control exercises."}
764
 
765
- 🎯 **Recommended Actions:**
766
- {f"β€’ Continue practicing - you're very close to the target style!" if comparison['overall_similarity'] > 80 else
767
- f"β€’ Focus on the areas scoring below 70 points" if comparison['overall_similarity'] > 60 else
768
- f"β€’ Start with basic vocal technique exercises before style matching"}
769
-
770
- πŸ“ˆ **Progress Tracking:**
771
- Analyzed {len(ref_features)} reference tracks
772
- Overall performance: {"Advanced" if comparison['overall_similarity'] > 80 else "Intermediate" if comparison['overall_similarity'] > 60 else "Beginner"}
773
- """
774
-
775
- final_status = f"βœ… Style coaching complete! Analyzed {len(ref_features)} references and generated personalized feedback."
776
 
777
- return (
778
- final_status,
779
- "\n".join(ref_status),
780
- feedback
781
- )
782
 
783
  except Exception as e:
784
- logger.error(f"Style coaching failed: {e}")
785
- return (
786
- f"❌ Style coaching failed: {str(e)}",
787
- "Processing error occurred",
788
- "An error occurred during analysis. Please try again."
789
- )
790
 
791
- # Create comprehensive Gradio interface
792
- def create_main_interface():
793
- """Create the main professional Gradio interface"""
794
-
795
- # Custom CSS for professional styling
796
- custom_css = """
797
- .gradio-container {
798
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
799
- max-width: 1200px !important;
800
- margin: auto;
801
- }
802
- .header-text {
803
- text-align: center;
804
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
805
- color: white;
806
- padding: 2rem;
807
- border-radius: 10px;
808
- margin-bottom: 2rem;
809
- }
810
- .tab-nav {
811
- border-radius: 10px 10px 0 0;
812
- }
813
- .output-text {
814
- font-family: 'Courier New', monospace;
815
- }
816
- """
817
 
818
- with gr.Blocks(css=custom_css, title="Live Audio Singing Helper Pro", theme=gr.themes.Soft()) as demo:
819
- # Header
820
  gr.HTML("""
821
- <div class="header-text">
822
- <h1>🎀 Live Audio Singing Helper Pro</h1>
823
- <p>Professional-grade audio processing for singers and musicians</p>
824
- <p><strong>Version 2.0.0</strong> | Advanced source separation, vocal effects, and AI-powered style coaching</p>
825
  </div>
826
  """)
827
 
828
  with gr.Tabs():
829
- # Tab 1: Audio Separation & Analysis
830
- with gr.Tab("🎡 Audio Separation & Analysis", elem_classes="tab-nav"):
831
- gr.Markdown("""
832
- ### Professional Audio Source Separation
833
- Upload your audio files to separate vocals from instruments with state-of-the-art AI models.
834
- Apply professional vocal effects and get detailed audio analysis.
835
- """)
836
 
837
  with gr.Row():
838
- with gr.Column(scale=1):
839
- audio_input = gr.Audio(
840
- type="filepath",
841
- label="πŸ“ Upload Audio File",
842
- sources=["upload"],
843
- show_download_button=True
844
- )
845
-
846
- stems_mode = gr.Dropdown(
847
  choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
848
  value="2-stem (Vocals + Instrumental)",
849
- label="🎯 Separation Mode",
850
- info="Choose the complexity of separation"
851
- )
852
-
853
- with gr.Group():
854
- gr.Markdown("**πŸŽ›οΈ Vocal Effects**")
855
- with gr.Row():
856
- pitch_shift = gr.Slider(
857
- minimum=-12, maximum=12, value=0, step=0.5,
858
- label="Pitch Shift (semitones)",
859
- info="Transpose vocals up or down"
860
- )
861
- reverb = gr.Slider(
862
- minimum=0, maximum=0.5, value=0, step=0.05,
863
- label="Reverb Amount",
864
- info="Add spatial depth"
865
- )
866
- with gr.Row():
867
- chorus = gr.Slider(
868
- minimum=0, maximum=0.3, value=0, step=0.05,
869
- label="Chorus Effect",
870
- info="Add vocal thickness"
871
- )
872
- compression = gr.Slider(
873
- minimum=0, maximum=1, value=0, step=0.1,
874
- label="Compression",
875
- info="Even out dynamics"
876
- )
877
-
878
- process_btn = gr.Button(
879
- "πŸš€ Process Audio",
880
- variant="primary",
881
- size="lg"
882
  )
 
883
 
884
- with gr.Column(scale=1):
885
- status_output = gr.Textbox(
886
- label="πŸ“‹ Processing Status",
887
- interactive=False,
888
- lines=3
889
- )
890
-
891
- analysis_output = gr.Textbox(
892
- label="πŸ“Š Audio Analysis",
893
- interactive=False,
894
- lines=20,
895
- elem_classes="output-text"
896
- )
897
 
898
- # Output audio files
899
  with gr.Row():
900
- audio_output1 = gr.Audio(
901
- label="🎀 Vocals/Primary",
902
- show_download_button=True
903
- )
904
- audio_output2 = gr.Audio(
905
- label="🎼 Instrumental/Drums",
906
- show_download_button=True
907
- )
908
 
909
  with gr.Row():
910
- audio_output3 = gr.Audio(
911
- label="🎸 Bass (4-stem only)",
912
- show_download_button=True
913
- )
914
- audio_output4 = gr.Audio(
915
- label="🎹 Other (4-stem only)",
916
- show_download_button=True
917
- )
918
 
919
- # Tab 2: Live Recording & Effects
920
- with gr.Tab("πŸŽ™οΈ Live Recording & Effects"):
921
- gr.Markdown("""
922
- ### Real-time Recording & Vocal Processing
923
- Record your voice directly and apply professional vocal effects in real-time.
924
- Perfect for vocal practice and experimentation.
925
- """)
926
 
927
  with gr.Row():
928
- with gr.Column(scale=1):
929
- mic_input = gr.Audio(
930
- type="filepath",
931
- sources=["microphone"],
932
- label="πŸŽ™οΈ Record Your Voice",
933
- show_download_button=True
934
- )
935
-
936
- with gr.Group():
937
- gr.Markdown("**πŸŽ›οΈ Real-time Effects**")
938
- with gr.Row():
939
- live_pitch = gr.Slider(
940
- minimum=-12, maximum=12, value=0, step=0.5,
941
- label="Pitch Correction",
942
- info="Real-time pitch adjustment"
943
- )
944
- live_reverb = gr.Slider(
945
- minimum=0, maximum=0.5, value=0, step=0.05,
946
- label="Studio Reverb",
947
- info="Professional reverb effect"
948
- )
949
- with gr.Row():
950
- live_chorus = gr.Slider(
951
- minimum=0, maximum=0.3, value=0, step=0.05,
952
- label="Vocal Doubling",
953
- info="Thicken your voice"
954
- )
955
- live_compression = gr.Slider(
956
- minimum=0, maximum=1, value=0, step=0.1,
957
- label="Dynamic Control",
958
- info="Professional compression"
959
- )
960
-
961
- live_process_btn = gr.Button(
962
- "🎡 Process Recording",
963
- variant="primary",
964
- size="lg"
965
- )
966
 
967
- with gr.Column(scale=1):
968
- live_status = gr.Textbox(
969
- label="πŸ“‹ Processing Status",
970
- interactive=False,
971
- lines=3
972
- )
973
-
974
- live_analysis = gr.Textbox(
975
- label="πŸ“Š Recording Analysis",
976
- interactive=False,
977
- lines=15,
978
- elem_classes="output-text"
979
- )
980
 
981
- live_output = gr.Audio(
982
- label="🎧 Processed Recording",
983
- show_download_button=True
984
- )
985
 
986
- # Tab 3: Style Coaching
987
- with gr.Tab("🎭 AI Style Coaching"):
988
- gr.Markdown("""
989
- ### Professional Vocal Style Analysis & Coaching
990
- Upload reference tracks from artists you want to emulate, then record your performance.
991
- Get detailed AI-powered feedback on how to improve your vocal style.
992
- """)
993
 
994
  with gr.Row():
995
- with gr.Column(scale=1):
996
- reference_files = gr.File(
997
- label="πŸ“š Reference Tracks (2-5 songs)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  file_count="multiple",
999
- file_types=["audio"],
1000
- info="Upload songs from artists whose style you want to learn"
1001
- )
1002
-
1003
- user_performance = gr.Audio(
1004
- type="filepath",
1005
- label="🎀 Your Performance",
1006
- sources=["upload", "microphone"],
1007
- show_download_button=True,
1008
- info="Record or upload your singing"
1009
  )
1010
-
1011
- coach_btn = gr.Button(
1012
- "🎯 Analyze Style & Get Coaching",
1013
- variant="primary",
1014
- size="lg"
1015
  )
 
1016
 
1017
- with gr.Column(scale=1):
1018
- coaching_status = gr.Textbox(
1019
- label="πŸ“‹ Analysis Status",
1020
- interactive=False,
1021
- lines=4
1022
- )
1023
-
1024
- reference_status = gr.Textbox(
1025
- label="πŸ“š Reference Processing",
1026
- interactive=False,
1027
- lines=8
1028
- )
1029
 
1030
- coaching_feedback = gr.Textbox(
1031
- label="🎯 Personalized Coaching Feedback",
1032
- interactive=False,
1033
- lines=25,
1034
- elem_classes="output-text"
1035
- )
1036
 
1037
- # Tab 4: Help & Documentation
1038
- with gr.Tab("ℹ️ Help & Documentation"):
1039
- gr.Markdown(f"""
1040
- # πŸ“– Live Audio Singing Helper Pro Documentation
1041
-
1042
- **Version:** {VERSION}
1043
- **Author:** Lead Developer Team
1044
- **Last Updated:** 2024
1045
-
1046
- ## πŸš€ Features Overview
1047
-
1048
- ### 🎡 Audio Separation & Analysis
1049
- - **Source Separation**: Advanced AI-powered vocal isolation using Spleeter
1050
- - **Multi-stem Options**: 2-stem (vocals/instrumental) or 4-stem (vocals/drums/bass/other)
1051
- - **Professional Effects**: Pitch shifting, reverb, chorus, and compression
1052
- - **Detailed Analysis**: Comprehensive audio feature extraction and visualization
1053
-
1054
- ### πŸŽ™οΈ Live Recording & Effects
1055
- - **Real-time Recording**: Direct microphone input with instant processing
1056
- - **Professional Effects Chain**: Studio-quality vocal processing
1057
- - **Live Analysis**: Instant feedback on your recording characteristics
1058
-
1059
- ### 🎭 AI Style Coaching
1060
- - **Reference-based Learning**: Upload tracks from artists you want to emulate
1061
- - **AI-powered Analysis**: Advanced vocal characteristic comparison
1062
- - **Personalized Feedback**: Specific recommendations for improvement
1063
- - **Progress Tracking**: Monitor your vocal development over time
1064
-
1065
- ## πŸ“‹ Supported Formats
1066
- - **Input**: MP3, WAV, FLAC, M4A, OGG, AAC
1067
- - **Output**: High-quality WAV files
1068
- - **Maximum File Size**: 50MB per file
1069
- - **Maximum Duration**: 10 minutes per track
1070
 
1071
- ## 🎯 Usage Tips
1072
 
1073
- ### For Best Results:
1074
- 1. **Use high-quality recordings** - Better input = better output
1075
- 2. **Keep files under 5 minutes** for faster processing
1076
- 3. **Use full songs** for style coaching (not isolated vocals)
1077
- 4. **Record in a quiet environment** for live recording
1078
- 5. **Choose similar genres** for reference tracks in style coaching
1079
 
1080
- ### Troubleshooting:
1081
- - **File too large**: Compress your audio or trim to shorter length
1082
- - **Poor separation quality**: Try with different audio files or formats
1083
- - **Style coaching not working**: Ensure reference tracks have clear vocals
1084
 
1085
- ## πŸ”§ Technical Specifications
1086
- - **Separation Engine**: Spleeter by Deezer Research
1087
- - **Audio Processing**: librosa + scipy
1088
- - **AI Analysis**: Advanced signal processing algorithms
1089
- - **Effects Processing**: Professional-grade audio effects
1090
 
1091
- ## 🎼 Advanced Tips for Musicians
 
 
 
 
1092
 
1093
- ### Vocal Practice:
1094
- - Use separated vocals to practice harmonies
1095
- - Apply pitch correction to hear your target pitch
1096
- - Use compression to understand dynamic control
1097
 
1098
- ### Style Development:
1099
- - Analyze multiple songs from the same artist for consistency
1100
- - Compare your progress over time using the same reference tracks
1101
- - Focus on one aspect at a time (pitch, timing, tone)
1102
 
1103
- ### Recording Techniques:
1104
- - Record multiple takes and compare analyses
1105
- - Experiment with different microphone distances
1106
- - Use effects subtly for natural-sounding results
1107
 
1108
- ## πŸ†˜ Support & Contact
1109
- For technical support or feature requests, please contact the development team.
 
1110
 
1111
  ---
1112
-
1113
- **⚠️ Important Notes:**
1114
- - This tool is for educational and practice purposes
1115
- - Respect copyright when using reference tracks
1116
- - Results may vary based on audio quality and complexity
1117
- - Processing times depend on file size and server load
1118
  """)
1119
 
1120
- # Event handlers with comprehensive error handling
1121
- process_btn.click(
1122
- fn=process_audio_separation,
1123
- inputs=[audio_input, stems_mode, pitch_shift, reverb, chorus, compression],
1124
- outputs=[status_output, audio_output1, audio_output2, audio_output3, audio_output4, analysis_output],
1125
- show_progress=True
1126
  )
1127
 
1128
- live_process_btn.click(
1129
- fn=process_live_recording,
1130
- inputs=[mic_input, live_pitch, live_reverb, live_chorus, live_compression],
1131
- outputs=[live_status, live_output, live_analysis],
1132
- show_progress=True
1133
  )
1134
 
1135
- coach_btn.click(
1136
- fn=process_style_coaching,
1137
- inputs=[reference_files, user_performance],
1138
- outputs=[coaching_status, reference_status, coaching_feedback],
1139
- show_progress=True
 
 
 
 
 
1140
  )
1141
 
1142
- return demo
1143
 
1144
if __name__ == "__main__":
    # Script entry point: build the UI, serve it on all interfaces, and make
    # sure the processor's session files are removed however the run ends.
    try:
        logger.info(f"Starting Live Audio Singing Helper Pro v{VERSION}")
        demo = create_main_interface()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            show_api=True,
            show_error=True,
            quiet=False
        )
    except Exception as e:
        logger.error(f"Failed to launch application: {e}")
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Cleanup on exit. Best effort: a failure here must not mask the real
        # exit reason, but it is logged instead of vanishing, and
        # SystemExit/KeyboardInterrupt are no longer swallowed by a bare except.
        try:
            processor.cleanup_session()
        except Exception as cleanup_error:
            logger.warning(f"Session cleanup failed: {cleanup_error}")
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import librosa
3
  import numpy as np
4
  import soundfile as sf
 
5
  import os
 
 
6
  import tempfile
7
+ import shutil
 
 
 
 
8
  from pathlib import Path
 
 
 
9
  import warnings
10
  warnings.filterwarnings("ignore")
11
 
12
+ # Import for advanced features
13
+ try:
14
+ from spleeter.separator import Separator
15
+ SPLEETER_AVAILABLE = True
16
+ except ImportError:
17
+ SPLEETER_AVAILABLE = False
18
+ print("Spleeter not available - source separation disabled")
19
 
20
+ try:
21
+ import scipy.signal
22
+ from scipy.spatial.distance import euclidean
23
+ from dtw import dtw
24
+ ADVANCED_FEATURES = True
25
+ except ImportError:
26
+ ADVANCED_FEATURES = False
27
+ print("Advanced features not available")
28
 
29
class AudioEngine:
    """Audio analysis, source separation, effects and style comparison.

    Every public method returns a dict with a boolean ``'success'`` key; on
    failure the dict carries an ``'error'`` message instead of raising, so UI
    callbacks can display the problem directly.

    All generated files live under ``self.temp_dir``, which is removed by
    ``cleanup()`` (registered to run at interpreter exit).
    """

    # Stem files written by Spleeter for each supported model type.
    _STEM_NAMES = {
        "2stems": ("vocals", "accompaniment"),
        "4stems": ("vocals", "drums", "bass", "other"),
    }

    # Feature keys averaged across reference tracks in compare_vocal_styles.
    _STYLE_KEYS = (
        'mean_pitch', 'pitch_std', 'pitch_range',
        'tempo', 'spectral_centroid', 'rms_energy',
    )

    def __init__(self):
        import atexit

        self.temp_dir = tempfile.mkdtemp()
        self.separators = {}  # Cache for Spleeter models, keyed by model type
        # cleanup() was never invoked anywhere, so the temp directory leaked on
        # every run; make sure it is removed when the process exits.
        atexit.register(self.cleanup)

    @staticmethod
    def _scalar(value):
        """Collapse a NumPy scalar or one-element array to a plain float.

        ``librosa.beat.beat_track`` may return the tempo as an array depending
        on the installed librosa version; ``round()`` and ``:.1f`` formatting
        need a real scalar.
        """
        return float(np.atleast_1d(value).ravel()[0])

    @staticmethod
    def _dominant_pitches(pitches, magnitudes):
        """Return the strongest pitch of every frame, dropping zero pitches.

        Both arguments are the 2-D arrays returned by ``librosa.piptrack``
        (indexed as ``[bin, frame]``).
        """
        strongest_bin = magnitudes.argmax(axis=0)
        per_frame = pitches[strongest_bin, np.arange(pitches.shape[1])]
        return per_frame[per_frame > 0]

    @staticmethod
    def _add_reverb(y, sr, amount):
        """Blend a synthetic reverb tail into ``y`` and return the result.

        ``amount`` is the wet fraction of the mix. The wet signal is scaled to
        the dry peak before blending so that ``amount`` really controls the
        balance (previously the amount was cancelled by peak normalisation).
        The output has the same length as ``y`` and its onset is preserved.
        """
        tail_len = int(0.5 * sr)
        if len(y) == 0 or tail_len == 0:
            return y

        # Exponentially decaying noise burst; a fixed seed keeps repeated runs
        # on the same input identical.
        rng = np.random.default_rng(0)
        impulse = rng.standard_normal(tail_len) * np.exp(-np.arange(tail_len) / (sr * 0.1))

        # 'full' + truncation keeps the causal alignment; 'same' would centre
        # the result and cut the first quarter second of the signal.
        wet = scipy.signal.convolve(y, impulse, mode='full')[:len(y)]

        dry_peak = np.max(np.abs(y))
        wet_peak = np.max(np.abs(wet))
        if wet_peak > 0:
            wet = wet * (dry_peak / wet_peak)

        mixed = (1.0 - amount) * y + amount * wet

        # Only attenuate when the mix would clip; silent input is left alone
        # instead of being divided by zero.
        peak = np.max(np.abs(mixed))
        if peak > 1.0:
            mixed = mixed / peak
        return mixed

    def analyze_audio(self, audio_path):
        """Extract summary features from the audio file at ``audio_path``.

        Returns a dict with duration, tempo, sample rate, spectral centroid
        and rolloff, zero-crossing rate, RMS energy, average pitch, number of
        pitched frames and number of detected beats, or
        ``{'success': False, 'error': ...}`` if anything fails.
        """
        try:
            y, sr = librosa.load(audio_path)

            # Basic properties
            duration = len(y) / sr
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
            zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

            # Energy features
            rms_energy = np.mean(librosa.feature.rms(y=y))

            # Pitch estimation
            pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
            voiced = self._dominant_pitches(pitches, magnitudes)
            avg_pitch = float(np.mean(voiced)) if voiced.size else 0

            return {
                'success': True,
                'duration': round(duration, 2),
                'tempo': round(self._scalar(tempo), 1),
                'sample_rate': sr,
                'spectral_centroid': round(float(spectral_centroid), 2),
                'spectral_rolloff': round(float(spectral_rolloff), 2),
                'zero_crossing_rate': round(float(zero_crossing_rate), 4),
                'rms_energy': round(float(rms_energy), 4),
                'average_pitch': round(avg_pitch, 2),
                'pitch_count': int(voiced.size),
                'beats_detected': len(beats)
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def separate_vocals(self, audio_path, model_type="2stems"):
        """Split ``audio_path`` into stems with Spleeter.

        ``model_type`` is ``"2stems"`` (vocals, accompaniment) or ``"4stems"``
        (vocals, drums, bass, other). On success the result maps every stem
        name to the path of its WAV file, or ``None`` when that file was not
        produced. Unsupported model types yield an error dict.
        """
        if not SPLEETER_AVAILABLE:
            return {'success': False, 'error': 'Spleeter not available'}

        stems = self._STEM_NAMES.get(model_type)
        if stems is None:
            # Previously this case fell off the end and returned None, which
            # crashed callers indexing result['success'].
            return {'success': False, 'error': f'Unsupported model type: {model_type}'}

        try:
            # Load or create separator
            separator = self.separators.get(model_type)
            if separator is None:
                separator = Separator(f'spleeter:{model_type}-16kHz')
                self.separators[model_type] = separator

            # A unique directory per call: random integer names could collide
            # and silently overwrite the stems of an earlier separation.
            output_dir = tempfile.mkdtemp(prefix="separation_", dir=self.temp_dir)

            separator.separate_to_file(audio_path, output_dir)

            # Stems are looked up in a sub-directory named after the input
            # file's stem.
            result_dir = Path(output_dir) / Path(audio_path).stem

            result = {'success': True}
            for stem in stems:
                stem_path = result_dir / f"{stem}.wav"
                result[stem] = str(stem_path) if stem_path.exists() else None
            return result

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
        """Apply pitch shift and/or reverb and write the result to a WAV file.

        ``pitch_shift`` is in semitones; ``reverb`` is the wet fraction of the
        mix and is only applied when the optional SciPy-based features are
        available. Returns ``{'success': True, 'output': path}`` or an error
        dict.
        """
        try:
            y, sr = librosa.load(audio_path)

            if pitch_shift != 0:
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)

            if reverb > 0 and ADVANCED_FEATURES:
                y = self._add_reverb(y, sr, reverb)

            # mkstemp guarantees a fresh file name inside the session directory.
            handle, output_path = tempfile.mkstemp(
                prefix="processed_", suffix=".wav", dir=self.temp_dir
            )
            os.close(handle)
            sf.write(output_path, y, sr)

            return {'success': True, 'output': output_path}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def extract_vocal_features(self, audio_path):
        """Extract the pitch, tempo, timbre and energy features used for coaching.

        Returns plain-float values for ``mean_pitch``, ``pitch_std``,
        ``pitch_range``, ``tempo``, ``spectral_centroid`` and ``rms_energy``,
        or an error dict (including when no pitch is detected).
        """
        try:
            y, sr = librosa.load(audio_path)

            # Pitch analysis
            pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
            voiced = self._dominant_pitches(pitches, magnitudes)
            if voiced.size == 0:
                return {'success': False, 'error': 'No pitch detected'}

            # Tempo
            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

            # Energy
            rms_energy = np.mean(librosa.feature.rms(y=y))

            return {
                'success': True,
                'mean_pitch': float(np.mean(voiced)),
                'pitch_std': float(np.std(voiced)),
                'pitch_range': float(voiced.max() - voiced.min()),
                'tempo': self._scalar(tempo),
                'spectral_centroid': float(spectral_centroid),
                'rms_energy': float(rms_energy)
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def compare_vocal_styles(self, user_features, reference_features_list):
        """Score the user's features against the average of the references.

        Returns ``score`` (0-100), a list of ``feedback`` strings and the
        absolute differences in ``metrics``, or an error dict when advanced
        features are unavailable, no references are given, or the comparison
        fails.
        """
        if not ADVANCED_FEATURES:
            return {'success': False, 'error': 'Advanced features not available'}

        if not reference_features_list:
            # Averaging nothing would compare the user against all-zero targets.
            return {'success': False, 'error': 'No reference features provided'}

        try:
            # Average reference features
            ref_avg = {}
            for key in self._STYLE_KEYS:
                values = [self._scalar(ref[key]) for ref in reference_features_list if key in ref]
                ref_avg[key] = float(np.mean(values)) if values else 0.0

            # Calculate differences
            pitch_diff = abs(self._scalar(user_features['mean_pitch']) - ref_avg['mean_pitch'])
            tempo_diff = abs(self._scalar(user_features['tempo']) - ref_avg['tempo'])
            timbre_diff = abs(self._scalar(user_features['spectral_centroid']) - ref_avg['spectral_centroid'])
            energy_diff = abs(self._scalar(user_features['rms_energy']) - ref_avg['rms_energy'])

            # Generate feedback
            feedback = []

            if pitch_diff > 50:
                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
            else:
                feedback.append("🎵 Pitch: Good pitch accuracy!")

            if tempo_diff > 10:
                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
            else:
                feedback.append("⏱️ Tempo: Good timing!")

            if timbre_diff > 500:
                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
            else:
                feedback.append("🗣️ Timbre: Good vocal tone match!")

            if energy_diff > 0.1:
                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
            else:
                feedback.append("🔊 Energy: Good energy level!")

            # Weighted penalty per metric, clamped so the score never goes negative.
            overall_score = max(0, 100 - (pitch_diff/2 + tempo_diff + timbre_diff/10 + energy_diff*100))

            return {
                'success': True,
                'score': round(overall_score, 1),
                'feedback': feedback,
                'metrics': {
                    'pitch_diff': round(pitch_diff, 1),
                    'tempo_diff': round(tempo_diff, 1),
                    'timbre_diff': round(timbre_diff, 1),
                    'energy_diff': round(energy_diff, 3)
                }
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def cleanup(self):
        """Remove the session's temporary directory; safe to call repeatedly."""
        # Best effort by design: a missing or partially locked directory must
        # not raise during shutdown.
        shutil.rmtree(self.temp_dir, ignore_errors=True)
266
 
267
+ # Global engine instance
268
+ engine = AudioEngine()
 
269
 
270
+ def format_analysis_results(analysis):
271
  """Format analysis results for display"""
272
+ if not analysis['success']:
273
+ return f"❌ Analysis failed: {analysis['error']}"
274
 
275
+ return f"""πŸ“Š Audio Analysis Results
 
 
 
 
 
276
 
277
+ 🎡 Basic Properties:
278
+ β€’ Duration: {analysis['duration']} seconds
279
+ β€’ Sample Rate: {analysis['sample_rate']} Hz
280
+ β€’ Tempo: {analysis['tempo']} BPM
281
 
282
+ πŸ”Š Audio Characteristics:
283
+ β€’ Spectral Centroid: {analysis['spectral_centroid']} Hz
284
+ β€’ Spectral Rolloff: {analysis['spectral_rolloff']} Hz
285
+ β€’ Zero Crossing Rate: {analysis['zero_crossing_rate']}
286
+ β€’ RMS Energy: {analysis['rms_energy']}
287
 
288
+ 🎀 Vocal Information:
289
+ β€’ Average Pitch: {analysis['average_pitch']} Hz
290
+ β€’ Pitch Points Detected: {analysis['pitch_count']}
291
+ β€’ Beats Detected: {analysis['beats_detected']}"""
292
 
293
def process_audio_separation(audio_file, separation_mode):
    """Gradio callback: analyse ``audio_file`` and split it into stems.

    ``separation_mode`` is the dropdown label; any label containing
    ``"2-stem"`` selects the 2-stem model, everything else the 4-stem model.

    Returns a 6-tuple ``(status, audio1, audio2, audio3, audio4, analysis)``.
    For 2-stem runs the slots are vocals and accompaniment (slots 3 and 4 are
    ``None``); for 4-stem runs they are vocals, drums, bass and other.
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, None, None, None, ""

    if not SPLEETER_AVAILABLE:
        return "❌ Spleeter not available for source separation", None, None, None, None, ""

    # Kept outside the try so a later failure still shows the analysis that
    # was already computed.
    analysis_text = ""
    try:
        # Analyze audio first
        analysis_text = format_analysis_results(engine.analyze_audio(audio_file))

        # Separate audio
        model_type = "2stems" if "2-stem" in separation_mode else "4stems"
        separation_result = engine.separate_vocals(audio_file, model_type)

        if not separation_result['success']:
            return f"❌ Separation failed: {separation_result['error']}", None, None, None, None, analysis_text

        if model_type == "2stems":
            status = "✅ 2-stem separation completed successfully!"
            stems = (
                separation_result.get('vocals'),
                separation_result.get('accompaniment'),
                None,
                None,
            )
        else:
            status = "✅ 4-stem separation completed successfully!"
            stems = (
                separation_result.get('vocals'),
                separation_result.get('drums'),
                separation_result.get('bass'),
                separation_result.get('other'),
            )

        if not any(stems):
            # The engine reports None for stem files that were never written;
            # do not announce success with nothing to play back.
            return "❌ Separation failed: no output files were produced", None, None, None, None, analysis_text

        return (status, *stems, analysis_text)

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, None, None, None, analysis_text
 
 
 
 
 
334
 
335
def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
    """Gradio callback: apply pitch shift and reverb to ``audio_file``.

    Returns ``(status, processed_audio_path, analysis_text)``. The analysis
    describes the original (unprocessed) audio.
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, ""

    # Kept outside the try so a later failure still shows the analysis that
    # was already computed.
    analysis_text = ""
    try:
        # Analyze original
        analysis_text = format_analysis_results(engine.analyze_audio(audio_file))

        # Apply effects
        effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount)

        if not effects_result['success']:
            return f"❌ Effects failed: {effects_result['error']}", None, analysis_text

        effects_applied = []
        if pitch_shift != 0:
            effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones")
        # The engine silently skips reverb when the optional SciPy features
        # are missing, so only claim it when it was really applied.
        if reverb_amount > 0 and ADVANCED_FEATURES:
            effects_applied.append(f"Reverb: {reverb_amount:.2f}")

        if effects_applied:
            status = f"✅ Effects applied: {', '.join(effects_applied)}"
        else:
            status = "✅ Audio processed (no effects)"

        return status, effects_result['output'], analysis_text

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, analysis_text
 
 
 
 
 
363
 
364
def process_style_coaching(reference_files, user_audio):
    """Gradio callback: compare the user's vocals with reference tracks.

    ``reference_files`` is the list of uploads from the file component (only
    the first five are used); ``user_audio`` is the path of the user's
    recording. Vocals are isolated from every track before features are
    extracted and compared.

    Returns ``(status, reference_processing_log, feedback_text)``.
    """
    if not reference_files or len(reference_files) < 2:
        return "❌ Upload at least 2 reference tracks", "", ""

    if not user_audio:
        return "❌ Please record or upload your performance", "", ""

    if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
        return "❌ Style coaching requires advanced features", "", ""

    # Kept outside the try so an unexpected failure still reports which
    # references had already been processed.
    ref_status = []
    try:
        # Process reference tracks
        ref_features = []
        candidates = reference_files[:5]

        for i, ref_file in enumerate(candidates):
            # Uploads may arrive as objects exposing ``.name`` or as plain
            # path strings depending on the Gradio version; accept both.
            ref_path = getattr(ref_file, "name", ref_file)

            # Separate vocals
            separation_result = engine.separate_vocals(ref_path, "2stems")
            if not (separation_result['success'] and separation_result.get('vocals')):
                ref_status.append(f"❌ Reference {i+1}: Vocal separation failed")
                continue

            # Extract features
            features = engine.extract_vocal_features(separation_result['vocals'])
            if features['success']:
                ref_features.append(features)
                ref_status.append(f"✅ Reference {i+1}: Processed")
            else:
                ref_status.append(f"❌ Reference {i+1}: Feature extraction failed")

        if len(ref_features) < 2:
            return "❌ Need at least 2 valid reference tracks", "\n".join(ref_status), ""

        # Process user audio
        user_separation = engine.separate_vocals(user_audio, "2stems")
        if not user_separation['success'] or not user_separation.get('vocals'):
            return "❌ Could not separate vocals from your performance", "\n".join(ref_status), ""

        user_features = engine.extract_vocal_features(user_separation['vocals'])
        if not user_features['success']:
            return "❌ Could not analyze your vocal features", "\n".join(ref_status), ""

        # Compare styles
        comparison = engine.compare_vocal_styles(user_features, ref_features)
        if not comparison['success']:
            return f"❌ Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""

        score = comparison['score']
        metrics = comparison['metrics']

        if score > 80:
            recommendation = "🔥 Excellent! You're very close to the target style."
        elif score > 60:
            recommendation = "📈 Good progress! Focus on the areas mentioned above."
        else:
            recommendation = "💪 Keep practicing! Work on basic vocal technique first."

        # Format feedback. The denominator is the number of references that
        # were actually submitted rather than a fixed 5, so "3/3" no longer
        # reads as two failures.
        feedback_text = "\n".join([
            "🎯 Vocal Style Coaching Results",
            "",
            f"📊 Overall Score: {score}/100",
            "",
            "🎵 Detailed Feedback:",
            "\n".join(comparison['feedback']),
            "",
            "📈 Technical Metrics:",
            f"• Pitch Difference: {metrics['pitch_diff']} Hz",
            f"• Tempo Difference: {metrics['tempo_diff']} BPM",
            f"• Timbre Difference: {metrics['timbre_diff']} Hz",
            f"• Energy Difference: {metrics['energy_diff']}",
            "",
            "🎯 Recommendations:",
            recommendation,
            "",
            f"References analyzed: {len(ref_features)}/{len(candidates)}",
        ])

        return f"✅ Style coaching complete! Score: {score}/100", "\n".join(ref_status), feedback_text

    except Exception as e:
        return f"❌ Coaching failed: {str(e)}", "\n".join(ref_status), ""
 
 
 
 
 
436
 
437
+ # Create main interface
438
def create_app():
    """Build and return the Gradio Blocks application.

    The UI has five tabs: audio separation, vocal effects, live recording,
    style coaching and help. The live-recording tab reuses
    ``process_vocal_effects`` with microphone input. The app is returned
    without being launched.
    """
    # NOTE: every emoji in the labels below had been mis-encoded (UTF-8 bytes
    # shown in a single-byte code page); they are restored to real glyphs.
    with gr.Blocks(title="Audio Singing Helper", theme=gr.themes.Soft()) as app:

        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
            <h1>🎤 Audio Singing Helper</h1>
            <p>Professional audio processing for singers and musicians</p>
        </div>
        """)

        with gr.Tabs():

            # Audio Separation Tab
            with gr.Tab("🎵 Audio Separation"):
                gr.Markdown("### Separate vocals from instrumental tracks")

                with gr.Row():
                    with gr.Column():
                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
                        sep_mode = gr.Dropdown(
                            choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                            value="2-stem (Vocals + Instrumental)",
                            label="Separation Mode"
                        )
                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")

                    with gr.Column():
                        sep_status = gr.Textbox(label="Status", lines=2)
                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12)

                with gr.Row():
                    sep_vocals = gr.Audio(label="🎤 Vocals")
                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums")

                with gr.Row():
                    sep_bass = gr.Audio(label="🎸 Bass")
                    sep_other = gr.Audio(label="🎹 Other")

            # Vocal Effects Tab
            with gr.Tab("🎛️ Vocal Effects"):
                gr.Markdown("### Apply professional vocal effects")

                with gr.Row():
                    with gr.Column():
                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")

                    with gr.Column():
                        fx_status = gr.Textbox(label="Status", lines=2)
                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10)

                fx_output = gr.Audio(label="🎧 Processed Audio")

            # Live Recording Tab
            with gr.Tab("🎙️ Live Recording"):
                gr.Markdown("### Record and process your voice in real-time")

                with gr.Row():
                    with gr.Column():
                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
                        live_button = gr.Button("🎤 Process Recording", variant="primary")

                    with gr.Column():
                        live_status = gr.Textbox(label="Status", lines=2)
                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10)

                live_output = gr.Audio(label="🎧 Processed Recording")

            # Style Coaching Tab
            with gr.Tab("🎭 Style Coaching"):
                gr.Markdown("### Get personalized vocal coaching feedback")

                with gr.Row():
                    with gr.Column():
                        coach_refs = gr.File(
                            label="Reference Tracks (2-5 files)",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        coach_user = gr.Audio(
                            type="filepath",
                            label="Your Performance",
                            sources=["upload", "microphone"]
                        )
                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")

                    with gr.Column():
                        coach_status = gr.Textbox(label="Status", lines=3)
                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8)

                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15)

            # Help Tab
            with gr.Tab("ℹ️ Help"):
                gr.Markdown("""
                # 🎤 Audio Singing Helper - User Guide

                ## Features

                ### 🎵 Audio Separation
                - Upload any song to separate vocals from instruments
                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
                - Get detailed audio analysis of your tracks

                ### 🎛️ Vocal Effects
                - Apply pitch shifting (-12 to +12 semitones)
                - Add reverb for spatial depth
                - Process any audio file with professional effects

                ### 🎙️ Live Recording
                - Record directly from your microphone
                - Apply real-time pitch correction and reverb
                - Perfect for vocal practice and experimentation

                ### 🎭 Style Coaching
                - Upload 2-5 reference tracks from artists you want to emulate
                - Record or upload your performance
                - Get AI-powered feedback on pitch, timing, and vocal characteristics
                - Receive a score and specific improvement suggestions

                ## Tips for Best Results

                - **Use high-quality audio files** - better input = better results
                - **Keep files under 5 minutes** for faster processing
                - **For style coaching**: Choose references from similar genres
                - **Record in quiet environments** for best analysis

                ## Supported Formats
                - Input: MP3, WAV, FLAC, M4A, OGG
                - Output: High-quality WAV files

                ## Technical Requirements
                - Some features require additional dependencies
                - Processing time varies based on file length and complexity

                ---
                Built for singers and musicians worldwide 🌍
                """)

        # Connect all the event handlers
        sep_button.click(
            process_audio_separation,
            inputs=[sep_audio_input, sep_mode],
            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
        )

        fx_button.click(
            process_vocal_effects,
            inputs=[fx_audio_input, fx_pitch, fx_reverb],
            outputs=[fx_status, fx_output, fx_analysis]
        )

        # The live tab reuses the effects callback; only the input source differs.
        live_button.click(
            process_vocal_effects,
            inputs=[live_audio, live_pitch, live_reverb],
            outputs=[live_status, live_output, live_analysis]
        )

        coach_button.click(
            process_style_coaching,
            inputs=[coach_refs, coach_user],
            outputs=[coach_status, coach_refs_status, coach_feedback]
        )

    return app
608
 
609
if __name__ == "__main__":
    # Script entry point: build the Gradio UI and start serving it with the
    # default launch settings.
    app = create_app()
    app.launch()