Spaces:

latterworks
/

sing

Runtime error

App Files Community

latterworks committed on May 25, 2025

Commit

802da7d

verified ·

1 Parent(s): ac57520

Update app.py

Browse files

Files changed (1) hide show

app.py +453 -1006

app.py CHANGED Viewed

@@ -1,1164 +1,611 @@
-#!/usr/bin/env python3
-"""
-Live Audio Singing Helper - Production Grade
-Advanced audio processing tool for singers and musicians
-Author: Lead Developer
-Version: 2.0.0
-"""
 import gradio as gr
 import librosa
 import numpy as np
 import soundfile as sf
-from spleeter.separator import Separator
 import os
-import sys
-import shutil
 import tempfile
-import scipy.signal
-import matplotlib.pyplot as plt
-import traceback
-import logging
-import gc
 from pathlib import Path
-from typing import Tuple, Optional, Dict, Any, List, Union
-from dataclasses import dataclass
-from contextlib import contextmanager
 import warnings
 warnings.filterwarnings("ignore")
-# Style coaching imports
-from scipy.spatial.distance import euclidean
-from dtw import dtw
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-# Constants
-MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
-SUPPORTED_FORMATS = ['.mp3', '.wav', '.flac', '.m4a', '.ogg', '.aac']
-MAX_DURATION = 600  # 10 minutes
-TEMP_DIR_PREFIX = "audio_helper_"
-VERSION = "2.0.0"
-@dataclass
-class ProcessingResult:
-    """Structured result container for audio processing operations"""
-    success: bool
-    message: str
-    data: Optional[Dict[str, Any]] = None
-    error: Optional[str] = None
-class AudioProcessorPro:
-    """Professional-grade audio processor with comprehensive error handling and optimization"""
     def __init__(self):
-        self.separator_2stems = None
-        self.separator_4stems = None
-        self.temp_dir = None
-        self.session_id = None
-        self._initialize_session()
-    def _initialize_session(self):
-        """Initialize processing session with proper cleanup"""
-        try:
-            self.session_id = f"{TEMP_DIR_PREFIX}{np.random.randint(100000)}"
-            self.temp_dir = tempfile.mkdtemp(prefix=self.session_id)
-            logger.info(f"Session initialized: {self.session_id}")
-        except Exception as e:
-            logger.error(f"Session initialization failed: {e}")
-            raise
-    @contextmanager
-    def _safe_processing(self, operation_name: str):
-        """Context manager for safe processing with automatic cleanup"""
-        logger.info(f"Starting {operation_name}")
-        try:
-            yield
-            logger.info(f"Completed {operation_name}")
-        except Exception as e:
-            logger.error(f"Error in {operation_name}: {e}")
-            raise
-        finally:
-            gc.collect()  # Force garbage collection
-    def validate_audio_file(self, audio_path: str) -> ProcessingResult:
-        """Comprehensive audio file validation"""
         try:
-            if not audio_path or not os.path.exists(audio_path):
-                return ProcessingResult(False, "Audio file not found")
-            # Check file size
-            file_size = os.path.getsize(audio_path)
-            if file_size > MAX_FILE_SIZE:
-                return ProcessingResult(False, f"File too large. Max size: {MAX_FILE_SIZE//1024//1024}MB")
-            # Check file format
-            file_ext = Path(audio_path).suffix.lower()
-            if file_ext not in SUPPORTED_FORMATS:
-                return ProcessingResult(False, f"Unsupported format. Supported: {', '.join(SUPPORTED_FORMATS)}")
-            # Check audio properties
-            try:
-                y, sr = librosa.load(audio_path, duration=1.0)  # Load first second for validation
-                duration = librosa.get_duration(filename=audio_path)
-                if duration > MAX_DURATION:
-                    return ProcessingResult(False, f"Audio too long. Max duration: {MAX_DURATION//60} minutes")
-                if sr < 8000:
-                    return ProcessingResult(False, "Sample rate too low (minimum 8kHz)")
-                return ProcessingResult(True, "File validation passed", {
-                    'duration': duration,
-                    'sample_rate': sr,
-                    'file_size': file_size
-                })
-            except Exception as e:
-                return ProcessingResult(False, f"Invalid audio file: {str(e)}")
         except Exception as e:
-            return ProcessingResult(False, f"Validation error: {str(e)}")
-    def get_separator(self, stems: int = 2) -> Separator:
-        """Lazy load and cache Spleeter models"""
         try:
-            if stems == 2:
-                if self.separator_2stems is None:
-                    logger.info("Loading Spleeter 2-stem model...")
-                    self.separator_2stems = Separator("spleeter:2stems-16kHz")
-                return self.separator_2stems
-            elif stems == 4:
-                if self.separator_4stems is None:
-                    logger.info("Loading Spleeter 4-stem model...")
-                    self.separator_4stems = Separator("spleeter:4stems-16kHz")
-                return self.separator_4stems
-            else:
-                raise ValueError(f"Unsupported stem count: {stems}")
-        except Exception as e:
-            logger.error(f"Failed to load Spleeter model: {e}")
-            raise
-    def extract_comprehensive_features(self, audio_path: str) -> ProcessingResult:
-        """Extract comprehensive audio features with proper error handling"""
-        with self._safe_processing("feature_extraction"):
-            try:
-                validation = self.validate_audio_file(audio_path)
-                if not validation.success:
-                    return validation
-                y, sr = librosa.load(audio_path, sr=None)
-                duration = librosa.get_duration(y=y, sr=sr)
-                # Core features
-                tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
-                # Spectral features
-                spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
-                spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
-                spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
-                zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0]
-                # Timbral features
-                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-                chroma = librosa.feature.chroma_stft(y=y, sr=sr)
-                # Dynamic features
-                rms = librosa.feature.rms(y=y)[0]
-                # Pitch estimation
-                pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
-                pitch_values = []
-                for t in range(pitches.shape[1]):
-                    index = magnitudes[:, t].argmax()
-                    pitch = pitches[index, t]
-                    if pitch > 0:
-                        pitch_values.append(pitch)
-                features = {
-                    # Basic properties
-                    'duration': round(duration, 2),
-                    'sample_rate': sr,
-                    'file_size': validation.data['file_size'],
-                    # Rhythm and tempo
-                    'tempo': round(tempo, 1),
-                    'num_beats': len(beats),
-                    'rhythm_regularity': round(np.std(np.diff(beats)), 3),
-                    # Spectral characteristics
-                    'spectral_centroid_mean': round(np.mean(spectral_centroids), 2),
-                    'spectral_centroid_std': round(np.std(spectral_centroids), 2),
-                    'spectral_rolloff_mean': round(np.mean(spectral_rolloff), 2),
-                    'spectral_bandwidth_mean': round(np.mean(spectral_bandwidth), 2),
-                    'zero_crossing_rate_mean': round(np.mean(zero_crossing_rate), 4),
-                    # Dynamic properties
-                    'rms_energy_mean': round(np.mean(rms), 4),
-                    'rms_energy_std': round(np.std(rms), 4),
-                    'dynamic_range': round(np.max(rms) - np.min(rms), 4),
-                    # Pitch information
-                    'pitch_mean': round(np.mean(pitch_values), 2) if pitch_values else 0,
-                    'pitch_std': round(np.std(pitch_values), 2) if pitch_values else 0,
-                    'pitch_range': round(max(pitch_values) - min(pitch_values), 2) if len(pitch_values) > 1 else 0,
-                    # Timbral features (for advanced analysis)
-                    'mfcc_mean': np.round(np.mean(mfccs, axis=1), 3).tolist(),
-                    'chroma_mean': np.round(np.mean(chroma, axis=1), 3).tolist(),
-                    # Quality metrics
-                    'signal_to_noise_ratio': round(20 * np.log10(np.mean(rms) / (np.std(rms) + 1e-10)), 2)
                 }
-                return ProcessingResult(True, "Feature extraction successful", features)
-            except Exception as e:
-                logger.error(f"Feature extraction failed: {e}")
-                return ProcessingResult(False, f"Feature extraction failed: {str(e)}")
-    def separate_audio_sources(self, audio_path: str, stems: int = 2,
-                              progress_callback=None) -> ProcessingResult:
-        """Professional audio source separation with progress tracking"""
-        with self._safe_processing(f"{stems}-stem_separation"):
-            try:
-                validation = self.validate_audio_file(audio_path)
-                if not validation.success:
-                    return validation
-                if progress_callback:
-                    progress_callback(0.1, "Initializing separator...")
-                separator = self.get_separator(stems)
-                if progress_callback:
-                    progress_callback(0.3, "Loading audio...")
-                # Create unique output directory
-                output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}")
-                os.makedirs(output_dir, exist_ok=True)
-                if progress_callback:
-                    progress_callback(0.5, "Separating audio sources...")
-                # Perform separation
-                separator.separate_to_file(audio_path, output_dir)
-                if progress_callback:
-                    progress_callback(0.8, "Processing results...")
-                # Get separated files
-                base_name = os.path.splitext(os.path.basename(audio_path))[0]
-                result_dir = os.path.join(output_dir, base_name)
-                separated_files = {}
-                if stems == 2:
-                    separated_files = {
-                        'vocals': os.path.join(result_dir, "vocals.wav"),
-                        'accompaniment': os.path.join(result_dir, "accompaniment.wav")
-                    }
-                else:  # 4 stems
-                    separated_files = {
-                        'vocals': os.path.join(result_dir, "vocals.wav"),
-                        'drums': os.path.join(result_dir, "drums.wav"),
-                        'bass': os.path.join(result_dir, "bass.wav"),
-                        'other': os.path.join(result_dir, "other.wav")
-                    }
-                # Verify all files exist
-                missing_files = [k for k, v in separated_files.items() if not os.path.exists(v)]
-                if missing_files:
-                    return ProcessingResult(False, f"Separation incomplete. Missing: {missing_files}")
-                if progress_callback:
-                    progress_callback(1.0, "Separation complete!")
-                return ProcessingResult(True, f"✅ {stems}-stem separation successful!", separated_files)
-            except Exception as e:
-                logger.error(f"Audio separation failed: {e}")
-                return ProcessingResult(False, f"Separation failed: {str(e)}")
-    def apply_vocal_effects(self, audio_path: str, effects_config: Dict[str, float]) -> ProcessingResult:
-        """Apply vocal effects with comprehensive options"""
-        with self._safe_processing("vocal_effects"):
-            try:
-                validation = self.validate_audio_file(audio_path)
-                if not validation.success:
-                    return validation
-                y, sr = librosa.load(audio_path, sr=None)
-                original_y = y.copy()
-                # Apply pitch shifting
-                pitch_shift = effects_config.get('pitch_shift', 0)
-                if pitch_shift != 0:
-                    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
-                # Apply reverb
-                reverb_amount = effects_config.get('reverb', 0)
-                if reverb_amount > 0:
-                    reverb_length = int(0.5 * sr)
-                    impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
-                    impulse *= reverb_amount
-                    y = scipy.signal.convolve(y, impulse, mode='same')
-                # Apply chorus effect
-                chorus_amount = effects_config.get('chorus', 0)
-                if chorus_amount > 0:
-                    delay_samples = int(0.02 * sr)  # 20ms delay
-                    delayed = np.pad(original_y, (delay_samples, 0), mode='constant')[:len(y)]
-                    y = y + chorus_amount * delayed
-                # Apply compression
-                compression = effects_config.get('compression', 0)
-                if compression > 0:
-                    threshold = 0.1
-                    ratio = 1 + compression * 9  # 1:1 to 10:1 ratio
-                    mask = np.abs(y) > threshold
-                    y[mask] = np.sign(y[mask]) * (threshold + (np.abs(y[mask]) - threshold) / ratio)
-                # Normalize to prevent clipping
-                if np.max(np.abs(y)) > 0:
-                    y = y / np.max(np.abs(y)) * 0.95
-                # Save processed audio
-                output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav")
-                sf.write(output_path, y, sr)
-                effects_applied = [k for k, v in effects_config.items() if v != 0]
-                return ProcessingResult(True, f"Effects applied: {', '.join(effects_applied)}", {
-                    'output_path': output_path,
-                    'effects_applied': effects_applied
-                })
-            except Exception as e:
-                logger.error(f"Effects processing failed: {e}")
-                return ProcessingResult(False, f"Effects processing failed: {str(e)}")
-    def cleanup_session(self):
-        """Clean up temporary files and release resources"""
-        try:
-            if self.temp_dir and os.path.exists(self.temp_dir):
-                shutil.rmtree(self.temp_dir)
-                logger.info(f"Cleaned up session: {self.session_id}")
         except Exception as e:
-            logger.warning(f"Cleanup warning: {e}")
-class StyleCoachingEngine:
-    """Advanced vocal style coaching system"""
-    def __init__(self, processor: AudioProcessorPro):
-        self.processor = processor
-    def extract_vocal_features(self, audio_path: str) -> ProcessingResult:
-        """Extract detailed vocal-specific features"""
         try:
-            with self.processor._safe_processing("vocal_feature_extraction"):
-                validation = self.processor.validate_audio_file(audio_path)
-                if not validation.success:
-                    return validation
-                y, sr = librosa.load(audio_path, sr=None)
-                # Advanced pitch analysis
-                f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=librosa.note_to_hz('C2'),
-                                                           fmax=librosa.note_to_hz('C7'))
-                f0_clean = f0[voiced_flag]
-                # Formant analysis (simplified)
-                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-                # Vibrato analysis
-                if len(f0_clean) > 10:
-                    f0_smooth = scipy.signal.medfilt(f0_clean, kernel_size=5)
-                    vibrato_extent = np.std(f0_clean - f0_smooth)
-                else:
-                    vibrato_extent = 0
-                # Vocal effort estimation
-                spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
-                vocal_effort = np.mean(spectral_centroid) / 1000  # Normalized measure
-                features = {
-                    'fundamental_frequency_mean': np.nanmean(f0_clean) if len(f0_clean) > 0 else 0,
-                    'fundamental_frequency_std': np.nanstd(f0_clean) if len(f0_clean) > 0 else 0,
-                    'pitch_range': np.nanmax(f0_clean) - np.nanmin(f0_clean) if len(f0_clean) > 0 else 0,
-                    'vibrato_extent': vibrato_extent,
-                    'vocal_effort': vocal_effort,
-                    'voiced_percentage': np.mean(voiced_flag) * 100,
-                    'mfccs': mfccs,
-                    'duration': librosa.get_duration(y=y, sr=sr)
-                }
-                return ProcessingResult(True, "Vocal features extracted", features)
         except Exception as e:
-            logger.error(f"Vocal feature extraction failed: {e}")
-            return ProcessingResult(False, f"Vocal feature extraction failed: {str(e)}")
-    def build_style_profile(self, reference_features: List[Dict]) -> ProcessingResult:
-        """Build comprehensive style profile from reference tracks"""
         try:
-            if len(reference_features) < 2:
-                return ProcessingResult(False, "Need at least 2 reference tracks")
-            valid_features = [f for f in reference_features if f is not None]
-            if len(valid_features) < 2:
-                return ProcessingResult(False, "Not enough valid reference features")
-            # Aggregate features
-            profile = {}
-            for key in valid_features[0].keys():
-                if key != 'mfccs':  # Handle MFCCs separately
-                    values = [f[key] for f in valid_features if key in f and f[key] is not None]
-                    if values:
-                        profile[key] = np.mean(values)
-                        profile[f'{key}_std'] = np.std(values)
-            # Handle MFCCs
-            mfcc_arrays = [f['mfccs'] for f in valid_features if 'mfccs' in f]
-            if mfcc_arrays:
-                profile['mfccs'] = np.mean(mfcc_arrays, axis=0)
-            profile['num_references'] = len(valid_features)
-            return ProcessingResult(True, f"Style profile built from {len(valid_features)} references", profile)
         except Exception as e:
-            logger.error(f"Style profile creation failed: {e}")
-            return ProcessingResult(False, f"Style profile creation failed: {str(e)}")
-    def compare_performance(self, user_features: Dict, style_profile: Dict) -> ProcessingResult:
-        """Compare user performance to style profile"""
         try:
-            comparison = {}
-            # Pitch comparison
-            pitch_diff = abs(user_features.get('fundamental_frequency_mean', 0) -
-                           style_profile.get('fundamental_frequency_mean', 0))
-            comparison['pitch_accuracy'] = max(0, 100 - (pitch_diff / 10))  # Score out of 100
-            # Range comparison
-            user_range = user_features.get('pitch_range', 0)
-            target_range = style_profile.get('pitch_range', 0)
-            range_ratio = min(user_range, target_range) / max(user_range, target_range) if max(user_range, target_range) > 0 else 0
-            comparison['range_match'] = range_ratio * 100
-            # Vibrato comparison
-            user_vibrato = user_features.get('vibrato_extent', 0)
-            target_vibrato = style_profile.get('vibrato_extent', 0)
-            vibrato_diff = abs(user_vibrato - target_vibrato)
-            comparison['vibrato_match'] = max(0, 100 - vibrato_diff * 50)
-            # Overall style similarity
-            comparison['overall_similarity'] = np.mean([
-                comparison['pitch_accuracy'],
-                comparison['range_match'],
-                comparison['vibrato_match']
-            ])
-            return ProcessingResult(True, "Performance comparison complete", comparison)
         except Exception as e:
-            logger.error(f"Performance comparison failed: {e}")
-            return ProcessingResult(False, f"Performance comparison failed: {str(e)}")
-# Global processor instance
-processor = AudioProcessorPro()
-style_coach = StyleCoachingEngine(processor)
-def format_analysis_text(features: Dict[str, Any]) -> str:
     """Format analysis results for display"""
-    if not features:
-        return "No analysis data available"
-    text = f"""📊 **Audio Analysis Results**
-🎵 **Basic Properties**
-• Duration: {features.get('duration', 'N/A')} seconds
-• Sample Rate: {features.get('sample_rate', 'N/A')} Hz
-• File Size: {features.get('file_size', 0) / 1024:.1f} KB
-🎼 **Musical Characteristics**
-• Tempo: {features.get('tempo', 'N/A')} BPM
-• Beats Detected: {features.get('num_beats', 'N/A')}
-• Rhythm Regularity: {features.get('rhythm_regularity', 'N/A')}
-🔊 **Spectral Properties**
-• Brightness (Spectral Centroid): {features.get('spectral_centroid_mean', 'N/A')} Hz
-• Spectral Bandwidth: {features.get('spectral_bandwidth_mean', 'N/A')} Hz
-• Zero Crossing Rate: {features.get('zero_crossing_rate_mean', 'N/A')}
-📈 **Dynamic Characteristics**
-• Average Energy: {features.get('rms_energy_mean', 'N/A')}
-• Dynamic Range: {features.get('dynamic_range', 'N/A')}
-• Signal-to-Noise Ratio: {features.get('signal_to_noise_ratio', 'N/A')} dB
-🎤 **Pitch Information**
-• Average Pitch: {features.get('pitch_mean', 'N/A')} Hz
-• Pitch Variation: {features.get('pitch_std', 'N/A')} Hz
-• Pitch Range: {features.get('pitch_range', 'N/A')} Hz"""
-    return text
-def process_audio_separation(audio_file, stems_mode, pitch_shift, reverb, chorus, compression):
-    """Main audio separation processing function"""
     if not audio_file:
-        return (
-            "❌ Please upload an audio file",
-            None, None, None, None,
-            "No analysis available"
-        )
     try:
-        # Progress tracking
-        progress_updates = []
-        def progress_callback(progress, message):
-            progress_updates.append(f"[{progress*100:.0f}%] {message}")
-        # Analyze features first
-        feature_result = processor.extract_comprehensive_features(audio_file)
-        if not feature_result.success:
-            return (
-                f"❌ {feature_result.message}",
-                None, None, None, None,
-                feature_result.error or "Analysis failed"
-            )
-        analysis_text = format_analysis_text(feature_result.data)
         # Separate audio
-        stems = 2 if stems_mode == "2-stem (Vocals + Instrumental)" else 4
-        separation_result = processor.separate_audio_sources(audio_file, stems, progress_callback)
-        if not separation_result.success:
-            return (
-                f"❌ {separation_result.message}",
-                None, None, None, None,
-                analysis_text
-            )
-        separated_files = separation_result.data
-        # Apply effects to vocals if requested
-        effects_config = {
-            'pitch_shift': pitch_shift,
-            'reverb': reverb,
-            'chorus': chorus,
-            'compression': compression
-        }
-        vocals_path = separated_files.get('vocals')
-        if vocals_path and any(v != 0 for v in effects_config.values()):
-            effects_result = processor.apply_vocal_effects(vocals_path, effects_config)
-            if effects_result.success:
-                vocals_path = effects_result.data['output_path']
-                separation_result.message += f" | {effects_result.message}"
-        # Prepare outputs based on stems
-        if stems == 2:
             return (
-                f"✅ {separation_result.message}",
-                vocals_path,
-                separated_files.get('accompaniment'),
-                None, None,
                 analysis_text
             )
         else:
             return (
-                f"✅ {separation_result.message}",
-                vocals_path,
-                separated_files.get('drums'),
-                separated_files.get('bass'),
-                separated_files.get('other'),
                 analysis_text
             )
     except Exception as e:
-        logger.error(f"Audio separation processing failed: {e}")
-        return (
-            f"❌ Processing failed: {str(e)}",
-            None, None, None, None,
-            "Analysis failed due to processing error"
-        )
-def process_live_recording(audio_file, pitch_shift, reverb, chorus, compression):
-    """Process live recording with effects"""
     if not audio_file:
-        return (
-            "❌ Please record audio first",
-            None,
-            "No analysis available"
-        )
     try:
-        # Analyze features
-        feature_result = processor.extract_comprehensive_features(audio_file)
-        if not feature_result.success:
-            return (
-                f"❌ {feature_result.message}",
-                None,
-                feature_result.error or "Analysis failed"
-            )
-        analysis_text = format_analysis_text(feature_result.data)
         # Apply effects
-        effects_config = {
-            'pitch_shift': pitch_shift,
-            'reverb': reverb,
-            'chorus': chorus,
-            'compression': compression
-        }
-        effects_result = processor.apply_vocal_effects(audio_file, effects_config)
-        if not effects_result.success:
-            return (
-                f"❌ {effects_result.message}",
-                None,
-                analysis_text
-            )
-        return (
-            f"✅ {effects_result.message}",
-            effects_result.data['output_path'],
-            analysis_text
-        )
     except Exception as e:
-        logger.error(f"Live recording processing failed: {e}")
-        return (
-            f"❌ Processing failed: {str(e)}",
-            None,
-            "Analysis failed due to processing error"
-        )
 def process_style_coaching(reference_files, user_audio):
-    """Advanced style coaching analysis"""
     if not reference_files or len(reference_files) < 2:
-        return (
-            "❌ Please upload at least 2 reference tracks",
-            "No references processed",
-            "Upload reference tracks to get personalized coaching feedback"
-        )
     if not user_audio:
-        return (
-            "❌ Please upload or record your performance",
-            "No references processed",
-            "Record your performance to compare with references"
-        )
     try:
         # Process reference tracks
         ref_features = []
         ref_status = []
-        for i, ref_file in enumerate(reference_files[:5]):  # Limit to 5 references
-            try:
-                # Separate vocals from reference
-                separation_result = processor.separate_audio_sources(ref_file.name, stems=2)
-                if separation_result.success:
-                    vocals_path = separation_result.data.get('vocals')
-                    if vocals_path:
-                        # Extract vocal features
-                        vocal_result = style_coach.extract_vocal_features(vocals_path)
-                        if vocal_result.success:
-                            ref_features.append(vocal_result.data)
-                            ref_status.append(f"✅ Reference {i+1}: Processed successfully")
-                        else:
-                            ref_status.append(f"❌ Reference {i+1}: Feature extraction failed")
-                    else:
-                        ref_status.append(f"❌ Reference {i+1}: Vocal separation failed")
                 else:
-                    ref_status.append(f"❌ Reference {i+1}: {separation_result.message}")
-            except Exception as e:
-                ref_status.append(f"❌ Reference {i+1}: {str(e)}")
         if len(ref_features) < 2:
-            return (
-                "❌ Failed to process enough reference tracks",
-                "\n".join(ref_status),
-                "Need at least 2 valid reference tracks for style analysis"
-            )
-        # Build style profile
-        profile_result = style_coach.build_style_profile(ref_features)
-        if not profile_result.success:
-            return (
-                f"❌ {profile_result.message}",
-                "\n".join(ref_status),
-                "Style profile creation failed"
-            )
         # Process user audio
-        user_separation = processor.separate_audio_sources(user_audio, stems=2)
-        if not user_separation.success:
-            return (
-                f"❌ Failed to process your audio: {user_separation.message}",
-                "\n".join(ref_status),
-                "Could not separate vocals from your performance"
-            )
-        user_vocals_path = user_separation.data.get('vocals')
-        if not user_vocals_path:
-            return (
-                "❌ Could not extract vocals from your performance",
-                "\n".join(ref_status),
-                "Vocal separation failed"
-            )
-        # Extract user vocal features
-        user_vocal_result = style_coach.extract_vocal_features(user_vocals_path)
-        if not user_vocal_result.success:
-            return (
-                f"❌ {user_vocal_result.message}",
-                "\n".join(ref_status),
-                "Could not analyze your vocal characteristics"
-            )
-        # Compare performance
-        comparison_result = style_coach.compare_performance(
-            user_vocal_result.data,
-            profile_result.data
-        )
-        if not comparison_result.success:
-            return (
-                f"❌ {comparison_result.message}",
-                "\n".join(ref_status),
-                "Performance comparison failed"
-            )
-        # Generate feedback
-        comparison = comparison_result.data
-        feedback = f"""🎯 **Style Coaching Analysis**
-📊 **Performance Scores**
-• Pitch Accuracy: {comparison['pitch_accuracy']:.1f}/100
-• Range Match: {comparison['range_match']:.1f}/100
-• Vibrato Control: {comparison['vibrato_match']:.1f}/100
-• **Overall Similarity: {comparison['overall_similarity']:.1f}/100**
-🎵 **Detailed Feedback**
-**Pitch Control:**
-{"✅ Excellent pitch accuracy!" if comparison['pitch_accuracy'] > 80 else
- "⚠️ Work on pitch accuracy. Practice scales and interval training." if comparison['pitch_accuracy'] > 60 else
- "❌ Significant pitch issues. Focus on basic pitch matching exercises."}
-**Vocal Range:**
-{"✅ Great range utilization!" if comparison['range_match'] > 80 else
- "⚠️ Expand your vocal range to match the style." if comparison['range_match'] > 60 else
- "❌ Limited range usage. Practice range extension exercises."}
-**Vibrato Technique:**
-{"✅ Vibrato control matches the style well!" if comparison['vibrato_match'] > 80 else
- "⚠️ Adjust vibrato technique to match reference style." if comparison['vibrato_match'] > 60 else
- "❌ Vibrato needs work. Practice vibrato control exercises."}
-🎯 **Recommended Actions:**
-{f"• Continue practicing - you're very close to the target style!" if comparison['overall_similarity'] > 80 else
- f"• Focus on the areas scoring below 70 points" if comparison['overall_similarity'] > 60 else
- f"• Start with basic vocal technique exercises before style matching"}
-📈 **Progress Tracking:**
-Analyzed {len(ref_features)} reference tracks
-Overall performance: {"Advanced" if comparison['overall_similarity'] > 80 else "Intermediate" if comparison['overall_similarity'] > 60 else "Beginner"}
-"""
-        final_status = f"✅ Style coaching complete! Analyzed {len(ref_features)} references and generated personalized feedback."
-        return (
-            final_status,
-            "\n".join(ref_status),
-            feedback
-        )
     except Exception as e:
-        logger.error(f"Style coaching failed: {e}")
-        return (
-            f"❌ Style coaching failed: {str(e)}",
-            "Processing error occurred",
-            "An error occurred during analysis. Please try again."
-        )
-# Create comprehensive Gradio interface
-def create_main_interface():
-    """Create the main professional Gradio interface"""
-    # Custom CSS for professional styling
-    custom_css = """
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-        max-width: 1200px !important;
-        margin: auto;
-    }
-    .header-text {
-        text-align: center;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        padding: 2rem;
-        border-radius: 10px;
-        margin-bottom: 2rem;
-    }
-    .tab-nav {
-        border-radius: 10px 10px 0 0;
-    }
-    .output-text {
-        font-family: 'Courier New', monospace;
-    }
-    """
-    with gr.Blocks(css=custom_css, title="Live Audio Singing Helper Pro", theme=gr.themes.Soft()) as demo:
-        # Header
         gr.HTML("""
-        <div class="header-text">
-            <h1>🎤 Live Audio Singing Helper Pro</h1>
-            <p>Professional-grade audio processing for singers and musicians</p>
-            <p><strong>Version 2.0.0</strong> | Advanced source separation, vocal effects, and AI-powered style coaching</p>
         </div>
         """)
         with gr.Tabs():
-            # Tab 1: Audio Separation & Analysis
-            with gr.Tab("🎵 Audio Separation & Analysis", elem_classes="tab-nav"):
-                gr.Markdown("""
-                ### Professional Audio Source Separation
-                Upload your audio files to separate vocals from instruments with state-of-the-art AI models.
-                Apply professional vocal effects and get detailed audio analysis.
-                """)
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        audio_input = gr.Audio(
-                            type="filepath",
-                            label="📁 Upload Audio File",
-                            sources=["upload"],
-                            show_download_button=True
-                        )
-                        stems_mode = gr.Dropdown(
                             choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                             value="2-stem (Vocals + Instrumental)",
-                            label="🎯 Separation Mode",
-                            info="Choose the complexity of separation"
-                        )
-                        with gr.Group():
-                            gr.Markdown("**🎛️ Vocal Effects**")
-                            with gr.Row():
-                                pitch_shift = gr.Slider(
-                                    minimum=-12, maximum=12, value=0, step=0.5,
-                                    label="Pitch Shift (semitones)",
-                                    info="Transpose vocals up or down"
-                                )
-                                reverb = gr.Slider(
-                                    minimum=0, maximum=0.5, value=0, step=0.05,
-                                    label="Reverb Amount",
-                                    info="Add spatial depth"
-                                )
-                            with gr.Row():
-                                chorus = gr.Slider(
-                                    minimum=0, maximum=0.3, value=0, step=0.05,
-                                    label="Chorus Effect",
-                                    info="Add vocal thickness"
-                                )
-                                compression = gr.Slider(
-                                    minimum=0, maximum=1, value=0, step=0.1,
-                                    label="Compression",
-                                    info="Even out dynamics"
-                                )
-                        process_btn = gr.Button(
-                            "🚀 Process Audio",
-                            variant="primary",
-                            size="lg"
                         )
-                    with gr.Column(scale=1):
-                        status_output = gr.Textbox(
-                            label="📋 Processing Status",
-                            interactive=False,
-                            lines=3
-                        )
-                        analysis_output = gr.Textbox(
-                            label="📊 Audio Analysis",
-                            interactive=False,
-                            lines=20,
-                            elem_classes="output-text"
-                        )
-                # Output audio files
                 with gr.Row():
-                    audio_output1 = gr.Audio(
-                        label="🎤 Vocals/Primary",
-                        show_download_button=True
-                    )
-                    audio_output2 = gr.Audio(
-                        label="🎼 Instrumental/Drums",
-                        show_download_button=True
-                    )
                 with gr.Row():
-                    audio_output3 = gr.Audio(
-                        label="🎸 Bass (4-stem only)",
-                        show_download_button=True
-                    )
-                    audio_output4 = gr.Audio(
-                        label="🎹 Other (4-stem only)",
-                        show_download_button=True
-                    )
-            # Tab 2: Live Recording & Effects
-            with gr.Tab("🎙️ Live Recording & Effects"):
-                gr.Markdown("""
-                ### Real-time Recording & Vocal Processing
-                Record your voice directly and apply professional vocal effects in real-time.
-                Perfect for vocal practice and experimentation.
-                """)
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        mic_input = gr.Audio(
-                            type="filepath",
-                            sources=["microphone"],
-                            label="🎙️ Record Your Voice",
-                            show_download_button=True
-                        )
-                        with gr.Group():
-                            gr.Markdown("**🎛️ Real-time Effects**")
-                            with gr.Row():
-                                live_pitch = gr.Slider(
-                                    minimum=-12, maximum=12, value=0, step=0.5,
-                                    label="Pitch Correction",
-                                    info="Real-time pitch adjustment"
-                                )
-                                live_reverb = gr.Slider(
-                                    minimum=0, maximum=0.5, value=0, step=0.05,
-                                    label="Studio Reverb",
-                                    info="Professional reverb effect"
-                                )
-                            with gr.Row():
-                                live_chorus = gr.Slider(
-                                    minimum=0, maximum=0.3, value=0, step=0.05,
-                                    label="Vocal Doubling",
-                                    info="Thicken your voice"
-                                )
-                                live_compression = gr.Slider(
-                                    minimum=0, maximum=1, value=0, step=0.1,
-                                    label="Dynamic Control",
-                                    info="Professional compression"
-                                )
-                        live_process_btn = gr.Button(
-                            "🎵 Process Recording",
-                            variant="primary",
-                            size="lg"
-                        )
-                    with gr.Column(scale=1):
-                        live_status = gr.Textbox(
-                            label="📋 Processing Status",
-                            interactive=False,
-                            lines=3
-                        )
-                        live_analysis = gr.Textbox(
-                            label="📊 Recording Analysis",
-                            interactive=False,
-                            lines=15,
-                            elem_classes="output-text"
-                        )
-                live_output = gr.Audio(
-                    label="🎧 Processed Recording",
-                    show_download_button=True
-                )
-            # Tab 3: Style Coaching
-            with gr.Tab("🎭 AI Style Coaching"):
-                gr.Markdown("""
-                ### Professional Vocal Style Analysis & Coaching
-                Upload reference tracks from artists you want to emulate, then record your performance.
-                Get detailed AI-powered feedback on how to improve your vocal style.
-                """)
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        reference_files = gr.File(
-                            label="📚 Reference Tracks (2-5 songs)",
                             file_count="multiple",
-                            file_types=["audio"],
-                            info="Upload songs from artists whose style you want to learn"
-                        )
-                        user_performance = gr.Audio(
-                            type="filepath",
-                            label="🎤 Your Performance",
-                            sources=["upload", "microphone"],
-                            show_download_button=True,
-                            info="Record or upload your singing"
                         )
-                        coach_btn = gr.Button(
-                            "🎯 Analyze Style & Get Coaching",
-                            variant="primary",
-                            size="lg"
                         )
-                    with gr.Column(scale=1):
-                        coaching_status = gr.Textbox(
-                            label="📋 Analysis Status",
-                            interactive=False,
-                            lines=4
-                        )
-                        reference_status = gr.Textbox(
-                            label="📚 Reference Processing",
-                            interactive=False,
-                            lines=8
-                        )
-                coaching_feedback = gr.Textbox(
-                    label="🎯 Personalized Coaching Feedback",
-                    interactive=False,
-                    lines=25,
-                    elem_classes="output-text"
-                )
-            # Tab 4: Help & Documentation
-            with gr.Tab("ℹ️ Help & Documentation"):
-                gr.Markdown(f"""
-                # 📖 Live Audio Singing Helper Pro Documentation
-                **Version:** {VERSION}
-                **Author:** Lead Developer Team
-                **Last Updated:** 2024
-                ## 🚀 Features Overview
-                ### 🎵 Audio Separation & Analysis
-                - **Source Separation**: Advanced AI-powered vocal isolation using Spleeter
-                - **Multi-stem Options**: 2-stem (vocals/instrumental) or 4-stem (vocals/drums/bass/other)
-                - **Professional Effects**: Pitch shifting, reverb, chorus, and compression
-                - **Detailed Analysis**: Comprehensive audio feature extraction and visualization
-                ### 🎙️ Live Recording & Effects
-                - **Real-time Recording**: Direct microphone input with instant processing
-                - **Professional Effects Chain**: Studio-quality vocal processing
-                - **Live Analysis**: Instant feedback on your recording characteristics
-                ### 🎭 AI Style Coaching
-                - **Reference-based Learning**: Upload tracks from artists you want to emulate
-                - **AI-powered Analysis**: Advanced vocal characteristic comparison
-                - **Personalized Feedback**: Specific recommendations for improvement
-                - **Progress Tracking**: Monitor your vocal development over time
-                ## 📋 Supported Formats
-                - **Input**: MP3, WAV, FLAC, M4A, OGG, AAC
-                - **Output**: High-quality WAV files
-                - **Maximum File Size**: 50MB per file
-                - **Maximum Duration**: 10 minutes per track
-                ## 🎯 Usage Tips
-                ### For Best Results:
-                1. **Use high-quality recordings** - Better input = better output
-                2. **Keep files under 5 minutes** for faster processing
-                3. **Use full songs** for style coaching (not isolated vocals)
-                4. **Record in a quiet environment** for live recording
-                5. **Choose similar genres** for reference tracks in style coaching
-                ### Troubleshooting:
-                - **File too large**: Compress your audio or trim to shorter length
-                - **Poor separation quality**: Try with different audio files or formats
-                - **Style coaching not working**: Ensure reference tracks have clear vocals
-                ## 🔧 Technical Specifications
-                - **Separation Engine**: Spleeter by Deezer Research
-                - **Audio Processing**: librosa + scipy
-                - **AI Analysis**: Advanced signal processing algorithms
-                - **Effects Processing**: Professional-grade audio effects
-                ## 🎼 Advanced Tips for Musicians
-                ### Vocal Practice:
-                - Use separated vocals to practice harmonies
-                - Apply pitch correction to hear your target pitch
-                - Use compression to understand dynamic control
-                ### Style Development:
-                - Analyze multiple songs from the same artist for consistency
-                - Compare your progress over time using the same reference tracks
-                - Focus on one aspect at a time (pitch, timing, tone)
-                ### Recording Techniques:
-                - Record multiple takes and compare analyses
-                - Experiment with different microphone distances
-                - Use effects subtly for natural-sounding results
-                ## 🆘 Support & Contact
-                For technical support or feature requests, please contact the development team.
                 ---
-                **⚠️ Important Notes:**
-                - This tool is for educational and practice purposes
-                - Respect copyright when using reference tracks
-                - Results may vary based on audio quality and complexity
-                - Processing times depend on file size and server load
                 """)
-        # Event handlers with comprehensive error handling
-        process_btn.click(
-            fn=process_audio_separation,
-            inputs=[audio_input, stems_mode, pitch_shift, reverb, chorus, compression],
-            outputs=[status_output, audio_output1, audio_output2, audio_output3, audio_output4, analysis_output],
-            show_progress=True
         )
-        live_process_btn.click(
-            fn=process_live_recording,
-            inputs=[mic_input, live_pitch, live_reverb, live_chorus, live_compression],
-            outputs=[live_status, live_output, live_analysis],
-            show_progress=True
         )
-        coach_btn.click(
-            fn=process_style_coaching,
-            inputs=[reference_files, user_performance],
-            outputs=[coaching_status, reference_status, coaching_feedback],
-            show_progress=True
         )
-    return demo
 if __name__ == "__main__":
-    try:
-        logger.info(f"Starting Live Audio Singing Helper Pro v{VERSION}")
-        demo = create_main_interface()
-        demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_api=True,
-            show_error=True,
-            quiet=False
-        )
-    except Exception as e:
-        logger.error(f"Failed to launch application: {e}")
-        traceback.print_exc()
-        sys.exit(1)
-    finally:
-        # Cleanup on exit
-        try:
-            processor.cleanup_session()
-        except:
-            pass

 import gradio as gr
 import librosa
 import numpy as np
 import soundfile as sf
 import os
 import tempfile
+import shutil
 from pathlib import Path
 import warnings
 warnings.filterwarnings("ignore")
+# Import for advanced features
+try:
+    from spleeter.separator import Separator
+    SPLEETER_AVAILABLE = True
+except ImportError:
+    SPLEETER_AVAILABLE = False
+    print("Spleeter not available - source separation disabled")
+try:
+    import scipy.signal
+    from scipy.spatial.distance import euclidean
+    from dtw import dtw
+    ADVANCED_FEATURES = True
+except ImportError:
+    ADVANCED_FEATURES = False
+    print("Advanced features not available")
+class AudioEngine:
+    """Audio processing engine backing the Gradio handlers.
+    Wraps librosa analysis, optional Spleeter source separation and simple
+    vocal effects. The analysis/processing methods never raise: each returns
+    a dict with a 'success' flag, plus 'error' (a message string) on failure.
+    """
+    # Stem files looked up in Spleeter's output folder for each supported model type.
+    _STEM_NAMES = {
+        "2stems": ("vocals", "accompaniment"),
+        "4stems": ("vocals", "drums", "bass", "other"),
+    }
     def __init__(self):
+        # Scratch directory for separated stems and processed audio; removed by cleanup().
+        self.temp_dir = tempfile.mkdtemp()
+        self.separators = {}  # Cache for Spleeter models, keyed by model type
+    @staticmethod
+    def _as_float(value):
+        """Collapse a scalar or single-element array to a plain float.
+        NOTE(review): recent librosa releases appear to return the beat_track
+        tempo as a 1-element ndarray, which breaks ':.1f' formatting - confirm
+        against the installed librosa version.
+        """
+        return float(np.asarray(value).reshape(-1)[0])
+    @staticmethod
+    def _dominant_pitches(y, sr):
+        """Return, per frame, the piptrack pitch of the strongest bin, keeping only values > 0."""
+        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
+        strongest_bins = magnitudes.argmax(axis=0)
+        per_frame = pitches[strongest_bins, np.arange(pitches.shape[1])]
+        return per_frame[per_frame > 0]
+    def analyze_audio(self, audio_path):
+        """Extract summary audio features from the file at audio_path.
+        Returns a dict with 'success': True and the rounded features
+        (duration, tempo, sample_rate, spectral_centroid, spectral_rolloff,
+        zero_crossing_rate, rms_energy, average_pitch, pitch_count,
+        beats_detected), or {'success': False, 'error': str} on any failure.
+        """
         try:
+            y, sr = librosa.load(audio_path)
+            duration = len(y) / sr
+            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
+            tempo = self._as_float(tempo)
+            # Frame-wise features are averaged into one number per track.
+            spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
+            spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
+            zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
+            rms_energy = float(np.mean(librosa.feature.rms(y=y)))
+            pitch_values = self._dominant_pitches(y, sr)
+            avg_pitch = float(np.mean(pitch_values)) if pitch_values.size else 0
+            return {
+                'success': True,
+                'duration': round(duration, 2),
+                'tempo': round(tempo, 1),
+                'sample_rate': sr,
+                'spectral_centroid': round(spectral_centroid, 2),
+                'spectral_rolloff': round(spectral_rolloff, 2),
+                'zero_crossing_rate': round(zero_crossing_rate, 4),
+                'rms_energy': round(rms_energy, 4),
+                'average_pitch': round(avg_pitch, 2),
+                'pitch_count': int(pitch_values.size),
+                'beats_detected': len(beats)
+            }
         except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def separate_vocals(self, audio_path, model_type="2stems"):
+        """Separate audio_path into stems with Spleeter.
+        model_type must be "2stems" or "4stems". On success the returned dict
+        maps each stem name to its WAV path, or to None when the expected file
+        is missing. Unsupported model types and Spleeter failures yield
+        {'success': False, 'error': str} rather than an implicit None.
+        """
+        if not SPLEETER_AVAILABLE:
+            return {'success': False, 'error': 'Spleeter not available'}
+        stem_names = self._STEM_NAMES.get(model_type)
+        if stem_names is None:
+            return {'success': False, 'error': f'Unsupported model type: {model_type}'}
         try:
+            # Build each Separator once and reuse it on later calls.
+            if model_type not in self.separators:
+                self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz')
+            separator = self.separators[model_type]
+            # mkdtemp guarantees a fresh directory, unlike a random numeric suffix.
+            output_dir = tempfile.mkdtemp(prefix="separation_", dir=self.temp_dir)
+            separator.separate_to_file(audio_path, output_dir)
+            # Stems are looked up in a sub-folder named after the input file's stem.
+            result_dir = os.path.join(output_dir, Path(audio_path).stem)
+            result = {'success': True}
+            for stem in stem_names:
+                stem_path = os.path.join(result_dir, f"{stem}.wav")
+                result[stem] = stem_path if os.path.exists(stem_path) else None
+            return result
         except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
+        """Apply pitch shift and/or reverb and write the result to a new WAV file.
+        pitch_shift is in semitones. reverb is the wet/dry mix, clamped to
+        [0, 1]; it is only applied when the optional scipy import succeeded.
+        Returns {'success': True, 'output': path} or
+        {'success': False, 'error': str}.
+        """
         try:
+            y, sr = librosa.load(audio_path)
+            if pitch_shift != 0:
+                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
+            if reverb > 0 and ADVANCED_FEATURES:
+                # Synthetic impulse response: 0.5 s of exponentially decaying noise.
+                reverb_length = int(0.5 * sr)
+                impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
+                # 'full' truncated to the input length keeps the tail causal
+                # ('same' would centre the kernel and shift the signal in time).
+                wet = scipy.signal.convolve(y, impulse, mode='full')[:len(y)]
+                dry_peak = np.max(np.abs(y))
+                wet_peak = np.max(np.abs(wet))
+                if wet_peak > 0:
+                    wet = wet * (dry_peak / wet_peak)  # level-match wet to dry
+                # Blend dry and wet so the reverb amount actually changes the sound.
+                mix = min(float(reverb), 1.0)
+                y = (1.0 - mix) * y + mix * wet
+                peak = np.max(np.abs(y))
+                if peak > 0:  # silent input would otherwise divide by zero
+                    y = y / peak
+            # mkstemp reserves a unique file name inside the scratch directory.
+            fd, output_path = tempfile.mkstemp(prefix="processed_", suffix=".wav", dir=self.temp_dir)
+            os.close(fd)
+            sf.write(output_path, y, sr)
+            return {'success': True, 'output': output_path}
         except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def extract_vocal_features(self, audio_path):
+        """Extract the features used for style coaching.
+        Returns a dict with 'success': True and mean_pitch, pitch_std,
+        pitch_range, tempo, spectral_centroid and rms_energy as floats, or
+        {'success': False, 'error': str} (e.g. 'No pitch detected').
+        """
         try:
+            y, sr = librosa.load(audio_path)
+            pitch_values = self._dominant_pitches(y, sr)
+            if not pitch_values.size:
+                return {'success': False, 'error': 'No pitch detected'}
+            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+            return {
+                'success': True,
+                'mean_pitch': float(np.mean(pitch_values)),
+                'pitch_std': float(np.std(pitch_values)),
+                'pitch_range': float(pitch_values.max() - pitch_values.min()),
+                'tempo': self._as_float(tempo),
+                'spectral_centroid': float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))),
+                'rms_energy': float(np.mean(librosa.feature.rms(y=y)))
+            }
         except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def compare_vocal_styles(self, user_features, reference_features_list):
+        """Compare the user's features with the average of the reference features.
+        Returns {'success': True, 'score', 'feedback', 'metrics'} where score
+        is clamped at 0 from below, feedback is a list of strings and metrics
+        holds the rounded absolute differences; or
+        {'success': False, 'error': str} on failure.
+        """
+        if not ADVANCED_FEATURES:
+            return {'success': False, 'error': 'Advanced features not available'}
+        if not reference_features_list:
+            # Without references every average would be 0 and the score meaningless.
+            return {'success': False, 'error': 'No reference features provided'}
         try:
+            # Average each feature over the references that provide it.
+            ref_avg = {}
+            for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']:
+                values = [self._as_float(ref[key]) for ref in reference_features_list if key in ref]
+                ref_avg[key] = float(np.mean(values)) if values else 0
+            # Coerce to float so 1-element arrays cannot leak into the f-strings below.
+            pitch_diff = abs(self._as_float(user_features['mean_pitch']) - ref_avg['mean_pitch'])
+            tempo_diff = abs(self._as_float(user_features['tempo']) - ref_avg['tempo'])
+            timbre_diff = abs(self._as_float(user_features['spectral_centroid']) - ref_avg['spectral_centroid'])
+            energy_diff = abs(self._as_float(user_features['rms_energy']) - ref_avg['rms_energy'])
+            feedback = []
+            if pitch_diff > 50:
+                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
+            else:
+                feedback.append("🎵 Pitch: Good pitch accuracy!")
+            if tempo_diff > 10:
+                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
+            else:
+                feedback.append("⏱️ Tempo: Good timing!")
+            if timbre_diff > 500:
+                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
+            else:
+                feedback.append("🗣️ Timbre: Good vocal tone match!")
+            if energy_diff > 0.1:
+                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
+            else:
+                feedback.append("🔊 Energy: Good energy level!")
+            # Weighted penalty subtracted from 100 and clamped at 0.
+            overall_score = max(0, 100 - (pitch_diff/2 + tempo_diff + timbre_diff/10 + energy_diff*100))
+            return {
+                'success': True,
+                'score': round(overall_score, 1),
+                'feedback': feedback,
+                'metrics': {
+                    'pitch_diff': round(pitch_diff, 1),
+                    'tempo_diff': round(tempo_diff, 1),
+                    'timbre_diff': round(timbre_diff, 1),
+                    'energy_diff': round(energy_diff, 3)
+                }
+            }
         except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def cleanup(self):
+        """Remove the scratch directory; best-effort, never raises on filesystem errors."""
+        # ignore_errors covers both a missing directory and files that cannot be deleted.
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+# Module-level engine shared by the handler functions below. It is built at
+# import time, which also creates its temporary working directory.
+engine = AudioEngine()
+def format_analysis_results(analysis):
+    """Render an analysis dict as display text.
+    A dict whose 'success' entry is falsy yields a one-line failure message
+    built from its 'error' entry; otherwise the feature values are laid out
+    one per line under three headings.
+    """
+    if not analysis['success']:
+        return f"❌ Analysis failed: {analysis['error']}"
+    report_lines = [
+        "📊 Audio Analysis Results",
+        "🎵 Basic Properties:",
+        f"• Duration: {analysis['duration']} seconds",
+        f"• Sample Rate: {analysis['sample_rate']} Hz",
+        f"• Tempo: {analysis['tempo']} BPM",
+        "🔊 Audio Characteristics:",
+        f"• Spectral Centroid: {analysis['spectral_centroid']} Hz",
+        f"• Spectral Rolloff: {analysis['spectral_rolloff']} Hz",
+        f"• Zero Crossing Rate: {analysis['zero_crossing_rate']}",
+        f"• RMS Energy: {analysis['rms_energy']}",
+        "🎤 Vocal Information:",
+        f"• Average Pitch: {analysis['average_pitch']} Hz",
+        f"• Pitch Points Detected: {analysis['pitch_count']}",
+        f"• Beats Detected: {analysis['beats_detected']}",
+    ]
+    return "\n".join(report_lines)
+def process_audio_separation(audio_file, separation_mode):
+    """Gradio handler: analyse the upload, then split it into stems.
+    Always returns a 6-tuple (status, stem 1, stem 2, stem 3, stem 4,
+    analysis text). Unused stem slots are None: 2-stem mode fills only the
+    first two (vocals, accompaniment); 4-stem mode fills vocals, drums, bass
+    and other in that order.
+    """
     if not audio_file:
+        return "❌ Please upload an audio file", None, None, None, None, ""
+    if not SPLEETER_AVAILABLE:
+        return "❌ Spleeter not available for source separation", None, None, None, None, ""
     try:
+        # The analysis runs first so its text can accompany a failed separation.
+        analysis_text = format_analysis_results(engine.analyze_audio(audio_file))
+        two_stem = "2-stem" in separation_mode
+        model_type = "2stems" if two_stem else "4stems"
+        stems = engine.separate_vocals(audio_file, model_type)
+        if not stems['success']:
+            return f"❌ Separation failed: {stems['error']}", None, None, None, None, analysis_text
+        if two_stem:
+            status = "✅ 2-stem separation completed successfully!"
+            slot_keys = ('vocals', 'accompaniment', None, None)
+        else:
+            status = "✅ 4-stem separation completed successfully!"
+            slot_keys = ('vocals', 'drums', 'bass', 'other')
+        # A None key marks an output slot this mode does not use.
+        stem_paths = [stems.get(key) if key else None for key in slot_keys]
+        return (status, *stem_paths, analysis_text)
     except Exception as e:
+        return f"❌ Processing error: {str(e)}", None, None, None, None, ""
+def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
+    """Gradio handler: analyse the upload and run it through the effects chain.
+    Returns a 3-tuple (status message, processed audio path or None,
+    analysis text of the original audio).
+    """
     if not audio_file:
+        return "❌ Please upload an audio file", None, ""
     try:
+        # The analysis describes the untouched input, not the processed output.
+        analysis_text = format_analysis_results(engine.analyze_audio(audio_file))
+        outcome = engine.apply_effects(audio_file, pitch_shift, reverb_amount)
+        if not outcome['success']:
+            return f"❌ Effects failed: {outcome['error']}", None, analysis_text
+        # List only the effects whose control was moved away from neutral.
+        labels = []
+        if pitch_shift != 0:
+            labels.append(f"Pitch: {pitch_shift:+.1f} semitones")
+        if reverb_amount > 0:
+            labels.append(f"Reverb: {reverb_amount:.2f}")
+        if labels:
+            status = f"✅ Effects applied: {', '.join(labels)}"
+        else:
+            status = "✅ Audio processed (no effects)"
+        return status, outcome['output'], analysis_text
     except Exception as e:
+        return f"❌ Processing error: {str(e)}", None, ""
 def process_style_coaching(reference_files, user_audio):
+    """Gradio handler: compare the user's vocals with 2-5 reference tracks.
+    Only the first five reference files are used. Each is vocal-separated
+    and analysed, then the user's performance is scored against the average
+    of the references that processed successfully.
+    Returns a 3-tuple (status message, per-reference processing log,
+    coaching feedback text). The log is returned on error paths as well.
+    """
     if not reference_files or len(reference_files) < 2:
+        return "❌ Upload at least 2 reference tracks", "", ""
     if not user_audio:
+        return "❌ Please record or upload your performance", "", ""
+    if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
+        return "❌ Style coaching requires advanced features", "", ""
+    # Created before the try so the exception path can still report progress.
+    ref_features = []
+    ref_status = []
     try:
         # Process reference tracks
+        for i, ref_file in enumerate(reference_files[:5], start=1):
+            # Depending on the Gradio version/configuration the File component
+            # hands back plain path strings or objects exposing `.name`.
+            ref_path = getattr(ref_file, "name", ref_file)
+            separation_result = engine.separate_vocals(ref_path, "2stems")
+            if separation_result['success'] and separation_result.get('vocals'):
+                features = engine.extract_vocal_features(separation_result['vocals'])
+                if features['success']:
+                    ref_features.append(features)
+                    ref_status.append(f"✅ Reference {i}: Processed")
                 else:
+                    ref_status.append(f"❌ Reference {i}: Feature extraction failed")
+            else:
+                ref_status.append(f"❌ Reference {i}: Vocal separation failed")
         if len(ref_features) < 2:
+            return "❌ Need at least 2 valid reference tracks", "\n".join(ref_status), ""
         # Process user audio
+        user_separation = engine.separate_vocals(user_audio, "2stems")
+        if not user_separation['success'] or not user_separation.get('vocals'):
+            return "❌ Could not separate vocals from your performance", "\n".join(ref_status), ""
+        user_features = engine.extract_vocal_features(user_separation['vocals'])
+        if not user_features['success']:
+            return "❌ Could not analyze your vocal features", "\n".join(ref_status), ""
+        # Compare styles
+        comparison = engine.compare_vocal_styles(user_features, ref_features)
+        if not comparison['success']:
+            return f"❌ Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""
+        # Pick the closing recommendation from the overall score.
+        if comparison['score'] > 80:
+            recommendation = "🔥 Excellent! You're very close to the target style."
+        elif comparison['score'] > 60:
+            recommendation = "📈 Good progress! Focus on the areas mentioned above."
+        else:
+            recommendation = "💪 Keep practicing! Work on basic vocal technique first."
+        # Format feedback
+        feedback_text = f"""🎯 Vocal Style Coaching Results
+📊 Overall Score: {comparison['score']}/100
+🎵 Detailed Feedback:
+{chr(10).join(comparison['feedback'])}
+📈 Technical Metrics:
+• Pitch Difference: {comparison['metrics']['pitch_diff']} Hz
+• Tempo Difference: {comparison['metrics']['tempo_diff']} BPM
+• Timbre Difference: {comparison['metrics']['timbre_diff']} Hz
+• Energy Difference: {comparison['metrics']['energy_diff']}
+🎯 Recommendations:
+{recommendation}
+References analyzed: {len(ref_features)}/5"""
+        return f"✅ Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text
     except Exception as e:
+        # Keep whatever per-reference status was gathered before the failure.
+        return f"❌ Coaching failed: {str(e)}", "\n".join(ref_status), ""
+# Create main interface
+def create_app():
+    """Build the Gradio Blocks UI for the singing helper.
+
+    Lays out five tabs (audio separation, vocal effects, live recording,
+    style coaching, help) and connects each tab's button to its handler.
+
+    Returns:
+        The assembled ``gr.Blocks`` app; launching is left to the caller.
+    """
+    with gr.Blocks(title="Audio Singing Helper", theme=gr.themes.Soft()) as app:
+        # Static page banner shown above the tabs.
         gr.HTML("""
+        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
+            <h1>🎤 Audio Singing Helper</h1>
+            <p>Professional audio processing for singers and musicians</p>
         </div>
         """)
         with gr.Tabs():
+            # Audio Separation Tab
+            with gr.Tab("🎵 Audio Separation"):
+                gr.Markdown("### Separate vocals from instrumental tracks")
                 with gr.Row():
+                    with gr.Column():
+                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+                        sep_mode = gr.Dropdown(
                             choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                             value="2-stem (Vocals + Instrumental)",
+                            label="Separation Mode"
                         )
+                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")
+                    with gr.Column():
+                        sep_status = gr.Textbox(label="Status", lines=2)
+                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12)
+                # Four output players. Which stem lands in which player is
+                # decided by process_audio_separation (not visible here);
+                # presumably bass/other stay empty in 2-stem mode — TODO confirm.
                 with gr.Row():
+                    sep_vocals = gr.Audio(label="🎤 Vocals")
+                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums")
                 with gr.Row():
+                    sep_bass = gr.Audio(label="🎸 Bass")
+                    sep_other = gr.Audio(label="🎹 Other")
+            # Vocal Effects Tab
+            with gr.Tab("🎛️ Vocal Effects"):
+                gr.Markdown("### Apply professional vocal effects")
                 with gr.Row():
+                    with gr.Column():
+                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
+                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
+                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")
+                    with gr.Column():
+                        fx_status = gr.Textbox(label="Status", lines=2)
+                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10)
+                fx_output = gr.Audio(label="🎧 Processed Audio")
+            # Live Recording Tab
+            # NOTE(review): this tab is wired to process_vocal_effects on a
+            # button click (see the handlers below), so the recording is
+            # processed after capture rather than while it streams.
+            with gr.Tab("🎙️ Live Recording"):
+                gr.Markdown("### Record and process your voice in real-time")
                 with gr.Row():
+                    with gr.Column():
+                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
+                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
+                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
+                        live_button = gr.Button("🎤 Process Recording", variant="primary")
+                    with gr.Column():
+                        live_status = gr.Textbox(label="Status", lines=2)
+                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10)
+                live_output = gr.Audio(label="🎧 Processed Recording")
+            # Style Coaching Tab
+            with gr.Tab("🎭 Style Coaching"):
+                gr.Markdown("### Get personalized vocal coaching feedback")
+                with gr.Row():
+                    with gr.Column():
+                        coach_refs = gr.File(
+                            label="Reference Tracks (2-5 files)",
                             file_count="multiple",
+                            file_types=["audio"]
                         )
+                        coach_user = gr.Audio(
+                            type="filepath",
+                            label="Your Performance",
+                            sources=["upload", "microphone"]
                         )
+                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")
+                    with gr.Column():
+                        coach_status = gr.Textbox(label="Status", lines=3)
+                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8)
+                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15)
+            # Help Tab
+            with gr.Tab("ℹ️ Help"):
+                gr.Markdown("""
+                # 🎤 Audio Singing Helper - User Guide
+                ## Features
+                ### 🎵 Audio Separation
+                - Upload any song to separate vocals from instruments
+                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
+                - Get detailed audio analysis of your tracks
+                ### 🎛️ Vocal Effects
+                - Apply pitch shifting (-12 to +12 semitones)
+                - Add reverb for spatial depth
+                - Process any audio file with professional effects
+                ### 🎙️ Live Recording
+                - Record directly from your microphone
+                - Apply real-time pitch correction and reverb
+                - Perfect for vocal practice and experimentation
+                ### 🎭 Style Coaching
+                - Upload 2-5 reference tracks from artists you want to emulate
+                - Record or upload your performance
+                - Get AI-powered feedback on pitch, timing, and vocal characteristics
+                - Receive a score and specific improvement suggestions
+                ## Tips for Best Results
+                - **Use high-quality audio files** - better input = better results
+                - **Keep files under 5 minutes** for faster processing
+                - **For style coaching**: Choose references from similar genres
+                - **Record in quiet environments** for best analysis
+                ## Supported Formats
+                - Input: MP3, WAV, FLAC, M4A, OGG
+                - Output: High-quality WAV files
+                ## Technical Requirements
+                - Some features require additional dependencies
+                - Processing time varies based on file length and complexity
                 ---
+                Built for singers and musicians worldwide 🌍
                 """)
+        # Connect all the event handlers
+        # Each handler's returned tuple fills its `outputs` list positionally,
+        # so the order of components here must match the handler's return order.
+        sep_button.click(
+            process_audio_separation,
+            inputs=[sep_audio_input, sep_mode],
+            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
         )
+        fx_button.click(
+            process_vocal_effects,
+            inputs=[fx_audio_input, fx_pitch, fx_reverb],
+            outputs=[fx_status, fx_output, fx_analysis]
         )
+        # The live tab reuses the effects handler with its own components.
+        live_button.click(
+            process_vocal_effects,
+            inputs=[live_audio, live_pitch, live_reverb],
+            outputs=[live_status, live_output, live_analysis]
+        )
+        coach_button.click(
+            process_style_coaching,
+            inputs=[coach_refs, coach_user],
+            outputs=[coach_status, coach_refs_status, coach_feedback]
         )
+    # Return the assembled interface without launching it.
+    return app
 if __name__ == "__main__":
+    # Script entry point: build the interface and start the Gradio server
+    # with default launch settings. Importing this module does not launch it.
+    app = create_app()
+    app.launch()