Create APP.PY
Browse files
APP.PY
ADDED
|
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import os
import tempfile
import shutil
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

# Import for advanced features
# Spleeter is an optional dependency: when missing, source separation (and
# everything built on it, e.g. style coaching) is disabled via this flag.
try:
    from spleeter.separator import Separator
    SPLEETER_AVAILABLE = True
except ImportError:
    SPLEETER_AVAILABLE = False
    print("Spleeter not available - source separation disabled")

# scipy/dtw power the reverb convolution and style-comparison features;
# they are likewise optional and gated by ADVANCED_FEATURES.
try:
    import scipy.signal
    from scipy.spatial.distance import euclidean
    from dtw import dtw
    ADVANCED_FEATURES = True
except ImportError:
    ADVANCED_FEATURES = False
    print("Advanced features not available")
|
| 28 |
+
|
| 29 |
+
class AudioEngine:
    """Clean, professional audio processing engine.

    Provides feature analysis, Spleeter-based source separation, simple
    vocal effects, and vocal-style comparison. Spleeter models are cached
    per stem configuration, and all output files are written under a
    per-instance temporary directory removed by cleanup().
    """

    def __init__(self):
        # Scratch directory for separation/effects output files.
        self.temp_dir = tempfile.mkdtemp()
        self.separators = {}  # Cache for Spleeter models, keyed by model type

    @staticmethod
    def _track_pitch(y, sr):
        """Return per-frame dominant pitch values in Hz (voiced frames only).

        For each frame, picks the pitch bin with the strongest magnitude;
        frames with no detected pitch (value 0) are skipped.
        """
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        pitch_values = []
        for t in range(pitches.shape[1]):
            index = magnitudes[:, t].argmax()
            pitch = pitches[index, t]
            if pitch > 0:
                pitch_values.append(pitch)
        return pitch_values

    def analyze_audio(self, audio_path):
        """Extract comprehensive audio features.

        Returns a dict with 'success': True plus rounded feature values,
        or {'success': False, 'error': ...} on failure.
        """
        try:
            # librosa.load resamples to its default rate; sr below is that rate.
            y, sr = librosa.load(audio_path)

            # Basic properties
            duration = len(y) / sr
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            # Newer librosa returns tempo as a 1-element ndarray; round()
            # on an array raises, so coerce to a plain float first.
            tempo = float(np.atleast_1d(tempo)[0])

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
            zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

            # Energy features
            rms_energy = np.mean(librosa.feature.rms(y=y))

            # Pitch estimation
            pitch_values = self._track_pitch(y, sr)
            avg_pitch = np.mean(pitch_values) if pitch_values else 0

            return {
                'success': True,
                'duration': round(duration, 2),
                'tempo': round(tempo, 1),
                'sample_rate': sr,
                'spectral_centroid': round(spectral_centroid, 2),
                'spectral_rolloff': round(spectral_rolloff, 2),
                'zero_crossing_rate': round(zero_crossing_rate, 4),
                'rms_energy': round(rms_energy, 4),
                'average_pitch': round(avg_pitch, 2),
                'pitch_count': len(pitch_values),
                'beats_detected': len(beats),
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def separate_vocals(self, audio_path, model_type="2stems"):
        """Separate a track into stems using Spleeter.

        model_type is "2stems" (vocals + accompaniment) or "4stems"
        (vocals + drums + bass + other). Returns a dict of stem paths
        (None for stems Spleeter did not produce), or an error dict.
        """
        if not SPLEETER_AVAILABLE:
            return {'success': False, 'error': 'Spleeter not available'}

        try:
            # Load or create separator (model loading is expensive; cache it).
            if model_type not in self.separators:
                self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz')
            separator = self.separators[model_type]

            # Unique output directory per invocation to avoid collisions.
            output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}")
            os.makedirs(output_dir, exist_ok=True)

            separator.separate_to_file(audio_path, output_dir)

            # Spleeter writes stems into <output_dir>/<input file stem>/.
            audio_name = Path(audio_path).stem
            result_dir = os.path.join(output_dir, audio_name)

            def _stem(name):
                path = os.path.join(result_dir, f"{name}.wav")
                return path if os.path.exists(path) else None

            if model_type == "2stems":
                return {
                    'success': True,
                    'vocals': _stem("vocals"),
                    'accompaniment': _stem("accompaniment"),
                }
            if model_type == "4stems":
                return {
                    'success': True,
                    'vocals': _stem("vocals"),
                    'drums': _stem("drums"),
                    'bass': _stem("bass"),
                    'other': _stem("other"),
                }
            # Previously this fell through and implicitly returned None,
            # crashing callers that subscript the result.
            return {'success': False, 'error': f'Unsupported model type: {model_type}'}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
        """Apply vocal effects (pitch shift in semitones, reverb amount 0-1).

        Returns {'success': True, 'output': path} or an error dict.
        """
        try:
            y, sr = librosa.load(audio_path)

            if pitch_shift != 0:
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)

            # Simple synthetic reverb: convolve with an exponentially
            # decaying noise impulse (requires scipy).
            if reverb > 0 and ADVANCED_FEATURES:
                reverb_length = int(0.5 * sr)
                impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
                y = scipy.signal.convolve(y, impulse * reverb, mode='same')
                peak = np.max(np.abs(y))
                if peak > 0:  # guard against all-zero signal
                    y = y / peak  # Normalize

            output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav")
            sf.write(output_path, y, sr)

            return {'success': True, 'output': output_path}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def extract_vocal_features(self, audio_path):
        """Extract vocal features used for style coaching comparisons."""
        try:
            y, sr = librosa.load(audio_path)

            pitch_values = self._track_pitch(y, sr)
            if not pitch_values:
                return {'success': False, 'error': 'No pitch detected'}

            # Basic vocal metrics
            mean_pitch = np.mean(pitch_values)
            pitch_std = np.std(pitch_values)
            pitch_range = max(pitch_values) - min(pitch_values)

            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            # Coerce ndarray tempo (newer librosa) so downstream arithmetic
            # and round() work on scalars.
            tempo = float(np.atleast_1d(tempo)[0])

            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            rms_energy = np.mean(librosa.feature.rms(y=y))

            return {
                'success': True,
                'mean_pitch': mean_pitch,
                'pitch_std': pitch_std,
                'pitch_range': pitch_range,
                'tempo': tempo,
                'spectral_centroid': spectral_centroid,
                'rms_energy': rms_energy,
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def compare_vocal_styles(self, user_features, reference_features_list):
        """Compare user vocal features against averaged reference features.

        Returns a score (0-100), human-readable feedback lines, and the raw
        difference metrics, or an error dict.
        """
        if not ADVANCED_FEATURES:
            return {'success': False, 'error': 'Advanced features not available'}

        try:
            # Average the reference features across all reference tracks.
            ref_avg = {}
            for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']:
                values = [ref[key] for ref in reference_features_list if key in ref]
                ref_avg[key] = np.mean(values) if values else 0

            # Absolute differences drive both the feedback and the score.
            pitch_diff = abs(user_features['mean_pitch'] - ref_avg['mean_pitch'])
            tempo_diff = abs(user_features['tempo'] - ref_avg['tempo'])
            timbre_diff = abs(user_features['spectral_centroid'] - ref_avg['spectral_centroid'])
            energy_diff = abs(user_features['rms_energy'] - ref_avg['rms_energy'])

            feedback = []

            if pitch_diff > 50:
                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
            else:
                feedback.append("🎵 Pitch: Good pitch accuracy!")

            if tempo_diff > 10:
                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
            else:
                feedback.append("⏱️ Tempo: Good timing!")

            if timbre_diff > 500:
                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
            else:
                feedback.append("🗣️ Timbre: Good vocal tone match!")

            if energy_diff > 0.1:
                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
            else:
                feedback.append("🔊 Energy: Good energy level!")

            # Heuristic score: penalize each difference, clamped at 0.
            overall_score = max(0, 100 - (pitch_diff / 2 + tempo_diff + timbre_diff / 10 + energy_diff * 100))

            return {
                'success': True,
                'score': round(overall_score, 1),
                'feedback': feedback,
                'metrics': {
                    'pitch_diff': round(pitch_diff, 1),
                    'tempo_diff': round(tempo_diff, 1),
                    'timbre_diff': round(timbre_diff, 1),
                    'energy_diff': round(energy_diff, 3),
                },
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def cleanup(self):
        """Remove the temporary working directory (best-effort)."""
        try:
            if os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
        except Exception:
            # Best-effort cleanup; failure here should never crash the app.
            pass
|
| 266 |
+
|
| 267 |
+
# Global engine instance
# Shared by all Gradio request handlers; owns the temp directory and the
# Spleeter model cache for the lifetime of the process.
engine = AudioEngine()
|
| 269 |
+
|
| 270 |
+
def format_analysis_results(analysis):
    """Format an AudioEngine.analyze_audio() result dict for display.

    Returns an error line when analysis failed, otherwise a multi-line
    human-readable report.
    """
    if not analysis['success']:
        return f"❌ Analysis failed: {analysis['error']}"

    return f"""📊 Audio Analysis Results

🎵 Basic Properties:
• Duration: {analysis['duration']} seconds
• Sample Rate: {analysis['sample_rate']} Hz
• Tempo: {analysis['tempo']} BPM

📊 Audio Characteristics:
• Spectral Centroid: {analysis['spectral_centroid']} Hz
• Spectral Rolloff: {analysis['spectral_rolloff']} Hz
• Zero Crossing Rate: {analysis['zero_crossing_rate']}
• RMS Energy: {analysis['rms_energy']}

🎤 Vocal Information:
• Average Pitch: {analysis['average_pitch']} Hz
• Pitch Points Detected: {analysis['pitch_count']}
• Beats Detected: {analysis['beats_detected']}"""
|
| 292 |
+
|
| 293 |
+
def process_audio_separation(audio_file, separation_mode):
    """Analyze an uploaded file and split it into stems.

    Returns a 6-tuple (status, vocals, stem2, stem3, stem4, analysis_text);
    unused stem slots are None so the fixed set of output widgets is always
    fully populated.
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, None, None, None, ""

    if not SPLEETER_AVAILABLE:
        return "❌ Spleeter not available for source separation", None, None, None, None, ""

    try:
        # Analyze first so the report is available even if separation fails.
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Map the UI dropdown text onto a Spleeter model type.
        model_type = "2stems" if "2-stem" in separation_mode else "4stems"
        separation_result = engine.separate_vocals(audio_file, model_type)

        if not separation_result['success']:
            return f"❌ Separation failed: {separation_result['error']}", None, None, None, None, analysis_text

        if model_type == "2stems":
            return (
                "✅ 2-stem separation completed successfully!",
                separation_result.get('vocals'),
                separation_result.get('accompaniment'),
                None,
                None,
                analysis_text,
            )
        return (
            "✅ 4-stem separation completed successfully!",
            separation_result.get('vocals'),
            separation_result.get('drums'),
            separation_result.get('bass'),
            separation_result.get('other'),
            analysis_text,
        )

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, None, None, None, ""
|
| 334 |
+
|
| 335 |
+
def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
    """Apply vocal effects to an uploaded/recorded file.

    Returns (status, processed_audio_path_or_None, analysis_text).
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, ""

    try:
        # Analyze original
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Apply effects
        effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount)
        if not effects_result['success']:
            return f"❌ Effects failed: {effects_result['error']}", None, analysis_text

        # Build a status line describing exactly what was applied.
        effects_applied = []
        if pitch_shift != 0:
            effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones")
        if reverb_amount > 0:
            effects_applied.append(f"Reverb: {reverb_amount:.2f}")

        if effects_applied:
            status = f"✅ Effects applied: {', '.join(effects_applied)}"
        else:
            status = "✅ Audio processed (no effects)"

        return status, effects_result['output'], analysis_text

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, ""
|
| 363 |
+
|
| 364 |
+
def process_style_coaching(reference_files, user_audio):
|
| 365 |
+
"""Style coaching analysis"""
|
| 366 |
+
if not reference_files or len(reference_files) < 2:
|
| 367 |
+
return "β Upload at least 2 reference tracks", "", ""
|
| 368 |
+
|
| 369 |
+
if not user_audio:
|
| 370 |
+
return "β Please record or upload your performance", "", ""
|
| 371 |
+
|
| 372 |
+
if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
|
| 373 |
+
return "β Style coaching requires advanced features", "", ""
|
| 374 |
+
|
| 375 |
+
try:
|
| 376 |
+
# Process reference tracks
|
| 377 |
+
ref_features = []
|
| 378 |
+
ref_status = []
|
| 379 |
+
|
| 380 |
+
for i, ref_file in enumerate(reference_files[:5]):
|
| 381 |
+
# Separate vocals
|
| 382 |
+
separation_result = engine.separate_vocals(ref_file.name, "2stems")
|
| 383 |
+
if separation_result['success'] and separation_result.get('vocals'):
|
| 384 |
+
# Extract features
|
| 385 |
+
features = engine.extract_vocal_features(separation_result['vocals'])
|
| 386 |
+
if features['success']:
|
| 387 |
+
ref_features.append(features)
|
| 388 |
+
ref_status.append(f"β
Reference {i+1}: Processed")
|
| 389 |
+
else:
|
| 390 |
+
ref_status.append(f"β Reference {i+1}: Feature extraction failed")
|
| 391 |
+
else:
|
| 392 |
+
ref_status.append(f"β Reference {i+1}: Vocal separation failed")
|
| 393 |
+
|
| 394 |
+
if len(ref_features) < 2:
|
| 395 |
+
return "β Need at least 2 valid reference tracks", "\n".join(ref_status), ""
|
| 396 |
+
|
| 397 |
+
# Process user audio
|
| 398 |
+
user_separation = engine.separate_vocals(user_audio, "2stems")
|
| 399 |
+
if not user_separation['success'] or not user_separation.get('vocals'):
|
| 400 |
+
return "β Could not separate vocals from your performance", "\n".join(ref_status), ""
|
| 401 |
+
|
| 402 |
+
user_features = engine.extract_vocal_features(user_separation['vocals'])
|
| 403 |
+
if not user_features['success']:
|
| 404 |
+
return "β Could not analyze your vocal features", "\n".join(ref_status), ""
|
| 405 |
+
|
| 406 |
+
# Compare styles
|
| 407 |
+
comparison = engine.compare_vocal_styles(user_features, ref_features)
|
| 408 |
+
if not comparison['success']:
|
| 409 |
+
return f"β Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""
|
| 410 |
+
|
| 411 |
+
# Format feedback
|
| 412 |
+
feedback_text = f"""π― Vocal Style Coaching Results
|
| 413 |
+
|
| 414 |
+
π Overall Score: {comparison['score']}/100
|
| 415 |
+
|
| 416 |
+
π΅ Detailed Feedback:
|
| 417 |
+
{chr(10).join(comparison['feedback'])}
|
| 418 |
+
|
| 419 |
+
π Technical Metrics:
|
| 420 |
+
β’ Pitch Difference: {comparison['metrics']['pitch_diff']} Hz
|
| 421 |
+
β’ Tempo Difference: {comparison['metrics']['tempo_diff']} BPM
|
| 422 |
+
β’ Timbre Difference: {comparison['metrics']['timbre_diff']} Hz
|
| 423 |
+
β’ Energy Difference: {comparison['metrics']['energy_diff']}
|
| 424 |
+
|
| 425 |
+
π― Recommendations:
|
| 426 |
+
{f"π₯ Excellent! You're very close to the target style." if comparison['score'] > 80 else
|
| 427 |
+
f"π Good progress! Focus on the areas mentioned above." if comparison['score'] > 60 else
|
| 428 |
+
f"πͺ Keep practicing! Work on basic vocal technique first."}
|
| 429 |
+
|
| 430 |
+
References analyzed: {len(ref_features)}/5"""
|
| 431 |
+
|
| 432 |
+
return f"β
Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text
|
| 433 |
+
|
| 434 |
+
except Exception as e:
|
| 435 |
+
return f"β Coaching failed: {str(e)}", "", ""
|
| 436 |
+
|
| 437 |
+
# Create main interface
|
| 438 |
+
def create_app():
    """Build and return the Gradio Blocks UI.

    Five tabs: separation, effects, live recording, style coaching, help.
    All event handlers are wired to the module-level processing functions.
    """
    with gr.Blocks(title="Audio Singing Helper") as app:

        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
            <h1>🎤 Audio Singing Helper</h1>
            <p>Professional audio processing for singers and musicians</p>
        </div>
        """)

        with gr.Tabs():

            # Audio Separation Tab
            with gr.Tab("🎵 Audio Separation"):
                gr.Markdown("### Separate vocals from instrumental tracks")

                with gr.Row():
                    with gr.Column():
                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        sep_mode = gr.Dropdown(
                            choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                            value="2-stem (Vocals + Instrumental)",
                            label="Separation Mode"
                        )
                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")

                    with gr.Column():
                        sep_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12, interactive=False)

                with gr.Row():
                    sep_vocals = gr.Audio(label="🎤 Vocals", show_download_button=True)
                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums", show_download_button=True)

                with gr.Row():
                    sep_bass = gr.Audio(label="🎸 Bass", show_download_button=True)
                    sep_other = gr.Audio(label="🎹 Other", show_download_button=True)

            # Vocal Effects Tab
            with gr.Tab("🎛️ Vocal Effects"):
                gr.Markdown("### Apply professional vocal effects")

                with gr.Row():
                    with gr.Column():
                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")

                    with gr.Column():
                        fx_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10, interactive=False)

                fx_output = gr.Audio(label="🎧 Processed Audio", show_download_button=True)

            # Live Recording Tab
            with gr.Tab("🎙️ Live Recording"):
                gr.Markdown("### Record and process your voice in real-time")

                with gr.Row():
                    with gr.Column():
                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
                        live_button = gr.Button("🎤 Process Recording", variant="primary")

                    with gr.Column():
                        live_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10, interactive=False)

                live_output = gr.Audio(label="🎧 Processed Recording", show_download_button=True)

            # Style Coaching Tab
            with gr.Tab("🎓 Style Coaching"):
                gr.Markdown("### Get personalized vocal coaching feedback")

                with gr.Row():
                    with gr.Column():
                        coach_refs = gr.File(
                            label="Reference Tracks (2-5 files)",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        coach_user = gr.Audio(
                            type="filepath",
                            label="Your Performance",
                            sources=["upload", "microphone"]
                        )
                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")

                    with gr.Column():
                        coach_status = gr.Textbox(label="Status", lines=3, interactive=False)
                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8, interactive=False)

                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15, interactive=False)

            # Help Tab
            with gr.Tab("ℹ️ Help"):
                gr.Markdown("""
                # 🎤 Audio Singing Helper - User Guide

                ## Features

                ### 🎵 Audio Separation
                - Upload any song to separate vocals from instruments
                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
                - Get detailed audio analysis of your tracks

                ### 🎛️ Vocal Effects
                - Apply pitch shifting (-12 to +12 semitones)
                - Add reverb for spatial depth
                - Process any audio file with professional effects

                ### 🎙️ Live Recording
                - Record directly from your microphone
                - Apply real-time pitch correction and reverb
                - Perfect for vocal practice and experimentation

                ### 🎓 Style Coaching
                - Upload 2-5 reference tracks from artists you want to emulate
                - Record or upload your performance
                - Get AI-powered feedback on pitch, timing, and vocal characteristics
                - Receive a score and specific improvement suggestions

                ## Tips for Best Results

                - **Use high-quality audio files** - better input = better results
                - **Keep files under 5 minutes** for faster processing
                - **For style coaching**: Choose references from similar genres
                - **Record in quiet environments** for best analysis

                ## Supported Formats
                - Input: MP3, WAV, FLAC, M4A, OGG
                - Output: High-quality WAV files

                ## Technical Requirements
                - Some features require additional dependencies
                - Processing time varies based on file length and complexity

                ---
                Built for singers and musicians worldwide 🌍
                """)

        # Connect all the event handlers
        sep_button.click(
            process_audio_separation,
            inputs=[sep_audio_input, sep_mode],
            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
        )

        fx_button.click(
            process_vocal_effects,
            inputs=[fx_audio_input, fx_pitch, fx_reverb],
            outputs=[fx_status, fx_output, fx_analysis]
        )

        # Live recording reuses the same effects pipeline as the effects tab.
        live_button.click(
            process_vocal_effects,
            inputs=[live_audio, live_pitch, live_reverb],
            outputs=[live_status, live_output, live_analysis]
        )

        coach_button.click(
            process_style_coaching,
            inputs=[coach_refs, coach_user],
            outputs=[coach_status, coach_refs_status, coach_feedback]
        )

    return app
|
| 608 |
+
|
| 609 |
+
if __name__ == "__main__":
    # Build the Gradio UI and start the local web server.
    app = create_app()
    app.launch()
|