Create APP.PY
Browse files
APP.PY
ADDED
|
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import os
import tempfile
import shutil
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

# Import for advanced features
# Spleeter is an optional dependency: when missing, source separation (and
# everything built on it, e.g. style coaching) is disabled via this flag.
try:
    from spleeter.separator import Separator
    SPLEETER_AVAILABLE = True
except ImportError:
    SPLEETER_AVAILABLE = False
    print("Spleeter not available - source separation disabled")

# scipy/dtw power the reverb convolution and style-comparison features;
# they are likewise optional and gated by ADVANCED_FEATURES.
try:
    import scipy.signal
    from scipy.spatial.distance import euclidean
    from dtw import dtw
    ADVANCED_FEATURES = True
except ImportError:
    ADVANCED_FEATURES = False
    print("Advanced features not available")
|
| 28 |
+
|
| 29 |
+
class AudioEngine:
    """Clean, professional audio processing engine.

    Provides feature analysis, Spleeter-based source separation, simple
    vocal effects, and vocal-style comparison. Spleeter models are cached
    per stem configuration, and all output files are written under a
    per-instance temporary directory removed by cleanup().
    """

    def __init__(self):
        # Scratch directory for separation/effects output files.
        self.temp_dir = tempfile.mkdtemp()
        self.separators = {}  # Cache for Spleeter models, keyed by model type

    @staticmethod
    def _track_pitch(y, sr):
        """Return per-frame dominant pitch values in Hz (voiced frames only).

        For each frame, picks the pitch bin with the strongest magnitude;
        frames with no detected pitch (value 0) are skipped.
        """
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        pitch_values = []
        for t in range(pitches.shape[1]):
            index = magnitudes[:, t].argmax()
            pitch = pitches[index, t]
            if pitch > 0:
                pitch_values.append(pitch)
        return pitch_values

    def analyze_audio(self, audio_path):
        """Extract comprehensive audio features.

        Returns a dict with 'success': True plus rounded feature values,
        or {'success': False, 'error': ...} on failure.
        """
        try:
            # librosa.load resamples to its default rate; sr below is that rate.
            y, sr = librosa.load(audio_path)

            # Basic properties
            duration = len(y) / sr
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            # Newer librosa returns tempo as a 1-element ndarray; round()
            # on an array raises, so coerce to a plain float first.
            tempo = float(np.atleast_1d(tempo)[0])

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
            zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

            # Energy features
            rms_energy = np.mean(librosa.feature.rms(y=y))

            # Pitch estimation
            pitch_values = self._track_pitch(y, sr)
            avg_pitch = np.mean(pitch_values) if pitch_values else 0

            return {
                'success': True,
                'duration': round(duration, 2),
                'tempo': round(tempo, 1),
                'sample_rate': sr,
                'spectral_centroid': round(spectral_centroid, 2),
                'spectral_rolloff': round(spectral_rolloff, 2),
                'zero_crossing_rate': round(zero_crossing_rate, 4),
                'rms_energy': round(rms_energy, 4),
                'average_pitch': round(avg_pitch, 2),
                'pitch_count': len(pitch_values),
                'beats_detected': len(beats),
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def separate_vocals(self, audio_path, model_type="2stems"):
        """Separate a track into stems using Spleeter.

        model_type is "2stems" (vocals + accompaniment) or "4stems"
        (vocals + drums + bass + other). Returns a dict of stem paths
        (None for stems Spleeter did not produce), or an error dict.
        """
        if not SPLEETER_AVAILABLE:
            return {'success': False, 'error': 'Spleeter not available'}

        try:
            # Load or create separator (model loading is expensive; cache it).
            if model_type not in self.separators:
                self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz')
            separator = self.separators[model_type]

            # Unique output directory per invocation to avoid collisions.
            output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}")
            os.makedirs(output_dir, exist_ok=True)

            separator.separate_to_file(audio_path, output_dir)

            # Spleeter writes stems into <output_dir>/<input file stem>/.
            audio_name = Path(audio_path).stem
            result_dir = os.path.join(output_dir, audio_name)

            def _stem(name):
                path = os.path.join(result_dir, f"{name}.wav")
                return path if os.path.exists(path) else None

            if model_type == "2stems":
                return {
                    'success': True,
                    'vocals': _stem("vocals"),
                    'accompaniment': _stem("accompaniment"),
                }
            if model_type == "4stems":
                return {
                    'success': True,
                    'vocals': _stem("vocals"),
                    'drums': _stem("drums"),
                    'bass': _stem("bass"),
                    'other': _stem("other"),
                }
            # Previously this fell through and implicitly returned None,
            # crashing callers that subscript the result.
            return {'success': False, 'error': f'Unsupported model type: {model_type}'}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
        """Apply vocal effects (pitch shift in semitones, reverb amount 0-1).

        Returns {'success': True, 'output': path} or an error dict.
        """
        try:
            y, sr = librosa.load(audio_path)

            if pitch_shift != 0:
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)

            # Simple synthetic reverb: convolve with an exponentially
            # decaying noise impulse (requires scipy).
            if reverb > 0 and ADVANCED_FEATURES:
                reverb_length = int(0.5 * sr)
                impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
                y = scipy.signal.convolve(y, impulse * reverb, mode='same')
                peak = np.max(np.abs(y))
                if peak > 0:  # guard against all-zero signal
                    y = y / peak  # Normalize

            output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav")
            sf.write(output_path, y, sr)

            return {'success': True, 'output': output_path}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def extract_vocal_features(self, audio_path):
        """Extract vocal features used for style coaching comparisons."""
        try:
            y, sr = librosa.load(audio_path)

            pitch_values = self._track_pitch(y, sr)
            if not pitch_values:
                return {'success': False, 'error': 'No pitch detected'}

            # Basic vocal metrics
            mean_pitch = np.mean(pitch_values)
            pitch_std = np.std(pitch_values)
            pitch_range = max(pitch_values) - min(pitch_values)

            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            # Coerce ndarray tempo (newer librosa) so downstream arithmetic
            # and round() work on scalars.
            tempo = float(np.atleast_1d(tempo)[0])

            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            rms_energy = np.mean(librosa.feature.rms(y=y))

            return {
                'success': True,
                'mean_pitch': mean_pitch,
                'pitch_std': pitch_std,
                'pitch_range': pitch_range,
                'tempo': tempo,
                'spectral_centroid': spectral_centroid,
                'rms_energy': rms_energy,
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def compare_vocal_styles(self, user_features, reference_features_list):
        """Compare user vocal features against averaged reference features.

        Returns a score (0-100), human-readable feedback lines, and the raw
        difference metrics, or an error dict.
        """
        if not ADVANCED_FEATURES:
            return {'success': False, 'error': 'Advanced features not available'}

        try:
            # Average the reference features across all reference tracks.
            ref_avg = {}
            for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']:
                values = [ref[key] for ref in reference_features_list if key in ref]
                ref_avg[key] = np.mean(values) if values else 0

            # Absolute differences drive both the feedback and the score.
            pitch_diff = abs(user_features['mean_pitch'] - ref_avg['mean_pitch'])
            tempo_diff = abs(user_features['tempo'] - ref_avg['tempo'])
            timbre_diff = abs(user_features['spectral_centroid'] - ref_avg['spectral_centroid'])
            energy_diff = abs(user_features['rms_energy'] - ref_avg['rms_energy'])

            feedback = []

            if pitch_diff > 50:
                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
            else:
                feedback.append("🎵 Pitch: Good pitch accuracy!")

            if tempo_diff > 10:
                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
            else:
                feedback.append("⏱️ Tempo: Good timing!")

            if timbre_diff > 500:
                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
            else:
                feedback.append("🗣️ Timbre: Good vocal tone match!")

            if energy_diff > 0.1:
                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
            else:
                feedback.append("🔊 Energy: Good energy level!")

            # Heuristic score: penalize each difference, clamped at 0.
            overall_score = max(0, 100 - (pitch_diff / 2 + tempo_diff + timbre_diff / 10 + energy_diff * 100))

            return {
                'success': True,
                'score': round(overall_score, 1),
                'feedback': feedback,
                'metrics': {
                    'pitch_diff': round(pitch_diff, 1),
                    'tempo_diff': round(tempo_diff, 1),
                    'timbre_diff': round(timbre_diff, 1),
                    'energy_diff': round(energy_diff, 3),
                },
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def cleanup(self):
        """Remove the temporary working directory (best-effort)."""
        try:
            if os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
        except Exception:
            # Best-effort cleanup; failure here should never crash the app.
            pass
|
| 266 |
+
|
| 267 |
+
# Global engine instance
# Shared by all Gradio request handlers; owns the temp directory and the
# Spleeter model cache for the lifetime of the process.
engine = AudioEngine()
|
| 269 |
+
|
| 270 |
+
def format_analysis_results(analysis):
    """Format an AudioEngine.analyze_audio() result dict for display.

    Returns an error line when analysis failed, otherwise a multi-line
    human-readable report.
    """
    if not analysis['success']:
        return f"❌ Analysis failed: {analysis['error']}"

    return f"""📊 Audio Analysis Results

🎵 Basic Properties:
• Duration: {analysis['duration']} seconds
• Sample Rate: {analysis['sample_rate']} Hz
• Tempo: {analysis['tempo']} BPM

📊 Audio Characteristics:
• Spectral Centroid: {analysis['spectral_centroid']} Hz
• Spectral Rolloff: {analysis['spectral_rolloff']} Hz
• Zero Crossing Rate: {analysis['zero_crossing_rate']}
• RMS Energy: {analysis['rms_energy']}

🎤 Vocal Information:
• Average Pitch: {analysis['average_pitch']} Hz
• Pitch Points Detected: {analysis['pitch_count']}
• Beats Detected: {analysis['beats_detected']}"""
|
| 292 |
+
|
| 293 |
+
def process_audio_separation(audio_file, separation_mode):
    """Analyze an uploaded file and split it into stems.

    Returns a 6-tuple (status, vocals, stem2, stem3, stem4, analysis_text);
    unused stem slots are None so the fixed set of output widgets is always
    fully populated.
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, None, None, None, ""

    if not SPLEETER_AVAILABLE:
        return "❌ Spleeter not available for source separation", None, None, None, None, ""

    try:
        # Analyze first so the report is available even if separation fails.
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Map the UI dropdown text onto a Spleeter model type.
        model_type = "2stems" if "2-stem" in separation_mode else "4stems"
        separation_result = engine.separate_vocals(audio_file, model_type)

        if not separation_result['success']:
            return f"❌ Separation failed: {separation_result['error']}", None, None, None, None, analysis_text

        if model_type == "2stems":
            return (
                "✅ 2-stem separation completed successfully!",
                separation_result.get('vocals'),
                separation_result.get('accompaniment'),
                None,
                None,
                analysis_text,
            )
        return (
            "✅ 4-stem separation completed successfully!",
            separation_result.get('vocals'),
            separation_result.get('drums'),
            separation_result.get('bass'),
            separation_result.get('other'),
            analysis_text,
        )

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, None, None, None, ""
|
| 334 |
+
|
| 335 |
+
def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
    """Apply vocal effects to an uploaded/recorded file.

    Returns (status, processed_audio_path_or_None, analysis_text).
    """
    if not audio_file:
        return "❌ Please upload an audio file", None, ""

    try:
        # Analyze original
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Apply effects
        effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount)
        if not effects_result['success']:
            return f"❌ Effects failed: {effects_result['error']}", None, analysis_text

        # Build a status line describing exactly what was applied.
        effects_applied = []
        if pitch_shift != 0:
            effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones")
        if reverb_amount > 0:
            effects_applied.append(f"Reverb: {reverb_amount:.2f}")

        if effects_applied:
            status = f"✅ Effects applied: {', '.join(effects_applied)}"
        else:
            status = "✅ Audio processed (no effects)"

        return status, effects_result['output'], analysis_text

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, ""
|
| 363 |
+
|
| 364 |
+
def process_style_coaching(reference_files, user_audio):
|
| 365 |
+
"""Style coaching analysis"""
|
| 366 |
+
if not reference_files or len(reference_files) < 2:
|
| 367 |
+
return "β Upload at least 2 reference tracks", "", ""
|
| 368 |
+
|
| 369 |
+
if not user_audio:
|
| 370 |
+
return "β Please record or upload your performance", "", ""
|
| 371 |
+
|
| 372 |
+
if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
|
| 373 |
+
return "β Style coaching requires advanced features", "", ""
|
| 374 |
+
|
| 375 |
+
try:
|
| 376 |
+
# Process reference tracks
|
| 377 |
+
ref_features = []
|
| 378 |
+
ref_status = []
|
| 379 |
+
|
| 380 |
+
for i, ref_file in enumerate(reference_files[:5]):
|
| 381 |
+
# Separate vocals
|
| 382 |
+
separation_result = engine.separate_vocals(ref_file.name, "2stems")
|
| 383 |
+
if separation_result['success'] and separation_result.get('vocals'):
|
| 384 |
+
# Extract features
|
| 385 |
+
features = engine.extract_vocal_features(separation_result['vocals'])
|
| 386 |
+
if features['success']:
|
| 387 |
+
ref_features.append(features)
|
| 388 |
+
ref_status.append(f"β
Reference {i+1}: Processed")
|
| 389 |
+
else:
|
| 390 |
+
ref_status.append(f"β Reference {i+1}: Feature extraction failed")
|
| 391 |
+
else:
|
| 392 |
+
ref_status.append(f"β Reference {i+1}: Vocal separation failed")
|
| 393 |
+
|
| 394 |
+
if len(ref_features) < 2:
|
| 395 |
+
return "β Need at least 2 valid reference tracks", "\n".join(ref_status), ""
|
| 396 |
+
|
| 397 |
+
# Process user audio
|
| 398 |
+
user_separation = engine.separate_vocals(user_audio, "2stems")
|
| 399 |
+
if not user_separation['success'] or not user_separation.get('vocals'):
|
| 400 |
+
return "β Could not separate vocals from your performance", "\n".join(ref_status), ""
|
| 401 |
+
|
| 402 |
+
user_features = engine.extract_vocal_features(user_separation['vocals'])
|
| 403 |
+
if not user_features['success']:
|
| 404 |
+
return "β Could not analyze your vocal features", "\n".join(ref_status), ""
|
| 405 |
+
|
| 406 |
+
# Compare styles
|
| 407 |
+
comparison = engine.compare_vocal_styles(user_features, ref_features)
|
| 408 |
+
if not comparison['success']:
|
| 409 |
+
return f"β Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""
|
| 410 |
+
|
| 411 |
+
# Format feedback
|
| 412 |
+
feedback_text = f"""π― Vocal Style Coaching Results
|
| 413 |
+
|
| 414 |
+
π Overall Score: {comparison['score']}/100
|
| 415 |
+
|
| 416 |
+
π΅ Detailed Feedback:
|
| 417 |
+
{chr(10).join(comparison['feedback'])}
|
| 418 |
+
|
| 419 |
+
π Technical Metrics:
|
| 420 |
+
β’ Pitch Difference: {comparison['metrics']['pitch_diff']} Hz
|
| 421 |
+
β’ Tempo Difference: {comparison['metrics']['tempo_diff']} BPM
|
| 422 |
+
β’ Timbre Difference: {comparison['metrics']['timbre_diff']} Hz
|
| 423 |
+
β’ Energy Difference: {comparison['metrics']['energy_diff']}
|
| 424 |
+
|
| 425 |
+
π― Recommendations:
|
| 426 |
+
{f"π₯ Excellent! You're very close to the target style." if comparison['score'] > 80 else
|
| 427 |
+
f"π Good progress! Focus on the areas mentioned above." if comparison['score'] > 60 else
|
| 428 |
+
f"πͺ Keep practicing! Work on basic vocal technique first."}
|
| 429 |
+
|
| 430 |
+
References analyzed: {len(ref_features)}/5"""
|
| 431 |
+
|
| 432 |
+
return f"β
Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text
|
| 433 |
+
|
| 434 |
+
except Exception as e:
|
| 435 |
+
return f"β Coaching failed: {str(e)}", "", ""
|
| 436 |
+
|
| 437 |
+
# Create main interface
|
| 438 |
+
def create_app():
    """Build and return the Gradio Blocks UI.

    Five tabs: separation, effects, live recording, style coaching, help.
    All event handlers are wired to the module-level processing functions.
    """
    with gr.Blocks(title="Audio Singing Helper") as app:

        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
            <h1>🎤 Audio Singing Helper</h1>
            <p>Professional audio processing for singers and musicians</p>
        </div>
        """)

        with gr.Tabs():

            # Audio Separation Tab
            with gr.Tab("🎵 Audio Separation"):
                gr.Markdown("### Separate vocals from instrumental tracks")

                with gr.Row():
                    with gr.Column():
                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        sep_mode = gr.Dropdown(
                            choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                            value="2-stem (Vocals + Instrumental)",
                            label="Separation Mode"
                        )
                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")

                    with gr.Column():
                        sep_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12, interactive=False)

                with gr.Row():
                    sep_vocals = gr.Audio(label="🎤 Vocals", show_download_button=True)
                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums", show_download_button=True)

                with gr.Row():
                    sep_bass = gr.Audio(label="🎸 Bass", show_download_button=True)
                    sep_other = gr.Audio(label="🎹 Other", show_download_button=True)

            # Vocal Effects Tab
            with gr.Tab("🎛️ Vocal Effects"):
                gr.Markdown("### Apply professional vocal effects")

                with gr.Row():
                    with gr.Column():
                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")

                    with gr.Column():
                        fx_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10, interactive=False)

                fx_output = gr.Audio(label="🎧 Processed Audio", show_download_button=True)

            # Live Recording Tab
            with gr.Tab("🎙️ Live Recording"):
                gr.Markdown("### Record and process your voice in real-time")

                with gr.Row():
                    with gr.Column():
                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
                        live_button = gr.Button("🎤 Process Recording", variant="primary")

                    with gr.Column():
                        live_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10, interactive=False)

                live_output = gr.Audio(label="🎧 Processed Recording", show_download_button=True)

            # Style Coaching Tab
            with gr.Tab("🎓 Style Coaching"):
                gr.Markdown("### Get personalized vocal coaching feedback")

                with gr.Row():
                    with gr.Column():
                        coach_refs = gr.File(
                            label="Reference Tracks (2-5 files)",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        coach_user = gr.Audio(
                            type="filepath",
                            label="Your Performance",
                            sources=["upload", "microphone"]
                        )
                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")

                    with gr.Column():
                        coach_status = gr.Textbox(label="Status", lines=3, interactive=False)
                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8, interactive=False)

                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15, interactive=False)

            # Help Tab
            with gr.Tab("ℹ️ Help"):
                gr.Markdown("""
                # 🎤 Audio Singing Helper - User Guide

                ## Features

                ### 🎵 Audio Separation
                - Upload any song to separate vocals from instruments
                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
                - Get detailed audio analysis of your tracks

                ### 🎛️ Vocal Effects
                - Apply pitch shifting (-12 to +12 semitones)
                - Add reverb for spatial depth
                - Process any audio file with professional effects

                ### 🎙️ Live Recording
                - Record directly from your microphone
                - Apply real-time pitch correction and reverb
                - Perfect for vocal practice and experimentation

                ### 🎓 Style Coaching
                - Upload 2-5 reference tracks from artists you want to emulate
                - Record or upload your performance
                - Get AI-powered feedback on pitch, timing, and vocal characteristics
                - Receive a score and specific improvement suggestions

                ## Tips for Best Results

                - **Use high-quality audio files** - better input = better results
                - **Keep files under 5 minutes** for faster processing
                - **For style coaching**: Choose references from similar genres
                - **Record in quiet environments** for best analysis

                ## Supported Formats
                - Input: MP3, WAV, FLAC, M4A, OGG
                - Output: High-quality WAV files

                ## Technical Requirements
                - Some features require additional dependencies
                - Processing time varies based on file length and complexity

                ---
                Built for singers and musicians worldwide 🌍
                """)

        # Connect all the event handlers
        sep_button.click(
            process_audio_separation,
            inputs=[sep_audio_input, sep_mode],
            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
        )

        fx_button.click(
            process_vocal_effects,
            inputs=[fx_audio_input, fx_pitch, fx_reverb],
            outputs=[fx_status, fx_output, fx_analysis]
        )

        # Live recording reuses the same effects pipeline as the effects tab.
        live_button.click(
            process_vocal_effects,
            inputs=[live_audio, live_pitch, live_reverb],
            outputs=[live_status, live_output, live_analysis]
        )

        coach_button.click(
            process_style_coaching,
            inputs=[coach_refs, coach_user],
            outputs=[coach_status, coach_refs_status, coach_feedback]
        )

    return app
|
| 608 |
+
|
| 609 |
+
if __name__ == "__main__":
    # Build the Gradio UI and start the local web server.
    app = create_app()
    app.launch()
|