Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,6 @@ except ImportError:
|
|
| 23 |
# ==================== ANALYSIS FUNCTIONS ====================
|
| 24 |
|
| 25 |
def read_audio_info(path):
|
| 26 |
-
"""Read audio file metadata"""
|
| 27 |
info = sf.info(path)
|
| 28 |
return {
|
| 29 |
"samplerate": int(info.samplerate),
|
|
@@ -36,7 +35,6 @@ def read_audio_info(path):
|
|
| 36 |
|
| 37 |
|
| 38 |
def compute_time_domain_stats(y):
|
| 39 |
-
"""Calculate time-domain statistics"""
|
| 40 |
peak = float(np.max(np.abs(y)))
|
| 41 |
rms = float(np.sqrt(np.mean(y**2)))
|
| 42 |
|
|
@@ -62,36 +60,43 @@ def compute_time_domain_stats(y):
|
|
| 62 |
|
| 63 |
|
| 64 |
def compute_spectral_analysis(y, sr, n_fft=8192):
|
| 65 |
-
"""Comprehensive spectral analysis"""
|
| 66 |
hop_length = n_fft // 4
|
| 67 |
-
|
|
|
|
| 68 |
S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann'))
|
| 69 |
freqs = np.linspace(0, sr/2, S.shape[0])
|
|
|
|
|
|
|
| 70 |
S_db = librosa.amplitude_to_db(S, ref=np.max)
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
| 73 |
total_energy = float(np.sum(energy))
|
|
|
|
| 74 |
cum_energy = np.cumsum(energy)
|
| 75 |
-
|
| 76 |
roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
|
| 77 |
roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
|
| 78 |
freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
|
| 79 |
freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
peak_db = float(np.max(S_db))
|
| 83 |
threshold_db = peak_db - 60.0
|
| 84 |
non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
|
| 85 |
highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
|
| 86 |
-
|
|
|
|
| 87 |
def energy_above(f):
|
| 88 |
idx = np.searchsorted(freqs, f)
|
| 89 |
return float(100.0 * np.sum(energy[idx:]) / total_energy)
|
| 90 |
-
|
| 91 |
def energy_below(f):
|
| 92 |
idx = np.searchsorted(freqs, f)
|
| 93 |
return float(100.0 * np.sum(energy[:idx]) / total_energy)
|
| 94 |
-
|
| 95 |
energy_stats = {
|
| 96 |
"below_100hz": energy_below(100),
|
| 97 |
"below_200hz": energy_below(200),
|
|
@@ -102,12 +107,14 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
|
|
| 102 |
"above_12khz": energy_above(12000),
|
| 103 |
"above_16khz": energy_above(16000),
|
| 104 |
}
|
| 105 |
-
|
|
|
|
| 106 |
diffs = np.diff(mean_db_per_bin)
|
| 107 |
big_drop_idx = np.where(diffs < -20.0)[0]
|
| 108 |
brick_wall = bool(big_drop_idx.size)
|
| 109 |
brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
|
| 110 |
-
|
|
|
|
| 111 |
smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
|
| 112 |
minima = sps.argrelextrema(smooth, np.less)[0]
|
| 113 |
notches = []
|
|
@@ -121,12 +128,12 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
|
|
| 121 |
depth = neighbors_peak - smooth[m]
|
| 122 |
if depth >= 15.0 and freqs[m] > 100:
|
| 123 |
notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
|
| 124 |
-
|
| 125 |
centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
|
| 126 |
bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
|
| 127 |
flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
|
| 128 |
rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
|
| 129 |
-
|
| 130 |
return {
|
| 131 |
"S_db": S_db,
|
| 132 |
"freqs": freqs,
|
|
@@ -147,7 +154,6 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
|
|
| 147 |
|
| 148 |
|
| 149 |
def compute_loudness(y, sr):
|
| 150 |
-
"""Compute integrated loudness (LUFS)"""
|
| 151 |
if not LOUDNESS_AVAILABLE:
|
| 152 |
return None
|
| 153 |
try:
|
|
@@ -159,7 +165,6 @@ def compute_loudness(y, sr):
|
|
| 159 |
|
| 160 |
|
| 161 |
def detect_audio_issues(spectral, time_stats):
|
| 162 |
-
"""Detect common audio processing artifacts"""
|
| 163 |
issues = []
|
| 164 |
energy = spectral["energy_distribution"]
|
| 165 |
|
|
@@ -169,35 +174,34 @@ def detect_audio_issues(spectral, time_stats):
|
|
| 169 |
elif energy["below_200hz"] < 5.0:
|
| 170 |
issues.append(("HIGH_PASS_FILTER", "MEDIUM",
|
| 171 |
f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
|
| 172 |
-
|
| 173 |
if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
|
| 174 |
issues.append(("HF_LOSS", "HIGH",
|
| 175 |
f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
|
| 176 |
elif energy["above_12khz"] < 1.0:
|
| 177 |
issues.append(("HF_LOSS", "MEDIUM",
|
| 178 |
f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
|
| 179 |
-
|
| 180 |
if spectral["brick_wall_detected"]:
|
| 181 |
issues.append(("BRICK_WALL", "HIGH",
|
| 182 |
f"Brick-wall filter at {spectral['brick_wall_freq']:.0f}Hz."))
|
| 183 |
-
|
| 184 |
if len(spectral["spectral_notches"]) > 0:
|
| 185 |
issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
|
| 186 |
f"{len(spectral['spectral_notches'])} spectral notches detected."))
|
| 187 |
-
|
| 188 |
if time_stats["crest_factor_db"] < 3.0:
|
| 189 |
issues.append(("OVER_COMPRESSION", "HIGH",
|
| 190 |
f"Very low crest factor ({time_stats['crest_factor_db']:.1f}dB). Heavy compression."))
|
| 191 |
elif time_stats["crest_factor_db"] < 6.0:
|
| 192 |
issues.append(("COMPRESSION", "MEDIUM",
|
| 193 |
f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
|
| 194 |
-
|
| 195 |
if time_stats["peak"] >= 0.999:
|
| 196 |
issues.append(("CLIPPING", "CRITICAL",
|
| 197 |
f"Peak at {time_stats['peak']:.6f}. Possible digital clipping!"))
|
| 198 |
-
|
| 199 |
-
return issues
|
| 200 |
|
|
|
|
| 201 |
|
| 202 |
def create_report(audio_data, output_path):
|
| 203 |
"""Create comprehensive PNG report"""
|
|
|
|
| 23 |
# ==================== ANALYSIS FUNCTIONS ====================
|
| 24 |
|
| 25 |
def read_audio_info(path):
|
|
|
|
| 26 |
info = sf.info(path)
|
| 27 |
return {
|
| 28 |
"samplerate": int(info.samplerate),
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
def compute_time_domain_stats(y):
|
|
|
|
| 38 |
peak = float(np.max(np.abs(y)))
|
| 39 |
rms = float(np.sqrt(np.mean(y**2)))
|
| 40 |
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def compute_spectral_analysis(y, sr, n_fft=8192):
|
|
|
|
| 63 |
hop_length = n_fft // 4
|
| 64 |
+
|
| 65 |
+
# STFT
|
| 66 |
S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann'))
|
| 67 |
freqs = np.linspace(0, sr/2, S.shape[0])
|
| 68 |
+
|
| 69 |
+
# dB matrix
|
| 70 |
S_db = librosa.amplitude_to_db(S, ref=np.max)
|
| 71 |
+
|
| 72 |
+
# ===== HYBRID FIX: per-bin energy = 75th percentile of power (S**2) along axis 1; the 1e-20 offset keeps total_energy non-zero =====
|
| 73 |
+
S_power = S**2
|
| 74 |
+
energy = np.percentile(S_power, 75, axis=1) + 1e-20
|
| 75 |
total_energy = float(np.sum(energy))
|
| 76 |
+
|
| 77 |
cum_energy = np.cumsum(energy)
|
|
|
|
| 78 |
roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
|
| 79 |
roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
|
| 80 |
freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
|
| 81 |
freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
|
| 82 |
+
|
| 83 |
+
# ===== HYBRID FIX: 90th-percentile dB per bin (instead of mean); the name mean_db_per_bin is kept because the code below still reads it =====
|
| 84 |
+
mean_db_per_bin = np.percentile(S_db, 90, axis=1)
|
| 85 |
+
|
| 86 |
peak_db = float(np.max(S_db))
|
| 87 |
threshold_db = peak_db - 60.0
|
| 88 |
non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
|
| 89 |
highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
|
| 90 |
+
|
| 91 |
+
# Energy band helpers: percentage of total_energy above / below a cutoff frequency f
|
| 92 |
def energy_above(f):
|
| 93 |
idx = np.searchsorted(freqs, f)
|
| 94 |
return float(100.0 * np.sum(energy[idx:]) / total_energy)
|
| 95 |
+
|
| 96 |
def energy_below(f):
|
| 97 |
idx = np.searchsorted(freqs, f)
|
| 98 |
return float(100.0 * np.sum(energy[:idx]) / total_energy)
|
| 99 |
+
|
| 100 |
energy_stats = {
|
| 101 |
"below_100hz": energy_below(100),
|
| 102 |
"below_200hz": energy_below(200),
|
|
|
|
| 107 |
"above_12khz": energy_above(12000),
|
| 108 |
"above_16khz": energy_above(16000),
|
| 109 |
}
|
| 110 |
+
|
| 111 |
+
# Brick-wall detection: flag any drop of more than 20 dB between adjacent bins of the percentile spectrum
|
| 112 |
diffs = np.diff(mean_db_per_bin)
|
| 113 |
big_drop_idx = np.where(diffs < -20.0)[0]
|
| 114 |
brick_wall = bool(big_drop_idx.size)
|
| 115 |
brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
|
| 116 |
+
|
| 117 |
+
# Spectral notches (logic unchanged; the input mean_db_per_bin now holds the 90th-percentile spectrum)
|
| 118 |
smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
|
| 119 |
minima = sps.argrelextrema(smooth, np.less)[0]
|
| 120 |
notches = []
|
|
|
|
| 128 |
depth = neighbors_peak - smooth[m]
|
| 129 |
if depth >= 15.0 and freqs[m] > 100:
|
| 130 |
notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
|
| 131 |
+
|
| 132 |
centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
|
| 133 |
bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
|
| 134 |
flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
|
| 135 |
rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
|
| 136 |
+
|
| 137 |
return {
|
| 138 |
"S_db": S_db,
|
| 139 |
"freqs": freqs,
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
def compute_loudness(y, sr):
|
|
|
|
| 157 |
if not LOUDNESS_AVAILABLE:
|
| 158 |
return None
|
| 159 |
try:
|
|
|
|
| 165 |
|
| 166 |
|
| 167 |
def detect_audio_issues(spectral, time_stats):
|
|
|
|
| 168 |
issues = []
|
| 169 |
energy = spectral["energy_distribution"]
|
| 170 |
|
|
|
|
| 174 |
elif energy["below_200hz"] < 5.0:
|
| 175 |
issues.append(("HIGH_PASS_FILTER", "MEDIUM",
|
| 176 |
f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
|
| 177 |
+
|
| 178 |
if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
|
| 179 |
issues.append(("HF_LOSS", "HIGH",
|
| 180 |
f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
|
| 181 |
elif energy["above_12khz"] < 1.0:
|
| 182 |
issues.append(("HF_LOSS", "MEDIUM",
|
| 183 |
f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
|
| 184 |
+
|
| 185 |
if spectral["brick_wall_detected"]:
|
| 186 |
issues.append(("BRICK_WALL", "HIGH",
|
| 187 |
f"Brick-wall filter at {spectral['brick_wall_freq']:.0f}Hz."))
|
| 188 |
+
|
| 189 |
if len(spectral["spectral_notches"]) > 0:
|
| 190 |
issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
|
| 191 |
f"{len(spectral['spectral_notches'])} spectral notches detected."))
|
| 192 |
+
|
| 193 |
if time_stats["crest_factor_db"] < 3.0:
|
| 194 |
issues.append(("OVER_COMPRESSION", "HIGH",
|
| 195 |
f"Very low crest factor ({time_stats['crest_factor_db']:.1f}dB). Heavy compression."))
|
| 196 |
elif time_stats["crest_factor_db"] < 6.0:
|
| 197 |
issues.append(("COMPRESSION", "MEDIUM",
|
| 198 |
f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
|
| 199 |
+
|
| 200 |
if time_stats["peak"] >= 0.999:
|
| 201 |
issues.append(("CLIPPING", "CRITICAL",
|
| 202 |
f"Peak at {time_stats['peak']:.6f}. Possible digital clipping!"))
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
return issues
|
| 205 |
|
| 206 |
def create_report(audio_data, output_path):
|
| 207 |
"""Create comprehensive PNG report"""
|