Update app.py
Browse files
app.py
CHANGED
|
@@ -71,7 +71,7 @@ class AudioAnalyzer:
|
|
| 71 |
return None, f"Download failed: {e.stderr}"
|
| 72 |
except Exception as e:
|
| 73 |
logger.error(f"Unexpected error during download: {str(e)}")
|
| 74 |
-
return None, f"
|
| 75 |
|
| 76 |
def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
|
| 77 |
progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
|
@@ -93,14 +93,14 @@ class AudioAnalyzer:
|
|
| 93 |
'duration': duration,
|
| 94 |
'sample_rate': sr,
|
| 95 |
'samples': len(y),
|
| 96 |
-
'tempo': librosa.beat.beat_track(y=y, sr=sr)[0],
|
| 97 |
'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
|
| 98 |
'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
|
| 99 |
'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
|
| 100 |
'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
|
| 101 |
}
|
| 102 |
|
| 103 |
-
progress(0.5, desc="Computing
|
| 104 |
hop_length = 512
|
| 105 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
| 106 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
@@ -108,7 +108,7 @@ class AudioAnalyzer:
|
|
| 108 |
progress(0.8, desc="Creating visualizations...")
|
| 109 |
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
| 110 |
|
| 111 |
-
time_axis =
|
| 112 |
axes[0, 0].plot(time_axis, y)
|
| 113 |
axes[0, 0].set_title('Waveform')
|
| 114 |
axes[0, 0].set_xlabel('Time (s)')
|
|
@@ -130,15 +130,21 @@ class AudioAnalyzer:
|
|
| 130 |
|
| 131 |
plt.tight_layout()
|
| 132 |
plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
|
| 133 |
-
plt.savefig(plot_path, dpi=
|
| 134 |
plt.close()
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
summary = f"""
|
| 137 |
**Audio Summary:**
|
| 138 |
- Duration: {duration:.2f} seconds
|
| 139 |
- Sample Rate: {sr} Hz
|
| 140 |
- Estimated Tempo: {features['tempo']:.1f} BPM
|
| 141 |
-
- Number of Samples: {
|
| 142 |
|
| 143 |
**Feature Shapes:**
|
| 144 |
- MFCC: {features['mfcc'].shape}
|
|
@@ -171,9 +177,9 @@ class AudioAnalyzer:
|
|
| 171 |
y_harm = librosa.effects.harmonic(y=y, margin=8)
|
| 172 |
chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
|
| 173 |
chroma_filter = np.minimum(chroma_harm,
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
|
| 178 |
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
|
| 179 |
chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
|
|
@@ -195,7 +201,7 @@ class AudioAnalyzer:
|
|
| 195 |
|
| 196 |
plt.tight_layout()
|
| 197 |
plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
|
| 198 |
-
plt.savefig(plot_path, dpi=
|
| 199 |
plt.close()
|
| 200 |
|
| 201 |
summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
|
|
@@ -216,7 +222,7 @@ class AudioAnalyzer:
|
|
| 216 |
progress(0.1, desc="Loading audio...")
|
| 217 |
y, sr = librosa.load(audio_path, sr=sr)
|
| 218 |
|
| 219 |
-
progress(0.3, desc="Computing
|
| 220 |
hop_length = 512
|
| 221 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
| 222 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
@@ -233,7 +239,7 @@ class AudioAnalyzer:
|
|
| 233 |
|
| 234 |
for i in range(num_patches_to_show):
|
| 235 |
librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
|
| 236 |
-
|
| 237 |
axes[i].set_title(f'Patch {i+1}')
|
| 238 |
|
| 239 |
for i in range(num_patches_to_show, len(axes)):
|
|
@@ -241,14 +247,14 @@ class AudioAnalyzer:
|
|
| 241 |
|
| 242 |
plt.tight_layout()
|
| 243 |
plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
|
| 244 |
-
plt.savefig(plot_path, dpi=
|
| 245 |
plt.close()
|
| 246 |
|
| 247 |
summary = f"""
|
| 248 |
**Patch Generation Summary:**
|
| 249 |
- Total patches generated: {patches.shape[-1]}
|
| 250 |
-
- Patch duration: {patch_duration} seconds
|
| 251 |
-
- Hop duration: {hop_duration} seconds
|
| 252 |
- Patch shape (mels, time, patches): {patches.shape}
|
| 253 |
- Each patch covers {patch_frames} time frames
|
| 254 |
"""
|
|
@@ -275,7 +281,7 @@ def create_gradio_interface() -> gr.Blocks:
|
|
| 275 |
- 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
|
| 276 |
- 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
|
| 277 |
|
| 278 |
-
**Requirements**:
|
| 279 |
""")
|
| 280 |
|
| 281 |
with gr.Row():
|
|
|
|
| 71 |
return None, f"Download failed: {e.stderr}"
|
| 72 |
except Exception as e:
|
| 73 |
logger.error(f"Unexpected error during download: {str(e)}")
|
| 74 |
+
return None, f"Error: {str(e)}"
|
| 75 |
|
| 76 |
def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
|
| 77 |
progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
|
|
|
| 93 |
'duration': duration,
|
| 94 |
'sample_rate': sr,
|
| 95 |
'samples': len(y),
|
| 96 |
+
'tempo': float(librosa.beat.beat_track(y=y, sr=sr)[0]), # Convert to float
|
| 97 |
'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
|
| 98 |
'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
|
| 99 |
'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
|
| 100 |
'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
|
| 101 |
}
|
| 102 |
|
| 103 |
+
progress(0.5, desc="Computing Mel spectrogram...")
|
| 104 |
hop_length = 512
|
| 105 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
| 106 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
|
|
| 108 |
progress(0.8, desc="Creating visualizations...")
|
| 109 |
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
| 110 |
|
| 111 |
+
time_axis = np.linspace(0, duration, len(y))
|
| 112 |
axes[0, 0].plot(time_axis, y)
|
| 113 |
axes[0, 0].set_title('Waveform')
|
| 114 |
axes[0, 0].set_xlabel('Time (s)')
|
|
|
|
| 130 |
|
| 131 |
plt.tight_layout()
|
| 132 |
plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
|
| 133 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
| 134 |
plt.close()
|
| 135 |
|
| 136 |
+
# Validate feature shapes
|
| 137 |
+
for key in ['mfcc', 'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate']:
|
| 138 |
+
if not isinstance(features[key].shape, tuple):
|
| 139 |
+
logger.error(f"Invalid shape for {key}: {features[key].shape}")
|
| 140 |
+
return None, None, f"Invalid feature shape for {key}"
|
| 141 |
+
|
| 142 |
summary = f"""
|
| 143 |
**Audio Summary:**
|
| 144 |
- Duration: {duration:.2f} seconds
|
| 145 |
- Sample Rate: {sr} Hz
|
| 146 |
- Estimated Tempo: {features['tempo']:.1f} BPM
|
| 147 |
+
- Number of Samples: {features['samples']:,}
|
| 148 |
|
| 149 |
**Feature Shapes:**
|
| 150 |
- MFCC: {features['mfcc'].shape}
|
|
|
|
| 177 |
y_harm = librosa.effects.harmonic(y=y, margin=8)
|
| 178 |
chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
|
| 179 |
chroma_filter = np.minimum(chroma_harm,
|
| 180 |
+
librosa.decompose.nn_filter(chroma_harm,
|
| 181 |
+
aggregate=np.median,
|
| 182 |
+
metric='cosine'))
|
| 183 |
chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
|
| 184 |
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
|
| 185 |
chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
|
|
|
|
| 201 |
|
| 202 |
plt.tight_layout()
|
| 203 |
plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
|
| 204 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
| 205 |
plt.close()
|
| 206 |
|
| 207 |
summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
|
|
|
|
| 222 |
progress(0.1, desc="Loading audio...")
|
| 223 |
y, sr = librosa.load(audio_path, sr=sr)
|
| 224 |
|
| 225 |
+
progress(0.3, desc="Computing Mel spectrogram...")
|
| 226 |
hop_length = 512
|
| 227 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
| 228 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
|
|
| 239 |
|
| 240 |
for i in range(num_patches_to_show):
|
| 241 |
librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
|
| 242 |
+
ax=axes[i], sr=sr, hop_length=hop_length)
|
| 243 |
axes[i].set_title(f'Patch {i+1}')
|
| 244 |
|
| 245 |
for i in range(num_patches_to_show, len(axes)):
|
|
|
|
| 247 |
|
| 248 |
plt.tight_layout()
|
| 249 |
plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
|
| 250 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
| 251 |
plt.close()
|
| 252 |
|
| 253 |
summary = f"""
|
| 254 |
**Patch Generation Summary:**
|
| 255 |
- Total patches generated: {patches.shape[-1]}
|
| 256 |
+
- Patch duration: {patch_duration:.1f} seconds
|
| 257 |
+
- Hop duration: {hop_duration:.1f} seconds
|
| 258 |
- Patch shape (mels, time, patches): {patches.shape}
|
| 259 |
- Each patch covers {patch_frames} time frames
|
| 260 |
"""
|
|
|
|
| 281 |
- 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
|
| 282 |
- 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
|
| 283 |
|
| 284 |
+
**Requirements**: Dependencies are automatically installed in Hugging Face Spaces via `requirements.txt`.
|
| 285 |
""")
|
| 286 |
|
| 287 |
with gr.Row():
|