Spaces:
Running
Running
Justin Davis Claude Opus 4.6 commited on
Commit ·
ad47dc1
1
Parent(s): 2203384
Add Audio Visualizer Gradio app
Browse files

22 audio visualizations with gallery view and zip download.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- README.md +28 -6
- app.py +165 -0
- audio_visualizer.py +1142 -0
- requirements.txt +5 -0
README.md
CHANGED
|
@@ -1,14 +1,36 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
-
short_description: Allow vision enabled models to "hear" music.
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Audio Visualizer
|
| 3 |
+
emoji: 🎵
|
| 4 |
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "5.12.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Audio Visualizer — Let Claude Hear Your Music
|
| 14 |
+
|
| 15 |
+
Upload any audio file and get **22 detailed visualizations** that translate sound into sight. Originally built to help the deaf and hard of hearing experience music visually, these images also let Claude (or any vision-capable AI) "listen" to your music by analyzing the visual representations.
|
| 16 |
+
|
| 17 |
+
## What You Get
|
| 18 |
+
|
| 19 |
+
- **22 PNG visualizations** covering waveform, spectrogram, chromagram, beat tracking, harmonic/percussive separation, MFCCs, and more
|
| 20 |
+
- **A downloadable zip** containing all images plus a text guide explaining each visualization
|
| 21 |
+
- **Three quality levels** — Normal (150 DPI), High (200 DPI), Ultra (300 DPI)
|
| 22 |
+
|
| 23 |
+
## How to Use
|
| 24 |
+
|
| 25 |
+
1. Upload an audio file (MP3, WAV, FLAC, OGG, etc.)
|
| 26 |
+
2. Choose a quality level
|
| 27 |
+
3. Click **Generate Visualizations**
|
| 28 |
+
4. Browse the gallery and download the zip
|
| 29 |
+
|
| 30 |
+
## Tip: Share with Claude
|
| 31 |
+
|
| 32 |
+
Download the zip file and upload it to a Claude conversation. Claude can analyze the visualizations to describe the music's rhythm, melody, dynamics, and texture — even though it can't hear the audio directly.
|
| 33 |
+
|
| 34 |
+
## Links
|
| 35 |
+
|
| 36 |
+
- [GitHub Repository](https://github.com/justindavis/AVisualizer)
|
app.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Audio Visualizer — Gradio Web Interface for Hugging Face Spaces
|
| 4 |
+
Upload audio, get 22 visualizations + a zip download ready for Claude.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import matplotlib
|
| 8 |
+
matplotlib.use('Agg') # MUST be before any pyplot/librosa import
|
| 9 |
+
|
| 10 |
+
import gc
|
| 11 |
+
import shutil
|
| 12 |
+
import tempfile
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
import gradio as gr
|
| 16 |
+
import librosa
|
| 17 |
+
import matplotlib.pyplot as plt
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
import audio_visualizer
|
| 21 |
+
|
| 22 |
+
# All 22 visualization functions in order (mirrors the GUI list).
# Each entry is (display name for progress messages, plotting function).
# Every function takes (y, sr, output_dir); plot_combined_dashboard is the
# one exception and additionally accepts base_path (special-cased in
# generate_visualizations). Order here determines progress-message order;
# gallery order comes from the numeric filename prefixes each function writes.
VISUALIZATIONS = [
    ("Waveform", audio_visualizer.plot_waveform),
    ("Volume Envelope", audio_visualizer.plot_waveform_envelope),
    ("Spectrogram", audio_visualizer.plot_spectrogram),
    ("Mel Spectrogram", audio_visualizer.plot_mel_spectrogram),
    ("Chromagram", audio_visualizer.plot_chromagram),
    ("Tonnetz", audio_visualizer.plot_tonnetz),
    ("Spectral Centroid", audio_visualizer.plot_spectral_centroid),
    ("Spectral Bandwidth", audio_visualizer.plot_spectral_bandwidth),
    ("Spectral Rolloff", audio_visualizer.plot_spectral_rolloff),
    ("RMS Energy", audio_visualizer.plot_rms_energy),
    ("Zero Crossing Rate", audio_visualizer.plot_zero_crossing_rate),
    ("Onset Strength", audio_visualizer.plot_onset_strength),
    ("Beat Tracking", audio_visualizer.plot_beat_track),
    ("Tempogram", audio_visualizer.plot_tempogram),
    ("MFCCs", audio_visualizer.plot_mfcc),
    ("Spectral Contrast", audio_visualizer.plot_spectral_contrast),
    ("Harmonic/Percussive", audio_visualizer.plot_harmonic_percussive),
    ("Frequency Bands", audio_visualizer.plot_frequency_bands),
    ("Dynamic Range", audio_visualizer.plot_dynamic_range),
    ("Spectral Flatness", audio_visualizer.plot_spectral_flatness),
    ("Combined Dashboard", audio_visualizer.plot_combined_dashboard),
    ("3D Spectrogram", audio_visualizer.plot_3d_spectrogram),
]
|
| 47 |
+
|
| 48 |
+
# Radio-button label -> matplotlib savefig DPI. Keys double as the UI
# choices; values are assigned to audio_visualizer.FIGURE_DPI per request.
DPI_OPTIONS = {
    "Normal (150 DPI)": 150,
    "High (200 DPI)": 200,
    "Ultra (300 DPI)": 300,
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def generate_visualizations(audio_path, quality, progress=gr.Progress()):
    """Generate all 22 visualizations and return gallery images + zip file.

    Args:
        audio_path: Filesystem path to the uploaded audio (gr.Audio, filepath mode).
        quality: One of the DPI_OPTIONS keys selecting output resolution.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Tuple of (sorted list of generated PNG paths for the gallery,
        path to a zip archive containing all images plus the text guide).

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_path is None:
        raise gr.Error("Please upload an audio file first.")

    # NOTE(review): module-level DPI is shared mutable state; concurrent
    # requests with different quality settings could race — confirm the
    # Space runs requests serially before relying on this.
    audio_visualizer.FIGURE_DPI = DPI_OPTIONS.get(quality, 150)

    progress(0, desc="Loading audio...")
    y, sr = audio_visualizer.load_audio(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)
    audio_file = Path(audio_path)
    title = audio_file.stem

    # Temp dir holding the generated PNGs (served to the gallery, then zipped).
    output_tmp = tempfile.mkdtemp(prefix="avis_output_")
    output_dir = Path(output_tmp)

    total = len(VISUALIZATIONS)

    try:
        for i, (name, func) in enumerate(VISUALIZATIONS):
            progress(i / total, desc=f"Generating: {name} ({i + 1}/{total})...")

            # The combined dashboard is the only plotter that also needs the
            # original file path (for its title/metadata).
            if func == audio_visualizer.plot_combined_dashboard:
                func(y, sr, output_dir, base_path=audio_file)
            else:
                func(y, sr, output_dir)

            # Free figure memory between plots; long files produce big figures.
            plt.close('all')
            gc.collect()

        # Text guide explaining each visualization (included in the zip).
        progress(0.95, desc="Creating visualization guide...")
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        audio_visualizer.create_visualization_guide(output_dir, duration, tempo, title)
    except Exception:
        # Don't leak open figures or a partial output directory on failure.
        plt.close('all')
        shutil.rmtree(output_tmp, ignore_errors=True)
        raise

    # Collect all PNG paths; the numeric filename prefixes give stable order.
    image_paths = sorted(output_dir.glob("*.png"))

    progress(0.98, desc="Creating zip archive...")
    zip_tmp = tempfile.mkdtemp(prefix="avis_zip_")
    zip_base = Path(zip_tmp) / f"{title}_visualizations"
    zip_path = shutil.make_archive(str(zip_base), 'zip', output_dir)

    progress(1.0, desc="Done!")

    return image_paths, zip_path
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# --- Build the Gradio interface ---
# Declarative layout: inputs on the left, usage instructions on the right,
# gallery and zip download below; one click handler wires it all together.

with gr.Blocks(
    title="Audio Visualizer",
    theme=gr.themes.Soft(),
) as demo:
    # Header copy shown at the top of the Space.
    gr.Markdown(
        """
        # Audio Visualizer — Let Claude Hear Your Music
        Upload any audio file to generate **22 visualizations** that translate sound into sight.
        Download the zip and share it with Claude to let AI "listen" to your music.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: the three inputs (file, quality, trigger button).
            audio_input = gr.Audio(
                type="filepath",  # handler receives a path, not raw samples
                label="Upload Audio File",
            )
            quality_radio = gr.Radio(
                choices=list(DPI_OPTIONS.keys()),
                value="Normal (150 DPI)",
                label="Quality",
            )
            generate_btn = gr.Button("Generate Visualizations", variant="primary")
        with gr.Column(scale=1):
            # Right column: static usage instructions.
            gr.Markdown(
                """
                ### How it works
                1. **Upload** an MP3, WAV, FLAC, OGG, or other audio file
                2. **Choose quality** — higher DPI = sharper images but slower
                3. **Click Generate** and wait for all 22 visualizations
                4. **Download the zip** and upload it to a Claude conversation

                Claude can analyze these images to describe the music's rhythm,
                melody, dynamics, and texture — even though it can't hear the
                audio directly.
                """
            )

    # Gallery of all generated PNGs (filled by the click handler).
    gallery = gr.Gallery(
        label="Visualizations",
        columns=4,
        object_fit="contain",
        height="auto",
    )

    zip_download = gr.File(label="Download All (Zip)")

    # Wire the button to the generator; outputs map to (gallery, zip file).
    generate_btn.click(
        fn=generate_visualizations,
        inputs=[audio_input, quality_radio],
        outputs=[gallery, zip_download],
    )

if __name__ == "__main__":
    demo.launch()
|
audio_visualizer.py
ADDED
|
@@ -0,0 +1,1142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Audio Visualizer for the Deaf/Hard of Hearing
|
| 4 |
+
Generates comprehensive visual representations of audio files.
|
| 5 |
+
|
| 6 |
+
This script creates multiple visualization types to help someone
|
| 7 |
+
who cannot hear experience music visually.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
import matplotlib.colors as mcolors
|
| 13 |
+
from matplotlib.collections import LineCollection
|
| 14 |
+
from scipy import signal
|
| 15 |
+
from scipy.ndimage import gaussian_filter1d
|
| 16 |
+
import librosa
|
| 17 |
+
import librosa.display
|
| 18 |
+
import argparse
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
# Configuration
FIGURE_DPI = 150                 # default savefig resolution (app.py overrides per request)
COLORMAP_MAIN = 'magma'
COLORMAP_DIVERGING = 'coolwarm'
MAX_PLOT_POINTS = 10000  # No display can show more than this across a figure


def downsample_for_plot(times, *arrays):
    """Downsample arrays to at most MAX_PLOT_POINTS using a min/max envelope.

    For each bucket, keeps the min and max indices of the *first* array
    and samples all arrays (and ``times``) at those same positions, so every
    output array has the same length as the output times and the visual
    envelope of the first array is preserved.

    Args:
        times: 1-D array of x positions, same length as each array.
        *arrays: One or more 1-D arrays aligned with ``times``; the first
            drives which sample indices are kept.

    Returns:
        Tuple ``(times, *arrays)`` downsampled in temporal order; inputs are
        returned unchanged when already short enough.
    """
    n = len(times)
    if n <= MAX_PLOT_POINTS:
        return (times, *arrays)

    # Number of buckets; keep 2 points per bucket (min + max) for envelope.
    n_buckets = MAX_PLOT_POINTS // 2
    bucket_size = n // n_buckets

    indices = []
    for b in range(n_buckets):
        start = b * bucket_size
        # Fold the integer-division remainder into the final bucket so the
        # tail of the signal (up to n_buckets-1 samples) is not dropped.
        end = n if b == n_buckets - 1 else start + bucket_size
        chunk = arrays[0][start:end]
        i_min = int(np.argmin(chunk)) + start
        i_max = int(np.argmax(chunk)) + start
        # Keep each bucket's two samples in temporal order.
        if i_min <= i_max:
            indices.append(i_min)
            indices.append(i_max)
        else:
            indices.append(i_max)
            indices.append(i_min)

    indices = np.array(indices)
    out_times = times[indices]
    out_arrays = [arr[indices] for arr in arrays]
    return (out_times, *out_arrays)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def load_audio(filepath):
    """Load an audio file, print its basic stats, and return (y, sr)."""
    print(f"Loading audio file: {filepath}")
    # sr=None keeps the file's native sample rate instead of resampling.
    y, sr = librosa.load(filepath, sr=None)
    duration = librosa.get_duration(y=y, sr=sr)
    for info_line in (
        f"  Duration: {duration:.2f} seconds",
        f"  Sample rate: {sr} Hz",
        f"  Samples: {len(y):,}",
    ):
        print(info_line)
    return y, sr
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def create_output_dir(base_path):
    """Create (if needed) and return a '<stem>_visualizations' dir next to base_path."""
    target = base_path.parent / f"{base_path.stem}_visualizations"
    target.mkdir(exist_ok=True)
    print(f"Output directory: {target}")
    return target
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def save_figure(fig, output_dir, name, tight=True):
    """Save `fig` as <name>.png in output_dir at FIGURE_DPI, then close it."""
    filepath = output_dir / f"{name}.png"
    save_opts = {'dpi': FIGURE_DPI, 'facecolor': 'white', 'edgecolor': 'none'}
    if tight:
        save_opts['bbox_inches'] = 'tight'
    fig.savefig(filepath, **save_opts)
    # Close immediately so long batch runs don't accumulate open figures.
    plt.close(fig)
    print(f"  Saved: {name}.png")
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# =============================================================================
|
| 97 |
+
# VISUALIZATION FUNCTIONS
|
| 98 |
+
# =============================================================================
|
| 99 |
+
|
| 100 |
+
def plot_waveform(y, sr, output_dir):
    """
    1. WAVEFORM - Basic amplitude over time
    Shows the raw audio signal - peaks indicate loud moments,
    flat areas indicate quiet moments.
    """
    print("Generating: Waveform...")
    fig, ax = plt.subplots(figsize=(16, 4))

    duration = len(y) / sr
    sample_times = np.linspace(0, duration, len(y))
    # Envelope-downsample so huge files plot fast without changing the look.
    plot_t, plot_y = downsample_for_plot(sample_times, y)

    ax.plot(plot_t, plot_y, color='#2E86AB', linewidth=0.3, alpha=0.8)
    ax.fill_between(plot_t, plot_y, alpha=0.3, color='#2E86AB')

    ax.set_title('Waveform - Audio Amplitude Over Time\n(Peaks = Loud, Flat = Quiet)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Amplitude', fontsize=12)
    ax.set_xlim(0, duration)
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '01_waveform')
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def plot_waveform_envelope(y, sr, output_dir):
    """
    2. WAVEFORM ENVELOPE - Smoothed amplitude showing dynamics
    Shows overall loudness changes without the rapid oscillations.
    """
    print("Generating: Waveform Envelope...")
    fig, ax = plt.subplots(figsize=(16, 4))

    # Instantaneous amplitude from the Hilbert analytic signal, then
    # smoothed with a ~50ms-sigma Gaussian so only the dynamics remain.
    raw_envelope = np.abs(signal.hilbert(y))
    smoothed = gaussian_filter1d(raw_envelope, sigma=int(sr * 0.05))

    duration = len(y) / sr
    sample_times = np.linspace(0, duration, len(y))
    plot_t, plot_env = downsample_for_plot(sample_times, smoothed)

    ax.fill_between(plot_t, plot_env, alpha=0.7, color='#E94F37')
    ax.plot(plot_t, plot_env, color='#E94F37', linewidth=0.5)

    ax.set_title('Volume Envelope - Overall Loudness Over Time\n(Higher = Louder sections)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Loudness', fontsize=12)
    ax.set_xlim(0, duration)
    ax.set_ylim(0, None)
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '02_volume_envelope')
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def plot_spectrogram(y, sr, output_dir):
    """
    3. SPECTROGRAM - Frequency content over time
    Shows what pitches/frequencies are playing at each moment.
    Bottom = low/bass notes, Top = high/treble notes.
    Brightness = loudness of that frequency.
    """
    print("Generating: Spectrogram...")
    fig, ax = plt.subplots(figsize=(16, 8))

    # STFT magnitude converted to dB relative to the loudest bin.
    stft_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    mesh = librosa.display.specshow(stft_db, sr=sr, x_axis='time', y_axis='log',
                                    ax=ax, cmap=COLORMAP_MAIN)

    ax.set_title('Spectrogram - All Frequencies Over Time\n(Bottom = Bass/Low, Top = Treble/High, Bright = Loud)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Frequency (Hz) - Low to High pitch', fontsize=12)

    colorbar = fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
    colorbar.set_label('Loudness (dB)', fontsize=11)

    save_figure(fig, output_dir, '03_spectrogram')
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def plot_mel_spectrogram(y, sr, output_dir):
    """
    4. MEL SPECTROGRAM - Human-perception-weighted frequency view
    Similar to spectrogram but scaled to match how humans perceive pitch.
    """
    print("Generating: Mel Spectrogram...")
    fig, ax = plt.subplots(figsize=(16, 8))

    # 128 mel bands up to the Nyquist frequency, shown in dB.
    nyquist = sr / 2
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=nyquist)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    mesh = librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel',
                                    ax=ax, cmap=COLORMAP_MAIN, fmax=nyquist)

    ax.set_title('Mel Spectrogram - Frequencies Scaled to Human Pitch Perception\n(How we naturally hear pitch differences)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Frequency (Mel scale) - Perceived pitch', fontsize=12)

    colorbar = fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
    colorbar.set_label('Loudness (dB)', fontsize=11)

    save_figure(fig, output_dir, '04_mel_spectrogram')
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def plot_chromagram(y, sr, output_dir):
    """
    5. CHROMAGRAM - Musical notes/chords over time
    Shows the 12 musical notes (C, C#, D, etc.) and their intensity.
    Great for seeing chord progressions and melody.
    """
    print("Generating: Chromagram...")
    fig, ax = plt.subplots(figsize=(16, 6))

    # Constant-Q chroma folds all octaves onto the 12 pitch classes.
    pitch_classes = librosa.feature.chroma_cqt(y=y, sr=sr)
    mesh = librosa.display.specshow(pitch_classes, sr=sr, x_axis='time',
                                    y_axis='chroma', ax=ax, cmap='YlOrRd')

    ax.set_title('Chromagram - Musical Notes Over Time\n(Shows which of the 12 notes are playing - chord progressions)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Musical Note', fontsize=12)

    colorbar = fig.colorbar(mesh, ax=ax)
    colorbar.set_label('Note Intensity', fontsize=11)

    save_figure(fig, output_dir, '05_chromagram')
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def plot_tonnetz(y, sr, output_dir):
    """
    6. TONNETZ - Harmonic relationships
    Shows tonal/harmonic content using music theory relationships.
    """
    print("Generating: Tonnetz (Harmonic Space)...")
    fig, ax = plt.subplots(figsize=(16, 6))

    # Tonnetz projects chroma onto 6 tonal-centroid dimensions
    # (fifths, minor thirds, major thirds; an x/y pair for each).
    chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
    tonal_centroids = librosa.feature.tonnetz(chroma=chroma_cq)

    mesh = librosa.display.specshow(tonal_centroids, sr=sr, x_axis='time',
                                    ax=ax, cmap=COLORMAP_DIVERGING)

    ax.set_title('Tonnetz - Harmonic/Tonal Relationships\n(Shows musical harmony and chord relationships)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Tonal Dimension', fontsize=12)
    ax.set_yticks(range(6))
    ax.set_yticklabels(['Fifth (x)', 'Fifth (y)', 'Minor (x)',
                        'Minor (y)', 'Major (x)', 'Major (y)'])

    colorbar = fig.colorbar(mesh, ax=ax)
    colorbar.set_label('Intensity', fontsize=11)

    save_figure(fig, output_dir, '06_tonnetz')
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def plot_spectral_centroid(y, sr, output_dir):
    """
    7. SPECTRAL CENTROID - Brightness of sound over time
    Higher values = brighter/sharper sound, Lower = darker/duller sound.
    """
    print("Generating: Spectral Centroid (Brightness)...")
    fig, ax = plt.subplots(figsize=(16, 5))

    # Spectral centroid = amplitude-weighted mean frequency per frame.
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    times = librosa.frames_to_time(range(len(cent)), sr=sr)

    # (Removed an unused `cent_norm` normalization that also divided by
    # cent.max() - cent.min(), which is zero for constant-brightness input.)

    # Build line segments colored by the centroid value itself, so the
    # curve's color doubles as a brightness scale.
    points = np.array([times, cent]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    norm = plt.Normalize(cent.min(), cent.max())
    lc = LineCollection(segments, cmap='plasma', norm=norm)
    lc.set_array(cent)
    lc.set_linewidth(2)

    line = ax.add_collection(lc)
    ax.set_xlim(times.min(), times.max())
    ax.set_ylim(cent.min() * 0.9, cent.max() * 1.1)

    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Spectral Centroid (Hz)', fontsize=12)
    ax.set_title('Spectral Centroid - Sound Brightness Over Time\n(High = Bright/Sharp sound, Low = Dark/Dull sound)',
                 fontsize=14, fontweight='bold')

    cbar = fig.colorbar(line, ax=ax)
    cbar.set_label('Brightness (Hz)', fontsize=11)
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '07_spectral_centroid')
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def plot_spectral_bandwidth(y, sr, output_dir):
    """
    8. SPECTRAL BANDWIDTH - How spread out the frequencies are
    Wide = rich/complex sound, Narrow = pure/simple sound.
    """
    print("Generating: Spectral Bandwidth...")
    fig, ax = plt.subplots(figsize=(16, 5))

    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    times = librosa.frames_to_time(range(len(bandwidth)), sr=sr)

    ax.fill_between(times, bandwidth, alpha=0.6, color='#7B2CBF')
    ax.plot(times, bandwidth, color='#7B2CBF', linewidth=1)

    ax.set_title('Spectral Bandwidth - Sound Richness/Complexity\n(Wide = Rich/Complex, Narrow = Pure/Simple)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Bandwidth (Hz)', fontsize=12)
    ax.set_xlim(0, times.max())
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '08_spectral_bandwidth')
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def plot_spectral_rolloff(y, sr, output_dir):
    """
    9. SPECTRAL ROLLOFF - Where most of the energy is concentrated
    Shows the frequency below which 85% of the sound energy exists.
    """
    print("Generating: Spectral Rolloff...")
    fig, ax = plt.subplots(figsize=(16, 5))

    # 85th-percentile rolloff frequency per frame.
    roll = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
    times = librosa.frames_to_time(range(len(roll)), sr=sr)

    ax.fill_between(times, roll, alpha=0.6, color='#00A896')
    ax.plot(times, roll, color='#00A896', linewidth=1)

    ax.set_title('Spectral Rolloff - Where 85% of Sound Energy Lives\n(Higher = More high-frequency content)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Rolloff Frequency (Hz)', fontsize=12)
    ax.set_xlim(0, times.max())
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '09_spectral_rolloff')
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def plot_rms_energy(y, sr, output_dir):
    """
    10. RMS ENERGY - Overall loudness/power over time
    Shows the intensity and dynamics of the music.
    """
    print("Generating: RMS Energy (Loudness)...")
    fig, ax = plt.subplots(figsize=(16, 4))

    loudness = librosa.feature.rms(y=y)[0]
    t = librosa.frames_to_time(np.arange(len(loudness)), sr=sr)

    # Orange filled energy curve with a contrasting red outline.
    ax.fill_between(t, loudness, alpha=0.7, color='#F77F00')
    ax.plot(t, loudness, color='#D62828', linewidth=1)

    ax.set_title('RMS Energy - Overall Loudness/Power Over Time\n(Peaks = Intense moments, Valleys = Quieter sections)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Energy (RMS)', fontsize=12)
    ax.set_xlim(0, t.max())
    ax.set_ylim(0, None)
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '10_rms_energy')
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def plot_zero_crossing_rate(y, sr, output_dir):
    """
    11. ZERO CROSSING RATE - Texture/noisiness indicator
    High values indicate noisy/percussive sounds, low = tonal/smooth sounds.
    """
    print("Generating: Zero Crossing Rate (Texture)...")
    fig, ax = plt.subplots(figsize=(16, 4))

    rate = librosa.feature.zero_crossing_rate(y)[0]
    t = librosa.frames_to_time(np.arange(len(rate)), sr=sr)

    # Filled area first, then a darker outline over it.
    ax.fill_between(t, rate, alpha=0.6, color='#84A98C')
    ax.plot(t, rate, color='#2D6A4F', linewidth=0.8)

    ax.set_title('Zero Crossing Rate - Sound Texture\n(High = Noisy/Percussive, Low = Smooth/Tonal)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Zero Crossing Rate', fontsize=12)
    ax.set_xlim(0, t.max())
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '11_zero_crossing_rate')
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def plot_onset_strength(y, sr, output_dir):
    """
    12. ONSET STRENGTH - Where new sounds/notes begin
    Peaks indicate the start of new notes, beats, or events.
    """
    print("Generating: Onset Strength (Note Attacks)...")
    fig, ax = plt.subplots(figsize=(16, 4))

    envelope = librosa.onset.onset_strength(y=y, sr=sr)
    t = librosa.frames_to_time(np.arange(len(envelope)), sr=sr)

    ax.fill_between(t, envelope, alpha=0.6, color='#FF006E')
    ax.plot(t, envelope, color='#FF006E', linewidth=0.8)

    # Overlay the discrete onset events detected from the same signal.
    onset_times = librosa.onset.onset_detect(y=y, sr=sr, units='time')
    ax.vlines(onset_times, 0, envelope.max(), color='#3A0CA3', alpha=0.5,
              linewidth=0.5, label='Detected note starts')

    ax.set_title('Onset Strength - New Notes/Sounds Starting\n(Peaks and lines = New notes or beats beginning)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Onset Strength', fontsize=12)
    ax.set_xlim(0, t.max())
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '12_onset_strength')
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def plot_beat_track(y, sr, output_dir):
    """
    13. BEAT TRACKING - The rhythm/pulse of the music
    Shows where the beats are and the tempo structure.

    Top panel: raw waveform with beat markers; bottom panel: onset
    envelope with the same beat markers, sharing the x-axis.
    """
    print("Generating: Beat Track (Rhythm)...")
    fig, (ax_wave, ax_onset) = plt.subplots(2, 1, figsize=(16, 8), sharex=True)

    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    beat_times = librosa.frames_to_time(beats, sr=sr)

    # librosa may return tempo as a scalar or a (possibly empty) array;
    # flatten and take the first element either way.
    tempo_flat = np.ravel(tempo)
    tempo_val = float(tempo_flat[0]) if tempo_flat.size else 0.0

    duration = len(y) / sr

    # Top: waveform with red beat markers.
    sample_times = np.linspace(0, duration, len(y))
    ax_wave.plot(sample_times, y, color='#2E86AB', linewidth=0.3, alpha=0.6)
    ax_wave.vlines(beat_times, -1, 1, color='#D62828', alpha=0.8,
                   linewidth=1, label='Beats')
    ax_wave.set_ylabel('Amplitude', fontsize=11)
    ax_wave.set_title(f'Beat Tracking - Detected Tempo: {tempo_val:.1f} BPM\n(Red lines = Beat positions)',
                      fontsize=14, fontweight='bold')
    ax_wave.legend(loc='upper right')
    ax_wave.set_xlim(0, duration)

    # Bottom: onset-strength envelope with the same beat markers.
    envelope = librosa.onset.onset_strength(y=y, sr=sr)
    env_times = librosa.frames_to_time(np.arange(len(envelope)), sr=sr)

    ax_onset.fill_between(env_times, envelope, alpha=0.5, color='#F77F00')
    ax_onset.vlines(beat_times, 0, envelope.max(), color='#D62828',
                    alpha=0.8, linewidth=1)
    ax_onset.set_xlabel('Time (seconds)', fontsize=12)
    ax_onset.set_ylabel('Onset Strength', fontsize=11)
    ax_onset.set_xlim(0, duration)

    plt.tight_layout()
    save_figure(fig, output_dir, '13_beat_tracking', tight=False)
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
def plot_tempogram(y, sr, output_dir):
    """
    14. TEMPOGRAM - Tempo/rhythm patterns over time
    Shows how the rhythm structure changes throughout the piece.
    """
    print("Generating: Tempogram (Rhythm Patterns)...")
    fig, ax = plt.subplots(figsize=(16, 6))

    # The tempogram is derived from the onset-strength envelope.
    strength = librosa.onset.onset_strength(y=y, sr=sr)
    tgram = librosa.feature.tempogram(onset_envelope=strength, sr=sr)

    mesh = librosa.display.specshow(tgram, sr=sr, x_axis='time',
                                    y_axis='tempo', ax=ax, cmap='magma')

    ax.set_title('Tempogram - Rhythm/Tempo Patterns Over Time\n(Bright horizontal bands = Strong rhythmic patterns)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Tempo (BPM)', fontsize=12)

    fig.colorbar(mesh, ax=ax).set_label('Strength', fontsize=11)

    save_figure(fig, output_dir, '14_tempogram')
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
def plot_mfcc(y, sr, output_dir):
    """
    15. MFCCs - Timbral texture (sound color/character)
    Shows the "color" or character of the sound - what makes
    a piano sound different from a guitar.
    """
    print("Generating: MFCCs (Sound Character/Timbre)...")
    fig, ax = plt.subplots(figsize=(16, 6))

    # First 20 mel-frequency cepstral coefficients per frame.
    coeffs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)

    mesh = librosa.display.specshow(coeffs, sr=sr, x_axis='time', ax=ax,
                                    cmap=COLORMAP_DIVERGING)

    ax.set_title('MFCCs - Sound Character/Timbre ("Color" of the sound)\n(Different patterns = Different instrument sounds)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('MFCC Coefficient', fontsize=12)

    fig.colorbar(mesh, ax=ax).set_label('Coefficient Value', fontsize=11)

    save_figure(fig, output_dir, '15_mfcc')
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
def plot_spectral_contrast(y, sr, output_dir):
    """
    16. SPECTRAL CONTRAST - Difference between peaks and valleys
    Shows the difference between loud and quiet frequency bands.
    """
    print("Generating: Spectral Contrast...")
    fig, ax = plt.subplots(figsize=(16, 6))

    contrast_bands = librosa.feature.spectral_contrast(y=y, sr=sr)

    mesh = librosa.display.specshow(contrast_bands, sr=sr, x_axis='time',
                                    ax=ax, cmap='PRGn')

    ax.set_title('Spectral Contrast - Dynamic Range per Frequency Band\n(High contrast = Clear/distinct sounds, Low = Muddy/blended)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Frequency Band', fontsize=12)

    fig.colorbar(mesh, ax=ax).set_label('Contrast (dB)', fontsize=11)

    save_figure(fig, output_dir, '16_spectral_contrast')
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def plot_harmonic_percussive(y, sr, output_dir):
    """
    17. HARMONIC vs PERCUSSIVE separation
    Separates sustained sounds (instruments, vocals) from
    sharp attack sounds (drums, percussion).

    Three stacked panels: original signal, harmonic component,
    percussive component, all on a shared time axis.
    """
    print("Generating: Harmonic vs Percussive Separation...")

    # Split the signal into sustained (harmonic) and transient (percussive).
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    fig, axes = plt.subplots(3, 1, figsize=(16, 10), sharex=True)

    duration = len(y) / sr
    t_ds, y_ds, yh_ds, yp_ds = downsample_for_plot(
        np.linspace(0, duration, len(y)), y, y_harmonic, y_percussive
    )

    # (signal, line color, y-label) for each panel, top to bottom.
    panels = [
        (y_ds, '#2E86AB', 'Original'),
        (yh_ds, '#06D6A0', 'Harmonic\n(Melody/Chords)'),
        (yp_ds, '#EF476F', 'Percussive\n(Drums/Attacks)'),
    ]
    for ax, (signal, color, label) in zip(axes, panels):
        ax.plot(t_ds, signal, color=color, linewidth=0.3)
        ax.set_ylabel(label, fontsize=11)
        ax.set_xlim(0, duration)
        ax.grid(True, alpha=0.3)

    axes[0].set_title('Harmonic vs Percussive Separation\n(Splitting sustained notes from drum hits/attacks)',
                      fontsize=14, fontweight='bold')
    axes[2].set_xlabel('Time (seconds)', fontsize=12)

    plt.tight_layout()
    save_figure(fig, output_dir, '17_harmonic_percussive', tight=False)
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
def plot_frequency_bands(y, sr, output_dir):
    """
    18. FREQUENCY BANDS - Energy in bass, mid, and treble
    Shows the balance of low, mid, and high frequencies over time.

    Fixes:
      * stackplot colors are now taken from the bands actually kept, so
        labels/colors no longer mismatch when a band has no FFT bins
        (possible at low sample rates, where high bands fall above sr/2);
      * frames whose total energy is zero no longer divide by zero
        (which produced NaNs and a blank stack at silent frames).
    """
    print("Generating: Frequency Bands (Bass/Mid/Treble)...")

    # Magnitude spectrogram and the center frequency of each FFT bin.
    S = np.abs(librosa.stft(y))
    freqs = librosa.fft_frequencies(sr=sr)

    # Conventional audio-engineering band boundaries (Hz).
    bands = {
        'Sub-bass (20-60 Hz)': (20, 60),
        'Bass (60-250 Hz)': (60, 250),
        'Low-mid (250-500 Hz)': (250, 500),
        'Mid (500-2000 Hz)': (500, 2000),
        'High-mid (2000-4000 Hz)': (2000, 4000),
        'Treble (4000-20000 Hz)': (4000, 20000)
    }

    colors = ['#540B0E', '#9E2A2B', '#E09F3E', '#FFF3B0', '#335C67', '#2E86AB']

    fig, ax = plt.subplots(figsize=(16, 6))

    times = librosa.frames_to_time(range(S.shape[1]), sr=sr)

    # Mean magnitude per band, lightly smoothed; bands with no FFT bins
    # (mask empty) are skipped entirely.
    band_energies = []
    for (name, (low, high)), color in zip(bands.items(), colors):
        mask = (freqs >= low) & (freqs < high)
        if mask.sum() > 0:
            energy = S[mask].mean(axis=0)
            energy_smooth = gaussian_filter1d(energy, sigma=5)
            band_energies.append((name, energy_smooth, color))

    # Normalize per frame so the stack always sums to 1; silent frames
    # (total energy 0) divide by 1 instead of 0, yielding all-zero bands.
    energies = np.array([e[1] for e in band_energies])
    totals = energies.sum(axis=0, keepdims=True)
    energies_norm = energies / np.where(totals > 0, totals, 1.0)

    # Use the colors of the kept bands only, keeping them aligned with labels.
    ax.stackplot(times, energies_norm,
                 labels=[e[0] for e in band_energies],
                 colors=[e[2] for e in band_energies], alpha=0.8)

    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Relative Energy', fontsize=12)
    ax.set_title('Frequency Band Distribution Over Time\n(Shows balance of bass, mids, and treble)',
                 fontsize=14, fontweight='bold')
    ax.set_xlim(0, times.max())
    ax.set_ylim(0, 1)
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), fontsize=9)

    plt.tight_layout()
    save_figure(fig, output_dir, '18_frequency_bands')
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
def plot_dynamic_range(y, sr, output_dir):
    """
    19. DYNAMIC RANGE - Loud vs quiet sections highlighted
    Shows the contrast between loud and quiet parts.

    Fix: audio with constant loudness (rms.max() == rms.min()) no longer
    divides by zero during normalization; it now maps to all-quiet (0).
    """
    print("Generating: Dynamic Range...")
    fig, ax = plt.subplots(figsize=(16, 5))

    # Frame-level RMS loudness on a time axis.
    rms = librosa.feature.rms(y=y)[0]
    frames = range(len(rms))
    times = librosa.frames_to_time(frames, sr=sr)

    # Normalize RMS to 0-1 for the colormap; guard the degenerate case of
    # perfectly constant loudness, which would otherwise divide by zero.
    rms_range = rms.max() - rms.min()
    if rms_range > 0:
        rms_norm = (rms - rms.min()) / rms_range
    else:
        rms_norm = np.zeros_like(rms)

    # Create gradient fill using a colored mesh (much faster than per-frame
    # fill_between): one quad per frame, colored by relative loudness.
    from matplotlib.collections import PolyCollection
    verts = []
    colors_list = []
    for i in range(len(times) - 1):
        verts.append([(times[i], 0), (times[i], rms[i]),
                      (times[i+1], rms[i+1]), (times[i+1], 0)])
        colors_list.append(plt.cm.RdYlGn_r(rms_norm[i], alpha=0.8))

    poly = PolyCollection(verts, facecolors=colors_list, edgecolors='none')
    ax.add_collection(poly)
    ax.autoscale_view()

    # Thin outline of the raw RMS curve on top of the colored quads.
    ax.plot(times, rms, color='black', linewidth=0.8, alpha=0.5)

    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Loudness', fontsize=12)
    ax.set_title('Dynamic Range - Volume Variation\n(Red = Loud peaks, Green = Quiet sections)',
                 fontsize=14, fontweight='bold')
    ax.set_xlim(0, times.max())
    ax.set_ylim(0, None)

    # Colorbar built from a standalone mappable (no image artist to attach to).
    sm = plt.cm.ScalarMappable(cmap='RdYlGn_r', norm=plt.Normalize(0, 1))
    cbar = fig.colorbar(sm, ax=ax)
    cbar.set_label('Relative Loudness', fontsize=11)

    save_figure(fig, output_dir, '19_dynamic_range')
|
| 696 |
+
|
| 697 |
+
|
| 698 |
+
def plot_spectral_flatness(y, sr, output_dir):
    """
    20. SPECTRAL FLATNESS - Noise vs tonal content
    High = noise-like (drums, percussion), Low = tonal (melody, chords).
    """
    print("Generating: Spectral Flatness (Noise vs Tone)...")
    fig, ax = plt.subplots(figsize=(16, 4))

    flat = librosa.feature.spectral_flatness(y=y)[0]
    t = librosa.frames_to_time(np.arange(len(flat)), sr=sr)

    # Filled area first, then the outline drawn over it.
    ax.fill_between(t, flat, alpha=0.6, color='#9B5DE5')
    ax.plot(t, flat, color='#9B5DE5', linewidth=0.8)

    ax.set_title('Spectral Flatness - Noise vs Tonal Content\n(High = Noisy/percussive, Low = Tonal/melodic)',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Spectral Flatness', fontsize=12)
    ax.set_xlim(0, t.max())
    ax.set_ylim(0, 1)  # flatness is bounded in [0, 1] by definition
    ax.grid(True, alpha=0.3)

    save_figure(fig, output_dir, '20_spectral_flatness')
|
| 722 |
+
|
| 723 |
+
|
| 724 |
+
def plot_combined_dashboard(y, sr, output_dir, base_path):
    """
    21. COMBINED DASHBOARD - All key visualizations in one view
    A comprehensive overview combining multiple visualizations.

    Args:
        y: audio samples (1-D array).
        sr: sample rate in Hz.
        output_dir: directory where the PNG is written (via save_figure).
        base_path: Path of the source audio file; its stem is used in the
            figure title.

    Fix: the tempo estimate returned by beat_track was bound but never
    used; only the beat frames are kept now.
    """
    print("Generating: Combined Dashboard...")
    fig = plt.figure(figsize=(20, 16))

    # 4x2 grid: two half-width rows on top, two full-width rows below.
    gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.2)

    # 1. Mel Spectrogram (top left) - 64 mel bands keep this panel fast.
    ax1 = fig.add_subplot(gs[0, 0])
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=64)
    S_db = librosa.power_to_db(S, ref=np.max)
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='mel',
                             ax=ax1, cmap=COLORMAP_MAIN)
    ax1.set_title('Mel Spectrogram (Pitch Content)', fontweight='bold')

    # 2. Chromagram (top right)
    ax2 = fig.add_subplot(gs[0, 1])
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma',
                             ax=ax2, cmap='YlOrRd')
    ax2.set_title('Chromagram (Musical Notes)', fontweight='bold')

    # 3. RMS Energy (second row left)
    ax3 = fig.add_subplot(gs[1, 0])
    rms = librosa.feature.rms(y=y)[0]
    times = librosa.frames_to_time(range(len(rms)), sr=sr)
    ax3.fill_between(times, rms, alpha=0.7, color='#F77F00')
    ax3.set_xlim(0, times.max())
    ax3.set_title('Volume/Energy Over Time', fontweight='bold')
    ax3.set_xlabel('Time (s)')

    # 4. Spectral Centroid (second row right)
    ax4 = fig.add_subplot(gs[1, 1])
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    times_c = librosa.frames_to_time(range(len(cent)), sr=sr)
    ax4.fill_between(times_c, cent, alpha=0.6, color='#9B5DE5')
    ax4.set_xlim(0, times_c.max())
    ax4.set_title('Brightness Over Time', fontweight='bold')
    ax4.set_xlabel('Time (s)')

    # 5. Onset Strength with Beats (third row, full width)
    ax5 = fig.add_subplot(gs[2, :])
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    times_o = librosa.frames_to_time(range(len(onset_env)), sr=sr)
    # Only the beat frames are needed here; the tempo estimate is unused.
    _, beats = librosa.beat.beat_track(y=y, sr=sr)
    beat_times = librosa.frames_to_time(beats, sr=sr)
    ax5.fill_between(times_o, onset_env, alpha=0.5, color='#FF006E')
    ax5.vlines(beat_times, 0, onset_env.max(), color='#3A0CA3', alpha=0.6, linewidth=1)
    ax5.set_xlim(0, times_o.max())
    ax5.set_title('Rhythm & Beats (vertical lines = beat positions)', fontweight='bold')
    ax5.set_xlabel('Time (s)')

    # 6. Frequency Bands (bottom, full width)
    ax6 = fig.add_subplot(gs[3, :])
    S_full = np.abs(librosa.stft(y))
    freqs = librosa.fft_frequencies(sr=sr)
    bands = [(60, 250), (250, 2000), (2000, 20000)]
    band_names = ['Bass', 'Mid', 'Treble']
    colors = ['#E63946', '#F4A261', '#2A9D8F']

    times_f = librosa.frames_to_time(range(S_full.shape[1]), sr=sr)
    for (low, high), name, color in zip(bands, band_names, colors):
        mask = (freqs >= low) & (freqs < high)
        if mask.sum() > 0:
            # Smooth each band and normalize to its own peak for comparison.
            energy = gaussian_filter1d(S_full[mask].mean(axis=0), sigma=5)
            ax6.plot(times_f, energy / energy.max(), label=name, color=color, linewidth=1.5)

    ax6.set_xlim(0, times_f.max())
    ax6.set_title('Frequency Band Balance (Bass/Mid/Treble)', fontweight='bold')
    ax6.set_xlabel('Time (s)')
    ax6.legend(loc='upper right')

    plt.suptitle(f'Audio Visualization Dashboard\n"{base_path.stem}"',
                 fontsize=16, fontweight='bold', y=0.98)

    save_figure(fig, output_dir, '21_combined_dashboard', tight=False)
|
| 805 |
+
|
| 806 |
+
|
| 807 |
+
def plot_3d_spectrogram(y, sr, output_dir):
    """
    22. 3D SPECTROGRAM - Frequency, time, and amplitude in 3D
    A three-dimensional view of the audio.
    """
    print("Generating: 3D Spectrogram...")
    # The old `from mpl_toolkits.mplot3d import Axes3D` import was removed:
    # since matplotlib 3.2 the '3d' projection is registered automatically,
    # so the import was unused.

    fig = plt.figure(figsize=(14, 10))
    ax = fig.add_subplot(111, projection='3d')

    # A larger hop keeps the STFT (and therefore the 3D mesh) small.
    hop_length = 2048
    S = np.abs(librosa.stft(y, hop_length=hop_length))
    S_db = librosa.amplitude_to_db(S, ref=np.max)

    # Cap the surface at roughly 200 time steps x 100 frequency rows.
    step_t = max(1, S_db.shape[1] // 200)
    step_f = max(1, S_db.shape[0] // 100)
    S_down = S_db[::step_f, ::step_t]

    # Axis values for the full-resolution spectrogram...
    times = librosa.frames_to_time(range(S_db.shape[1]), sr=sr, hop_length=hop_length)
    freqs = librosa.fft_frequencies(sr=sr)

    # ...subsampled with the same strides as the data so shapes match.
    times_down = times[::step_t]
    freqs_down = freqs[::step_f]

    T, F = np.meshgrid(times_down, freqs_down)

    surf = ax.plot_surface(T, F, S_down, cmap='magma',
                           linewidth=0, antialiased=True, alpha=0.9)

    ax.set_xlabel('Time (s)', fontsize=11)
    ax.set_ylabel('Frequency (Hz)', fontsize=11)
    ax.set_zlabel('Amplitude (dB)', fontsize=11)
    ax.set_title('3D Spectrogram\n(Time × Frequency × Loudness)',
                 fontsize=14, fontweight='bold')

    # Fixed viewing angle for reproducible output images.
    ax.view_init(elev=30, azim=45)
    fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10, label='Amplitude (dB)')

    save_figure(fig, output_dir, '22_3d_spectrogram')
|
| 851 |
+
|
| 852 |
+
|
| 853 |
+
def create_visualization_guide(output_dir, duration, tempo, title):
    """Create a text guide explaining all visualizations.

    Args:
        output_dir: Path of the directory to write VISUALIZATION_GUIDE.txt into.
        duration: audio length in seconds.
        tempo: detected tempo; librosa may supply a scalar or an ndarray.
        title: display title of the audio file (typically the filename stem).

    Fixes: pointless `{22}` f-string placeholder replaced with a literal,
    explicit UTF-8 encoding on the output file (titles may be non-ASCII),
    and the final print no longer uses an f-string with no placeholders.
    """
    guide_path = output_dir / "VISUALIZATION_GUIDE.txt"

    # Handle tempo - librosa may return a scalar or a (possibly empty) array.
    if isinstance(tempo, np.ndarray):
        tempo_val = float(tempo[0]) if len(tempo) > 0 else 0.0
    else:
        tempo_val = float(tempo)

    guide_text = f"""
================================================================================
AUDIO VISUALIZATION GUIDE
"{title}"
================================================================================

Duration: {duration:.2f} seconds
Detected Tempo: {tempo_val:.1f} BPM

This folder contains 22 different visual representations of the audio file.
Each visualization shows a different aspect of how the music sounds.

--------------------------------------------------------------------------------
BASIC VISUALIZATIONS (Start Here)
--------------------------------------------------------------------------------

01_waveform.png
What it shows: The raw audio signal over time
How to read it: Tall peaks = loud moments, flat areas = quiet moments
The shape shows the overall "texture" of the sound

02_volume_envelope.png
What it shows: Overall loudness smoothed out over time
How to read it: Higher = louder, watch for crescendos (getting louder)
and decrescendos (getting quieter)

--------------------------------------------------------------------------------
FREQUENCY/PITCH VISUALIZATIONS
--------------------------------------------------------------------------------

03_spectrogram.png
What it shows: ALL frequencies (pitches) over time
How to read it:
- Bottom = low/bass notes, Top = high/treble notes
- Brighter colors = louder at that frequency
- Horizontal lines = sustained notes
- Vertical patterns = rhythmic hits/drums

04_mel_spectrogram.png
What it shows: Same as spectrogram but scaled to human hearing
How to read it: Spacing matches how we perceive pitch differences
Low notes spread apart, high notes compressed (like piano keys)

05_chromagram.png
What it shows: The 12 musical notes (C, C#, D, D#, E, F, F#, G, G#, A, A#, B)
How to read it: Bright horizontal bands = that note is playing
Watch for patterns - these are chord progressions!

06_tonnetz.png
What it shows: Musical harmony relationships
How to read it: Shows how notes relate to each other harmonically
Patterns indicate chord types and key changes

--------------------------------------------------------------------------------
SOUND CHARACTER VISUALIZATIONS
--------------------------------------------------------------------------------

07_spectral_centroid.png
What it shows: "Brightness" of the sound
How to read it: High values = bright/sharp sound (like cymbal)
Low values = dark/mellow sound (like bass)

08_spectral_bandwidth.png
What it shows: How "spread out" the frequencies are
How to read it: Wide = rich, complex sound (orchestra)
Narrow = pure, simple sound (flute solo)

09_spectral_rolloff.png
What it shows: Where most of the sound energy is concentrated
How to read it: Higher = more high-frequency content

15_mfcc.png
What it shows: The "character" or "color" of the sound (timbre)
How to read it: Different patterns = different instrument sounds
This is what makes a piano sound different from a trumpet

16_spectral_contrast.png
What it shows: Difference between loud and quiet frequency bands
How to read it: High contrast = clear, distinct sounds
Low contrast = muddy, blended sounds

--------------------------------------------------------------------------------
RHYTHM & DYNAMICS VISUALIZATIONS
--------------------------------------------------------------------------------

10_rms_energy.png
What it shows: Overall power/intensity over time
How to read it: Peaks = intense/powerful moments
Valleys = calmer sections

12_onset_strength.png
What it shows: Where new notes/sounds begin
How to read it: Peaks and vertical lines = new notes starting
Great for seeing the rhythm and when instruments come in

13_beat_tracking.png
What it shows: The detected beats/pulse of the music
How to read it: Red vertical lines = beat positions
The spacing shows the tempo and rhythm

14_tempogram.png
What it shows: Rhythm patterns over time
How to read it: Bright horizontal bands = strong rhythmic patterns
at that tempo (BPM). Changes = tempo variations

19_dynamic_range.png
What it shows: Volume variation with color coding
How to read it: Red = loud peaks, Green = quiet sections
Shows the dramatic contrast in the music

--------------------------------------------------------------------------------
TEXTURE VISUALIZATIONS
--------------------------------------------------------------------------------

11_zero_crossing_rate.png
What it shows: Sound texture (smooth vs rough)
How to read it: High = noisy/percussive (drums, cymbals)
Low = smooth/tonal (sustained notes, vocals)

20_spectral_flatness.png
What it shows: Noise vs tonal content
How to read it: High = noise-like (percussion, breath sounds)
Low = tonal/melodic (notes, chords)

--------------------------------------------------------------------------------
COMPONENT SEPARATION
--------------------------------------------------------------------------------

17_harmonic_percussive.png
What it shows: The audio split into two parts
- HARMONIC: Sustained sounds (piano, brass, strings)
- PERCUSSIVE: Sharp attacks (drums, plucks)
How to read it: Top = original, Middle = melody/chords, Bottom = drums/hits

18_frequency_bands.png
What it shows: Balance of bass, mid, and treble over time
How to read it: The colored areas show which frequencies dominate
at each moment. Watch how the balance shifts!

--------------------------------------------------------------------------------
OVERVIEW VISUALIZATIONS
--------------------------------------------------------------------------------

21_combined_dashboard.png
What it shows: Multiple key visualizations in one view
How to read it: A comprehensive overview of the piece
Good for getting the overall picture quickly

22_3d_spectrogram.png
What it shows: Time, frequency, and amplitude in 3D
How to read it: Peaks = loud frequencies, valleys = quiet
Gives a "landscape" view of the music

================================================================================
TIPS FOR EXPERIENCING THE MUSIC
================================================================================

1. Start with the Combined Dashboard (21) to get an overview

2. For MELODY and HARMONY: Focus on the Chromagram (05) and
Mel Spectrogram (04)

3. For RHYTHM: Look at Beat Tracking (13), Onset Strength (12),
and Tempogram (14)

4. For EMOTIONAL DYNAMICS: Watch the RMS Energy (10) and
Dynamic Range (19)

5. For TEXTURE and SOUND CHARACTER: Explore MFCC (15) and
Spectral Centroid (07)

================================================================================
"""

    # Explicit UTF-8 so the guide is written identically on every platform.
    with open(guide_path, 'w', encoding='utf-8') as f:
        f.write(guide_text)

    print(" Saved: VISUALIZATION_GUIDE.txt")
|
| 1041 |
+
|
| 1042 |
+
|
| 1043 |
+
def main():
    """Command-line entry point: generate all visualizations for one audio file.

    Steps: parse CLI arguments, load the audio, create/locate the output
    directory, render all 22 visualization PNGs, then write
    VISUALIZATION_GUIDE.txt describing how to read them.
    Exits with status 1 if the input file does not exist.
    """
    global FIGURE_DPI  # module-wide default resolution; --dpi overrides it

    parser = argparse.ArgumentParser(
        description='Generate comprehensive visual representations of audio files. '
                    'Creates 22 different visualizations to help experience music visually.'
    )
    parser.add_argument(
        'file',
        help='Path to the audio file (mp3, wav, flac, ogg, etc.)'
    )
    parser.add_argument(
        '--output-dir', '-o',
        help='Custom output directory (default: <filename>_visualizations in the same folder as the audio file)',
        default=None
    )
    parser.add_argument(
        '--dpi',
        help=f'Figure resolution in DPI (default: {FIGURE_DPI})',
        type=int,
        default=None
    )

    args = parser.parse_args()

    audio_path = Path(args.file)
    if not audio_path.exists():
        print(f"Error: File not found: {audio_path}")
        sys.exit(1)

    if args.dpi is not None:
        FIGURE_DPI = args.dpi

    title = audio_path.stem

    print("=" * 60)
    print("AUDIO VISUALIZER")
    print("Let Claude hear your music")
    print("=" * 60)
    print()

    # Load audio
    y, sr = load_audio(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)

    # Create output directory. An explicit --output-dir is created as given;
    # otherwise create_output_dir derives one next to the audio file.
    if args.output_dir:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"Output directory: {output_dir}")
    else:
        output_dir = create_output_dir(audio_path)

    print()
    print("Generating visualizations...")
    print("-" * 40)

    # Generate all visualizations in their numbered order (01-22). All
    # plotters share the (y, sr, output_dir) signature except the combined
    # dashboard, which additionally takes the audio path; the 3D spectrogram
    # is kept last to match the original ordering.
    standard_plotters = (
        plot_waveform,
        plot_waveform_envelope,
        plot_spectrogram,
        plot_mel_spectrogram,
        plot_chromagram,
        plot_tonnetz,
        plot_spectral_centroid,
        plot_spectral_bandwidth,
        plot_spectral_rolloff,
        plot_rms_energy,
        plot_zero_crossing_rate,
        plot_onset_strength,
        plot_beat_track,
        plot_tempogram,
        plot_mfcc,
        plot_spectral_contrast,
        plot_harmonic_percussive,
        plot_frequency_bands,
        plot_dynamic_range,
        plot_spectral_flatness,
    )
    for plotter in standard_plotters:
        plotter(y, sr, output_dir)
    plot_combined_dashboard(y, sr, output_dir, audio_path)
    plot_3d_spectrogram(y, sr, output_dir)

    # Get tempo for the guide. NOTE(review): plot_beat_track presumably runs
    # beat_track again internally — acceptable duplication, but a candidate
    # for sharing if generation time matters.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Create guide
    print()
    print("Creating visualization guide...")
    create_visualization_guide(output_dir, duration, tempo, title)

    print()
    print("=" * 60)
    print("COMPLETE!")
    # Fixed: this line was an f-string with no placeholders (F541).
    print("Generated 22 visualizations + guide in:")
    print(f"  {output_dir}")
    print("=" * 60)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
+
matplotlib
|
| 3 |
+
scipy
|
| 4 |
+
librosa
|
| 5 |
+
gradio
|