Justin Davis and Claude Opus 4.6 committed on
Commit ad47dc1 · 1 Parent(s): 2203384

Add Audio Visualizer Gradio app


22 audio visualizations with gallery view and zip download.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (4)
  1. README.md +28 -6
  2. app.py +165 -0
  3. audio_visualizer.py +1142 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,14 +1,36 @@
  ---
- title: AVisualizer
- emoji: 🏆
+ title: Audio Visualizer
+ emoji: 🎵
  colorFrom: blue
- colorTo: green
+ colorTo: purple
  sdk: gradio
- sdk_version: 6.6.0
+ sdk_version: "5.12.0"
  app_file: app.py
  pinned: false
  license: mit
- short_description: Allow vision enabled models to "hear" music.
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Audio Visualizer: Let Claude Hear Your Music
+
+ Upload any audio file and get **22 detailed visualizations** that translate sound into sight. Originally built to help the deaf and hard of hearing experience music visually, these images also let Claude (or any vision-capable AI) "listen" to your music by analyzing the visual representations.
+
+ ## What You Get
+
+ - **22 PNG visualizations** covering waveform, spectrogram, chromagram, beat tracking, harmonic/percussive separation, MFCCs, and more
+ - **A downloadable zip** containing all images plus a text guide explaining each visualization
+ - **Three quality levels** — Normal (150 DPI), High (200 DPI), Ultra (300 DPI)
+
+ ## How to Use
+
+ 1. Upload an audio file (MP3, WAV, FLAC, OGG, etc.)
+ 2. Choose a quality level
+ 3. Click **Generate Visualizations**
+ 4. Browse the gallery and download the zip
+
+ ## Tip: Share with Claude
+
+ Download the zip file and upload it to a Claude conversation. Claude can analyze the visualizations to describe the music's rhythm, melody, dynamics, and texture — even though it can't hear the audio directly.
+
+ ## Links
+
+ - [GitHub Repository](https://github.com/justindavis/AVisualizer)
app.py ADDED
@@ -0,0 +1,165 @@
+ #!/usr/bin/env python3
+ """
+ Audio Visualizer — Gradio Web Interface for Hugging Face Spaces
+ Upload audio, get 22 visualizations + a zip download ready for Claude.
+ """
+
+ import matplotlib
+ matplotlib.use('Agg')  # MUST be before any pyplot/librosa import
+
+ import gc
+ import shutil
+ import tempfile
+ from pathlib import Path
+
+ import gradio as gr
+ import librosa
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ import audio_visualizer
+
+ # All 22 visualization functions in order (mirrors the GUI list)
+ VISUALIZATIONS = [
+     ("Waveform", audio_visualizer.plot_waveform),
+     ("Volume Envelope", audio_visualizer.plot_waveform_envelope),
+     ("Spectrogram", audio_visualizer.plot_spectrogram),
+     ("Mel Spectrogram", audio_visualizer.plot_mel_spectrogram),
+     ("Chromagram", audio_visualizer.plot_chromagram),
+     ("Tonnetz", audio_visualizer.plot_tonnetz),
+     ("Spectral Centroid", audio_visualizer.plot_spectral_centroid),
+     ("Spectral Bandwidth", audio_visualizer.plot_spectral_bandwidth),
+     ("Spectral Rolloff", audio_visualizer.plot_spectral_rolloff),
+     ("RMS Energy", audio_visualizer.plot_rms_energy),
+     ("Zero Crossing Rate", audio_visualizer.plot_zero_crossing_rate),
+     ("Onset Strength", audio_visualizer.plot_onset_strength),
+     ("Beat Tracking", audio_visualizer.plot_beat_track),
+     ("Tempogram", audio_visualizer.plot_tempogram),
+     ("MFCCs", audio_visualizer.plot_mfcc),
+     ("Spectral Contrast", audio_visualizer.plot_spectral_contrast),
+     ("Harmonic/Percussive", audio_visualizer.plot_harmonic_percussive),
+     ("Frequency Bands", audio_visualizer.plot_frequency_bands),
+     ("Dynamic Range", audio_visualizer.plot_dynamic_range),
+     ("Spectral Flatness", audio_visualizer.plot_spectral_flatness),
+     ("Combined Dashboard", audio_visualizer.plot_combined_dashboard),
+     ("3D Spectrogram", audio_visualizer.plot_3d_spectrogram),
+ ]
+
+ DPI_OPTIONS = {
+     "Normal (150 DPI)": 150,
+     "High (200 DPI)": 200,
+     "Ultra (300 DPI)": 300,
+ }
+
+
+ def generate_visualizations(audio_path, quality, progress=gr.Progress()):
+     """Generate all 22 visualizations and return gallery images + zip file."""
+     if audio_path is None:
+         raise gr.Error("Please upload an audio file first.")
+
+     # Set DPI
+     audio_visualizer.FIGURE_DPI = DPI_OPTIONS.get(quality, 150)
+
+     # Load audio
+     progress(0, desc="Loading audio...")
+     y, sr = audio_visualizer.load_audio(audio_path)
+     duration = librosa.get_duration(y=y, sr=sr)
+     audio_file = Path(audio_path)
+     title = audio_file.stem
+
+     # Create temp output directory
+     output_tmp = tempfile.mkdtemp(prefix="avis_output_")
+     output_dir = Path(output_tmp)
+
+     # Generate each visualization
+     total = len(VISUALIZATIONS)
+
+     for i, (name, func) in enumerate(VISUALIZATIONS):
+         progress(i / total, desc=f"Generating: {name} ({i + 1}/{total})...")
+
+         if func == audio_visualizer.plot_combined_dashboard:
+             func(y, sr, output_dir, base_path=audio_file)
+         else:
+             func(y, sr, output_dir)
+
+         plt.close('all')
+         gc.collect()
+
+     # Create visualization guide
+     progress(0.95, desc="Creating visualization guide...")
+     tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+     # beat_track may return an ndarray in newer librosa; normalize to a float
+     tempo = float(np.atleast_1d(tempo)[0])
+     audio_visualizer.create_visualization_guide(output_dir, duration, tempo, title)
+
+     # Collect all PNG paths (sorted by filename for correct order)
+     image_paths = sorted(output_dir.glob("*.png"))
+
+     # Create zip file
+     progress(0.98, desc="Creating zip archive...")
+     zip_tmp = tempfile.mkdtemp(prefix="avis_zip_")
+     zip_base = Path(zip_tmp) / f"{title}_visualizations"
+     zip_path = shutil.make_archive(str(zip_base), 'zip', output_dir)
+
+     progress(1.0, desc="Done!")
+
+     return image_paths, zip_path
+
+
+ # --- Build the Gradio interface ---
+
+ with gr.Blocks(
+     title="Audio Visualizer",
+     theme=gr.themes.Soft(),
+ ) as demo:
+     gr.Markdown(
+         """
+         # Audio Visualizer — Let Claude Hear Your Music
+         Upload any audio file to generate **22 visualizations** that translate sound into sight.
+         Download the zip and share it with Claude to let AI "listen" to your music.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             audio_input = gr.Audio(
+                 type="filepath",
+                 label="Upload Audio File",
+             )
+             quality_radio = gr.Radio(
+                 choices=list(DPI_OPTIONS.keys()),
+                 value="Normal (150 DPI)",
+                 label="Quality",
+             )
+             generate_btn = gr.Button("Generate Visualizations", variant="primary")
+         with gr.Column(scale=1):
+             gr.Markdown(
+                 """
+                 ### How it works
+                 1. **Upload** an MP3, WAV, FLAC, OGG, or other audio file
+                 2. **Choose quality** — higher DPI = sharper images but slower
+                 3. **Click Generate** and wait for all 22 visualizations
+                 4. **Download the zip** and upload it to a Claude conversation
+
+                 Claude can analyze these images to describe the music's rhythm,
+                 melody, dynamics, and texture — even though it can't hear the
+                 audio directly.
+                 """
+             )
+
+     gallery = gr.Gallery(
+         label="Visualizations",
+         columns=4,
+         object_fit="contain",
+         height="auto",
+     )
+
+     zip_download = gr.File(label="Download All (Zip)")
+
+     generate_btn.click(
+         fn=generate_visualizations,
+         inputs=[audio_input, quality_radio],
+         outputs=[gallery, zip_download],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
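The handler's zip step is plain stdlib: every PNG plus the text guide is bundled with `shutil.make_archive`. A self-contained sketch of that same pattern (the file names below are stand-ins, not from the diff):

```python
import shutil
import tempfile
import zipfile
from pathlib import Path

# Stand-in output directory with a couple of fake "visualization" files
output_dir = Path(tempfile.mkdtemp(prefix="avis_output_"))
(output_dir / "01_waveform.png").write_bytes(b"png-bytes")
(output_dir / "visualization_guide.txt").write_text("guide")

# make_archive appends ".zip" and archives the directory's *contents*,
# so paths inside the archive are relative to output_dir
zip_base = Path(tempfile.mkdtemp(prefix="avis_zip_")) / "song_visualizations"
zip_path = shutil.make_archive(str(zip_base), 'zip', output_dir)

with zipfile.ZipFile(zip_path) as zf:
    print(sorted(zf.namelist()))
```

Passing `output_dir` as `root_dir` is what keeps the archive free of absolute temp paths, which matters when the zip is re-uploaded elsewhere.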
audio_visualizer.py ADDED
@@ -0,0 +1,1142 @@
+ #!/usr/bin/env python3
+ """
+ Audio Visualizer for the Deaf/Hard of Hearing
+ Generates comprehensive visual representations of audio files.
+
+ This script creates multiple visualization types to help someone
+ who cannot hear experience music visually.
+ """
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import matplotlib.colors as mcolors
+ from matplotlib.collections import LineCollection
+ from scipy import signal
+ from scipy.ndimage import gaussian_filter1d
+ import librosa
+ import librosa.display
+ import argparse
+ import sys
+ from pathlib import Path
+
+ # Configuration
+ FIGURE_DPI = 150
+ COLORMAP_MAIN = 'magma'
+ COLORMAP_DIVERGING = 'coolwarm'
+ MAX_PLOT_POINTS = 10000  # No display can show more than this across a figure
+
+
+ def downsample_for_plot(times, *arrays):
+     """Downsample arrays to MAX_PLOT_POINTS using min/max envelope.
+
+     For each bucket, keeps the min and max indices of the *first* array
+     and samples all arrays at those same positions, so every output array
+     has the same length as the output times.
+     """
+     n = len(times)
+     if n <= MAX_PLOT_POINTS:
+         return (times, *arrays)
+
+     # Number of buckets; keep 2 points per bucket (min + max) for envelope
+     n_buckets = MAX_PLOT_POINTS // 2
+     bucket_size = n // n_buckets
+
+     indices = []
+     for b in range(n_buckets):
+         start = b * bucket_size
+         end = start + bucket_size
+         chunk = arrays[0][start:end]
+         i_min = int(np.argmin(chunk)) + start
+         i_max = int(np.argmax(chunk)) + start
+         # Keep in temporal order
+         if i_min <= i_max:
+             indices.append(i_min)
+             indices.append(i_max)
+         else:
+             indices.append(i_max)
+             indices.append(i_min)
+
+     indices = np.array(indices)
+     out_times = times[indices]
+     out_arrays = [arr[indices] for arr in arrays]
+     return (out_times, *out_arrays)
+
+
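The bucketed min/max decimation above can be exercised in isolation. This sketch re-implements the same idea for a single value array (names are illustrative, not from the diff) and checks that an isolated spike survives the decimation, which is the whole point of keeping per-bucket extremes rather than plain striding:

```python
import numpy as np

MAX_PLOT_POINTS = 10_000  # same budget as the script's constant

def downsample_minmax(times, values):
    """Keep each bucket's min and max so plotted peaks survive decimation."""
    n = len(times)
    if n <= MAX_PLOT_POINTS:
        return times, values
    n_buckets = MAX_PLOT_POINTS // 2
    bucket = n // n_buckets
    idx = []
    for b in range(n_buckets):
        start = b * bucket
        chunk = values[start:start + bucket]
        lo = int(np.argmin(chunk)) + start
        hi = int(np.argmax(chunk)) + start
        idx.extend(sorted((lo, hi)))  # keep temporal order within the bucket
    idx = np.asarray(idx)
    return times[idx], values[idx]

# A 1M-sample ramp with one sharp spike: the spike must survive downsampling
t = np.linspace(0, 10, 1_000_000)
v = np.linspace(0, 1, 1_000_000)
v[123_456] = 50.0
t_ds, v_ds = downsample_minmax(t, v)
print(len(t_ds), v_ds.max())
```

Naive striding (`v[::100]`) would almost certainly miss the spike; the envelope approach cannot.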
+ def load_audio(filepath):
+     """Load audio file and return time series and sample rate."""
+     print(f"Loading audio file: {filepath}")
+     y, sr = librosa.load(filepath, sr=None)
+     duration = librosa.get_duration(y=y, sr=sr)
+     print(f"  Duration: {duration:.2f} seconds")
+     print(f"  Sample rate: {sr} Hz")
+     print(f"  Samples: {len(y):,}")
+     return y, sr
+
+
+ def create_output_dir(base_path):
+     """Create output directory for visualizations."""
+     output_dir = base_path.parent / f"{base_path.stem}_visualizations"
+     output_dir.mkdir(exist_ok=True)
+     print(f"Output directory: {output_dir}")
+     return output_dir
+
+
+ def save_figure(fig, output_dir, name, tight=True):
+     """Save figure to output directory."""
+     filepath = output_dir / f"{name}.png"
+     if tight:
+         fig.savefig(filepath, dpi=FIGURE_DPI, bbox_inches='tight',
+                     facecolor='white', edgecolor='none')
+     else:
+         fig.savefig(filepath, dpi=FIGURE_DPI, facecolor='white', edgecolor='none')
+     plt.close(fig)
+     print(f"  Saved: {name}.png")
+
+
+ # =============================================================================
+ # VISUALIZATION FUNCTIONS
+ # =============================================================================
+
+ def plot_waveform(y, sr, output_dir):
+     """
+     1. WAVEFORM - Basic amplitude over time
+     Shows the raw audio signal - peaks indicate loud moments,
+     flat areas indicate quiet moments.
+     """
+     print("Generating: Waveform...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     times = np.linspace(0, len(y)/sr, len(y))
+     t_ds, y_ds = downsample_for_plot(times, y)
+     ax.plot(t_ds, y_ds, color='#2E86AB', linewidth=0.3, alpha=0.8)
+     ax.fill_between(t_ds, y_ds, alpha=0.3, color='#2E86AB')
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Amplitude', fontsize=12)
+     ax.set_title('Waveform - Audio Amplitude Over Time\n(Peaks = Loud, Flat = Quiet)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, len(y)/sr)
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '01_waveform')
+
+
+ def plot_waveform_envelope(y, sr, output_dir):
+     """
+     2. WAVEFORM ENVELOPE - Smoothed amplitude showing dynamics
+     Shows overall loudness changes without the rapid oscillations.
+     """
+     print("Generating: Waveform Envelope...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     # Compute envelope using Hilbert transform
+     analytic_signal = signal.hilbert(y)
+     envelope = np.abs(analytic_signal)
+
+     # Smooth the envelope
+     window_size = int(sr * 0.05)  # 50ms window
+     envelope_smooth = gaussian_filter1d(envelope, sigma=window_size)
+
+     times = np.linspace(0, len(y)/sr, len(y))
+     t_ds, env_ds = downsample_for_plot(times, envelope_smooth)
+     ax.fill_between(t_ds, env_ds, alpha=0.7, color='#E94F37')
+     ax.plot(t_ds, env_ds, color='#E94F37', linewidth=0.5)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Loudness', fontsize=12)
+     ax.set_title('Volume Envelope - Overall Loudness Over Time\n(Higher = Louder sections)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, len(y)/sr)
+     ax.set_ylim(0, None)
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '02_volume_envelope')
+
+
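The envelope step above is standard DSP: the magnitude of `scipy.signal.hilbert`'s analytic signal is the instantaneous amplitude. A minimal check on a pure tone, where that magnitude should sit at the tone's amplitude:

```python
import numpy as np
from scipy.signal import hilbert

# 1 second of a 50 Hz unit-amplitude sine at a 1 kHz "sample rate"
sr = 1000
t = np.arange(sr) / sr
y = np.sin(2 * np.pi * 50 * t)

# The analytic signal's magnitude is the amplitude envelope
envelope = np.abs(hilbert(y))

# For a unit sine the envelope should hug 1.0 (inspect away from the edges)
core = envelope[50:-50]
print(round(float(core.min()), 2), round(float(core.max()), 2))
```

The smoothing with `gaussian_filter1d` in the diff then removes whatever fast ripple remains so only section-level dynamics show in the plot.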
+ def plot_spectrogram(y, sr, output_dir):
+     """
+     3. SPECTROGRAM - Frequency content over time
+     Shows what pitches/frequencies are playing at each moment.
+     Bottom = low/bass notes, Top = high/treble notes.
+     Brightness = loudness of that frequency.
+     """
+     print("Generating: Spectrogram...")
+     fig, ax = plt.subplots(figsize=(16, 8))
+
+     # Compute spectrogram
+     D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
+
+     img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log',
+                                    ax=ax, cmap=COLORMAP_MAIN)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Frequency (Hz) - Low to High pitch', fontsize=12)
+     ax.set_title('Spectrogram - All Frequencies Over Time\n(Bottom = Bass/Low, Top = Treble/High, Bright = Loud)',
+                  fontsize=14, fontweight='bold')
+
+     cbar = fig.colorbar(img, ax=ax, format='%+2.0f dB')
+     cbar.set_label('Loudness (dB)', fontsize=11)
+
+     save_figure(fig, output_dir, '03_spectrogram')
+
+
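The spectrogram math can be sanity-checked without librosa. This sketch uses `scipy.signal.stft` plus the same peak-referenced dB conversion that `amplitude_to_db(..., ref=np.max)` performs (an equivalent computation, not the diff's exact call), and confirms a pure tone lights up the expected frequency row:

```python
import numpy as np
from scipy.signal import stft

sr = 8000
t = np.arange(2 * sr) / sr
y = np.sin(2 * np.pi * 440 * t)  # a 440 Hz tone

# Magnitude STFT, then dB relative to the global peak
freqs, times, Z = stft(y, fs=sr, nperseg=1024)
mag = np.abs(Z)
db = 20 * np.log10(np.maximum(mag, 1e-10) / mag.max())

# The brightest row should be the frequency bin closest to 440 Hz
peak_row = int(mag.mean(axis=1).argmax())
print(freqs[peak_row])
```

With `nperseg=1024` the bin spacing is `sr / 1024 ≈ 7.8 Hz`, so the peak lands on the bin nearest 440 Hz rather than exactly on it.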
+ def plot_mel_spectrogram(y, sr, output_dir):
+     """
+     4. MEL SPECTROGRAM - Human-perception-weighted frequency view
+     Similar to spectrogram but scaled to match how humans perceive pitch.
+     """
+     print("Generating: Mel Spectrogram...")
+     fig, ax = plt.subplots(figsize=(16, 8))
+
+     # Compute mel spectrogram
+     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=sr/2)
+     S_db = librosa.power_to_db(S, ref=np.max)
+
+     img = librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='mel',
+                                    ax=ax, cmap=COLORMAP_MAIN, fmax=sr/2)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Frequency (Mel scale) - Perceived pitch', fontsize=12)
+     ax.set_title('Mel Spectrogram - Frequencies Scaled to Human Pitch Perception\n(How we naturally hear pitch differences)',
+                  fontsize=14, fontweight='bold')
+
+     cbar = fig.colorbar(img, ax=ax, format='%+2.0f dB')
+     cbar.set_label('Loudness (dB)', fontsize=11)
+
+     save_figure(fig, output_dir, '04_mel_spectrogram')
+
+
+ def plot_chromagram(y, sr, output_dir):
+     """
+     5. CHROMAGRAM - Musical notes/chords over time
+     Shows the 12 musical notes (C, C#, D, etc.) and their intensity.
+     Great for seeing chord progressions and melody.
+     """
+     print("Generating: Chromagram...")
+     fig, ax = plt.subplots(figsize=(16, 6))
+
+     # Compute chromagram
+     chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
+
+     img = librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma',
+                                    ax=ax, cmap='YlOrRd')
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Musical Note', fontsize=12)
+     ax.set_title('Chromagram - Musical Notes Over Time\n(Shows which of the 12 notes are playing - chord progressions)',
+                  fontsize=14, fontweight='bold')
+
+     cbar = fig.colorbar(img, ax=ax)
+     cbar.set_label('Note Intensity', fontsize=11)
+
+     save_figure(fig, output_dir, '05_chromagram')
+
+
+ def plot_tonnetz(y, sr, output_dir):
+     """
+     6. TONNETZ - Harmonic relationships
+     Shows tonal/harmonic content using music theory relationships.
+     """
+     print("Generating: Tonnetz (Harmonic Space)...")
+     fig, ax = plt.subplots(figsize=(16, 6))
+
+     # Compute tonnetz
+     chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
+     tonnetz = librosa.feature.tonnetz(chroma=chroma)
+
+     img = librosa.display.specshow(tonnetz, sr=sr, x_axis='time',
+                                    ax=ax, cmap=COLORMAP_DIVERGING)
+
+     ax.set_ylabel('Tonal Dimension', fontsize=12)
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_title('Tonnetz - Harmonic/Tonal Relationships\n(Shows musical harmony and chord relationships)',
+                  fontsize=14, fontweight='bold')
+     ax.set_yticks(range(6))
+     ax.set_yticklabels(['Fifth (x)', 'Fifth (y)', 'Minor (x)',
+                         'Minor (y)', 'Major (x)', 'Major (y)'])
+
+     cbar = fig.colorbar(img, ax=ax)
+     cbar.set_label('Intensity', fontsize=11)
+
+     save_figure(fig, output_dir, '06_tonnetz')
+
+
+ def plot_spectral_centroid(y, sr, output_dir):
+     """
+     7. SPECTRAL CENTROID - Brightness of sound over time
+     Higher values = brighter/sharper sound, Lower = darker/duller sound.
+     """
+     print("Generating: Spectral Centroid (Brightness)...")
+     fig, ax = plt.subplots(figsize=(16, 5))
+
+     # Compute spectral centroid
+     cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
+     frames = range(len(cent))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     # Create colored line segments
+     points = np.array([times, cent]).T.reshape(-1, 1, 2)
+     segments = np.concatenate([points[:-1], points[1:]], axis=1)
+
+     norm = plt.Normalize(cent.min(), cent.max())
+     lc = LineCollection(segments, cmap='plasma', norm=norm)
+     lc.set_array(cent[:-1])  # one value per segment
+     lc.set_linewidth(2)
+
+     line = ax.add_collection(lc)
+     ax.set_xlim(times.min(), times.max())
+     ax.set_ylim(cent.min() * 0.9, cent.max() * 1.1)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Spectral Centroid (Hz)', fontsize=12)
+     ax.set_title('Spectral Centroid - Sound Brightness Over Time\n(High = Bright/Sharp sound, Low = Dark/Dull sound)',
+                  fontsize=14, fontweight='bold')
+
+     cbar = fig.colorbar(line, ax=ax)
+     cbar.set_label('Brightness (Hz)', fontsize=11)
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '07_spectral_centroid')
+
+
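The quantity plotted above is just a magnitude-weighted mean frequency. A numpy-only sketch (simplified: one FFT over the whole signal rather than librosa's framed version), where the centroid of a pure tone should land on the tone itself:

```python
import numpy as np

sr = 8000
t = np.arange(sr) / sr
y = np.sin(2 * np.pi * 1000 * t)  # a 1 kHz tone

# Spectral centroid = magnitude-weighted mean frequency of the spectrum
mag = np.abs(np.fft.rfft(y))
freqs = np.fft.rfftfreq(len(y), d=1 / sr)
centroid = float((freqs * mag).sum() / mag.sum())
print(round(centroid))
```

For real mixtures the centroid drifts toward whichever register carries the energy, which is why it reads as "brightness."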
+ def plot_spectral_bandwidth(y, sr, output_dir):
+     """
+     8. SPECTRAL BANDWIDTH - How spread out the frequencies are
+     Wide = rich/complex sound, Narrow = pure/simple sound.
+     """
+     print("Generating: Spectral Bandwidth...")
+     fig, ax = plt.subplots(figsize=(16, 5))
+
+     spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
+     frames = range(len(spec_bw))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, spec_bw, alpha=0.6, color='#7B2CBF')
+     ax.plot(times, spec_bw, color='#7B2CBF', linewidth=1)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Bandwidth (Hz)', fontsize=12)
+     ax.set_title('Spectral Bandwidth - Sound Richness/Complexity\n(Wide = Rich/Complex, Narrow = Pure/Simple)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '08_spectral_bandwidth')
+
+
+ def plot_spectral_rolloff(y, sr, output_dir):
+     """
+     9. SPECTRAL ROLLOFF - Where most of the energy is concentrated
+     Shows the frequency below which 85% of the sound energy exists.
+     """
+     print("Generating: Spectral Rolloff...")
+     fig, ax = plt.subplots(figsize=(16, 5))
+
+     rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
+     frames = range(len(rolloff))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, rolloff, alpha=0.6, color='#00A896')
+     ax.plot(times, rolloff, color='#00A896', linewidth=1)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Rolloff Frequency (Hz)', fontsize=12)
+     ax.set_title('Spectral Rolloff - Where 85% of Sound Energy Lives\n(Higher = More high-frequency content)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '09_spectral_rolloff')
+
+
+ def plot_rms_energy(y, sr, output_dir):
+     """
+     10. RMS ENERGY - Overall loudness/power over time
+     Shows the intensity and dynamics of the music.
+     """
+     print("Generating: RMS Energy (Loudness)...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     rms = librosa.feature.rms(y=y)[0]
+     frames = range(len(rms))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, rms, alpha=0.7, color='#F77F00')
+     ax.plot(times, rms, color='#D62828', linewidth=1)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Energy (RMS)', fontsize=12)
+     ax.set_title('RMS Energy - Overall Loudness/Power Over Time\n(Peaks = Intense moments, Valleys = Quieter sections)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.set_ylim(0, None)
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '10_rms_energy')
+
+
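`librosa.feature.rms` is frame-wise root-mean-square. A simplified numpy version (no centering or padding, unlike librosa's default, and the helper name is ours) showing the expected value for a sine of known amplitude:

```python
import numpy as np

def frame_rms(y, frame_length=2048, hop_length=512):
    """RMS per frame — the same quantity librosa.feature.rms reports."""
    n_frames = 1 + (len(y) - frame_length) // hop_length
    out = np.empty(n_frames)
    for i in range(n_frames):
        frame = y[i * hop_length: i * hop_length + frame_length]
        out[i] = np.sqrt(np.mean(frame ** 2))
    return out

sr = 22050
t = np.arange(sr) / sr
y = 0.5 * np.sin(2 * np.pi * 220 * t)  # half-amplitude sine

rms = frame_rms(y)
# A sine of amplitude A has RMS A / sqrt(2) ≈ 0.354 here
print(round(float(rms.mean()), 3))
```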
+ def plot_zero_crossing_rate(y, sr, output_dir):
+     """
+     11. ZERO CROSSING RATE - Texture/noisiness indicator
+     High values indicate noisy/percussive sounds, low = tonal/smooth sounds.
+     """
+     print("Generating: Zero Crossing Rate (Texture)...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     zcr = librosa.feature.zero_crossing_rate(y)[0]
+     frames = range(len(zcr))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, zcr, alpha=0.6, color='#84A98C')
+     ax.plot(times, zcr, color='#2D6A4F', linewidth=0.8)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Zero Crossing Rate', fontsize=12)
+     ax.set_title('Zero Crossing Rate - Sound Texture\n(High = Noisy/Percussive, Low = Smooth/Tonal)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '11_zero_crossing_rate')
+
+
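Zero crossing rate reduces to counting sign changes between consecutive samples. A numpy-only sketch (the `zcr` helper is ours, a whole-signal simplification of librosa's framed feature) contrasting a tonal and a noisy signal:

```python
import numpy as np

def zcr(y):
    """Fraction of consecutive sample pairs whose sign differs."""
    return float(np.mean(np.signbit(y[:-1]) != np.signbit(y[1:])))

rng = np.random.default_rng(0)
sr = 8000
t = np.arange(sr) / sr
tone = np.sin(2 * np.pi * 100 * t)   # smooth, tonal
noise = rng.standard_normal(sr)      # noisy, percussive-like

# A 100 Hz sine crosses zero ~200 times/s -> rate ~200/8000 = 0.025,
# while white noise changes sign about half the time
print(zcr(tone), zcr(noise))
```

That order-of-magnitude gap is exactly what makes the plot a usable texture indicator.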
+ def plot_onset_strength(y, sr, output_dir):
+     """
+     12. ONSET STRENGTH - Where new sounds/notes begin
+     Peaks indicate the start of new notes, beats, or events.
+     """
+     print("Generating: Onset Strength (Note Attacks)...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+     frames = range(len(onset_env))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, onset_env, alpha=0.6, color='#FF006E')
+     ax.plot(times, onset_env, color='#FF006E', linewidth=0.8)
+
+     # Mark detected onsets
+     onsets = librosa.onset.onset_detect(y=y, sr=sr, units='time')
+     ax.vlines(onsets, 0, onset_env.max(), color='#3A0CA3', alpha=0.5,
+               linewidth=0.5, label='Detected note starts')
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Onset Strength', fontsize=12)
+     ax.set_title('Onset Strength - New Notes/Sounds Starting\n(Peaks and lines = New notes or beats beginning)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.legend(loc='upper right')
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '12_onset_strength')
+
+
+ def plot_beat_track(y, sr, output_dir):
+     """
+     13. BEAT TRACKING - The rhythm/pulse of the music
+     Shows where the beats are and the tempo structure.
+     """
+     print("Generating: Beat Track (Rhythm)...")
+     fig, axes = plt.subplots(2, 1, figsize=(16, 8), sharex=True)
+
+     # Compute tempo and beats
+     tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
+     beat_times = librosa.frames_to_time(beats, sr=sr)
+
+     # Handle tempo - it might be an array
+     if isinstance(tempo, np.ndarray):
+         tempo_val = float(tempo[0]) if len(tempo) > 0 else 0.0
+     else:
+         tempo_val = float(tempo)
+
+     # Top plot: waveform with beat markers
+     times = np.linspace(0, len(y)/sr, len(y))
+     axes[0].plot(times, y, color='#2E86AB', linewidth=0.3, alpha=0.6)
+     axes[0].vlines(beat_times, -1, 1, color='#D62828', alpha=0.8,
+                    linewidth=1, label='Beats')
+     axes[0].set_ylabel('Amplitude', fontsize=11)
+     axes[0].set_title(f'Beat Tracking - Detected Tempo: {tempo_val:.1f} BPM\n(Red lines = Beat positions)',
+                       fontsize=14, fontweight='bold')
+     axes[0].legend(loc='upper right')
+     axes[0].set_xlim(0, len(y)/sr)
+
+     # Bottom plot: onset strength with beats
+     onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+     frames = range(len(onset_env))
+     otimes = librosa.frames_to_time(frames, sr=sr)
+
+     axes[1].fill_between(otimes, onset_env, alpha=0.5, color='#F77F00')
+     axes[1].vlines(beat_times, 0, onset_env.max(), color='#D62828',
+                    alpha=0.8, linewidth=1)
+     axes[1].set_xlabel('Time (seconds)', fontsize=12)
+     axes[1].set_ylabel('Onset Strength', fontsize=11)
+     axes[1].set_xlim(0, len(y)/sr)
+
+     plt.tight_layout()
+     save_figure(fig, output_dir, '13_beat_tracking', tight=False)
+
+
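The scalar-vs-array `tempo` guard above exists because `librosa.beat.beat_track`'s tempo return type changed across versions. The same guard collapses into one helper; a standalone sketch (the helper name is ours, not from the diff):

```python
import numpy as np

def tempo_to_float(tempo):
    """Collapse librosa's scalar-or-array tempo return into a single float."""
    arr = np.atleast_1d(np.asarray(tempo, dtype=float))
    return float(arr[0]) if arr.size else 0.0

# Works for plain floats, 1-element arrays, and the empty edge case
print(tempo_to_float(120.5), tempo_to_float(np.array([98.0])))
```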
482
+ def plot_tempogram(y, sr, output_dir):
483
+ """
484
+ 14. TEMPOGRAM - Tempo/rhythm patterns over time
485
+ Shows how the rhythm structure changes throughout the piece.
486
+ """
487
+ print("Generating: Tempogram (Rhythm Patterns)...")
488
+ fig, ax = plt.subplots(figsize=(16, 6))
489
+
490
+ # Compute tempogram
491
+ onset_env = librosa.onset.onset_strength(y=y, sr=sr)
492
+ tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)
493
+
494
+ img = librosa.display.specshow(tempogram, sr=sr, x_axis='time',
495
+ y_axis='tempo', ax=ax, cmap='magma')
496
+
497
+ ax.set_xlabel('Time (seconds)', fontsize=12)
498
+ ax.set_ylabel('Tempo (BPM)', fontsize=12)
499
+ ax.set_title('Tempogram - Rhythm/Tempo Patterns Over Time\n(Bright horizontal bands = Strong rhythmic patterns)',
500
+ fontsize=14, fontweight='bold')
501
+
502
+ cbar = fig.colorbar(img, ax=ax)
503
+ cbar.set_label('Strength', fontsize=11)
504
+
505
+ save_figure(fig, output_dir, '14_tempogram')
506
+
507
+
508
+ def plot_mfcc(y, sr, output_dir):
509
+ """
510
+ 15. MFCCs - Timbral texture (sound color/character)
511
+ Shows the "color" or character of the sound - what makes
512
+ a piano sound different from a guitar.
513
+ """
514
+ print("Generating: MFCCs (Sound Character/Timbre)...")
515
+ fig, ax = plt.subplots(figsize=(16, 6))
516
+
517
+ mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
518
+
519
+ img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=ax,
520
+ cmap=COLORMAP_DIVERGING)
521
+
522
+ ax.set_xlabel('Time (seconds)', fontsize=12)
523
+ ax.set_ylabel('MFCC Coefficient', fontsize=12)
524
+ ax.set_title('MFCCs - Sound Character/Timbre ("Color" of the sound)\n(Different patterns = Different instrument sounds)',
525
+ fontsize=14, fontweight='bold')
526
+
527
+ cbar = fig.colorbar(img, ax=ax)
528
+ cbar.set_label('Coefficient Value', fontsize=11)
529
+
530
+ save_figure(fig, output_dir, '15_mfcc')
531
+
532
+
533
+ def plot_spectral_contrast(y, sr, output_dir):
534
+ """
535
+ 16. SPECTRAL CONTRAST - Difference between peaks and valleys
536
+ Shows the difference between loud and quiet frequency bands.
537
+ """
538
+ print("Generating: Spectral Contrast...")
539
+ fig, ax = plt.subplots(figsize=(16, 6))
540
+
541
+ contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
542
+
543
+ img = librosa.display.specshow(contrast, sr=sr, x_axis='time', ax=ax,
544
+ cmap='PRGn')
545
+
546
+ ax.set_xlabel('Time (seconds)', fontsize=12)
547
+ ax.set_ylabel('Frequency Band', fontsize=12)
548
+ ax.set_title('Spectral Contrast - Dynamic Range per Frequency Band\n(High contrast = Clear/distinct sounds, Low = Muddy/blended)',
549
+ fontsize=14, fontweight='bold')
550
+
551
+ cbar = fig.colorbar(img, ax=ax)
552
+ cbar.set_label('Contrast (dB)', fontsize=11)
553
+
554
+ save_figure(fig, output_dir, '16_spectral_contrast')
555
+
556
+
557
+ def plot_harmonic_percussive(y, sr, output_dir):
+     """
+     17. HARMONIC vs PERCUSSIVE separation
+     Separates sustained sounds (instruments, vocals) from
+     sharp attack sounds (drums, percussion).
+     """
+     print("Generating: Harmonic vs Percussive Separation...")
+
+     # Separate harmonic and percussive components
+     y_harmonic, y_percussive = librosa.effects.hpss(y)
+
+     fig, axes = plt.subplots(3, 1, figsize=(16, 10), sharex=True)
+
+     times = np.linspace(0, len(y)/sr, len(y))
+     t_ds, y_ds, yh_ds, yp_ds = downsample_for_plot(
+         times, y, y_harmonic, y_percussive
+     )
+
+     # Original
+     axes[0].plot(t_ds, y_ds, color='#2E86AB', linewidth=0.3)
+     axes[0].set_ylabel('Original', fontsize=11)
+     axes[0].set_title('Harmonic vs Percussive Separation\n(Splitting sustained notes from drum hits/attacks)',
+                       fontsize=14, fontweight='bold')
+
+     # Harmonic (sustained notes, melody, chords)
+     axes[1].plot(t_ds, yh_ds, color='#06D6A0', linewidth=0.3)
+     axes[1].set_ylabel('Harmonic\n(Melody/Chords)', fontsize=11)
+
+     # Percussive (drums, attacks)
+     axes[2].plot(t_ds, yp_ds, color='#EF476F', linewidth=0.3)
+     axes[2].set_ylabel('Percussive\n(Drums/Attacks)', fontsize=11)
+     axes[2].set_xlabel('Time (seconds)', fontsize=12)
+
+     for ax in axes:
+         ax.set_xlim(0, len(y)/sr)
+         ax.grid(True, alpha=0.3)
+
+     plt.tight_layout()
+     save_figure(fig, output_dir, '17_harmonic_percussive', tight=False)
+
+
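The core trick behind `librosa.effects.hpss` is median filtering of the spectrogram: a median filter along the time axis preserves horizontal structure (sustained notes), while one along the frequency axis preserves vertical structure (transient hits). A toy demonstration of just that intuition on a synthetic "spectrogram", assuming scipy is available (the real implementation adds soft masking and resynthesis back to audio, which this sketch omits):

```python
import numpy as np
from scipy.ndimage import median_filter

# Toy magnitude spectrogram (freq x time): a sustained tone is a
# horizontal line, a drum hit is a vertical line
S = np.zeros((64, 64))
S[20, :] = 1.0   # harmonic: one frequency held across all frames
S[:, 40] = 1.0   # percussive: all frequencies at one instant

# Median filter along time keeps horizontal (harmonic) structure;
# along frequency it keeps vertical (percussive) structure
H = median_filter(S, size=(1, 9))
P = median_filter(S, size=(9, 1))

print(H[20].sum() > P[20].sum())        # tone survives in H
print(P[:, 40].sum() > H[:, 40].sum())  # hit survives in P
```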
+ def plot_frequency_bands(y, sr, output_dir):
+     """
+     18. FREQUENCY BANDS - Energy in bass, mid, and treble
+     Shows the balance of low, mid, and high frequencies over time.
+     """
+     print("Generating: Frequency Bands (Bass/Mid/Treble)...")
+
+     # Compute spectrogram
+     S = np.abs(librosa.stft(y))
+     freqs = librosa.fft_frequencies(sr=sr)
+
+     # Define frequency bands
+     bands = {
+         'Sub-bass (20-60 Hz)': (20, 60),
+         'Bass (60-250 Hz)': (60, 250),
+         'Low-mid (250-500 Hz)': (250, 500),
+         'Mid (500-2000 Hz)': (500, 2000),
+         'High-mid (2000-4000 Hz)': (2000, 4000),
+         'Treble (4000-20000 Hz)': (4000, 20000)
+     }
+
+     colors = ['#540B0E', '#9E2A2B', '#E09F3E', '#FFF3B0', '#335C67', '#2E86AB']
+
+     fig, ax = plt.subplots(figsize=(16, 6))
+
+     times = librosa.frames_to_time(range(S.shape[1]), sr=sr)
+
+     band_energies = []
+     for (name, (low, high)), color in zip(bands.items(), colors):
+         mask = (freqs >= low) & (freqs < high)
+         if mask.sum() > 0:
+             energy = S[mask].mean(axis=0)
+             energy_smooth = gaussian_filter1d(energy, sigma=5)
+             band_energies.append((name, energy_smooth, color))
+
+     # Stack plot (epsilon guards against division by zero in silent frames;
+     # colors are taken from band_energies so they stay paired with labels
+     # even if a band was skipped for low sample rates)
+     energies = np.array([e[1] for e in band_energies])
+     energies_norm = energies / (energies.sum(axis=0, keepdims=True) + 1e-10)
+
+     ax.stackplot(times, energies_norm, labels=[e[0] for e in band_energies],
+                  colors=[e[2] for e in band_energies], alpha=0.8)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Relative Energy', fontsize=12)
+     ax.set_title('Frequency Band Distribution Over Time\n(Shows balance of bass, mids, and treble)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.set_ylim(0, 1)
+     ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), fontsize=9)
+
+     plt.tight_layout()
+     save_figure(fig, output_dir, '18_frequency_bands')
+
+
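The band-masking step above — select the FFT bins whose frequencies fall in a range, then average their magnitudes — can be sanity-checked on a synthetic signal with plain numpy:

```python
import numpy as np

sr = 8000
t = np.arange(sr) / sr  # one second of samples
# Loud 100 Hz "bass" tone plus a quieter 3 kHz "treble" tone
y = np.sin(2 * np.pi * 100 * t) + 0.2 * np.sin(2 * np.pi * 3000 * t)

spectrum = np.abs(np.fft.rfft(y))
freqs = np.fft.rfftfreq(len(y), d=1 / sr)

def band_energy(low, high):
    # Same masking idea as the plot: mean magnitude of bins in [low, high)
    mask = (freqs >= low) & (freqs < high)
    return spectrum[mask].mean()

bass = band_energy(60, 250)
treble = band_energy(2000, 4000)
print(bass > treble)  # True: the 100 Hz component dominates its band
```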
+ def plot_dynamic_range(y, sr, output_dir):
+     """
+     19. DYNAMIC RANGE - Loud vs quiet sections highlighted
+     Shows the contrast between loud and quiet parts.
+     """
+     print("Generating: Dynamic Range...")
+     fig, ax = plt.subplots(figsize=(16, 5))
+
+     # Compute RMS in frames
+     rms = librosa.feature.rms(y=y)[0]
+     frames = range(len(rms))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     # Normalize RMS to 0-1 range (epsilon guards against constant signals)
+     rms_norm = (rms - rms.min()) / (rms.max() - rms.min() + 1e-10)
+
+     # Create gradient fill using a colored mesh (much faster than per-frame fill_between)
+     from matplotlib.collections import PolyCollection
+     verts = []
+     colors_list = []
+     for i in range(len(times) - 1):
+         verts.append([(times[i], 0), (times[i], rms[i]),
+                       (times[i+1], rms[i+1]), (times[i+1], 0)])
+         colors_list.append(plt.cm.RdYlGn_r(rms_norm[i], alpha=0.8))
+
+     poly = PolyCollection(verts, facecolors=colors_list, edgecolors='none')
+     ax.add_collection(poly)
+     ax.autoscale_view()
+
+     ax.plot(times, rms, color='black', linewidth=0.8, alpha=0.5)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Loudness', fontsize=12)
+     ax.set_title('Dynamic Range - Volume Variation\n(Red = Loud peaks, Green = Quiet sections)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.set_ylim(0, None)
+
+     # Add colorbar
+     sm = plt.cm.ScalarMappable(cmap='RdYlGn_r', norm=plt.Normalize(0, 1))
+     cbar = fig.colorbar(sm, ax=ax)
+     cbar.set_label('Relative Loudness', fontsize=11)
+
+     save_figure(fig, output_dir, '19_dynamic_range')
+
+
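The frame-wise RMS that drives this plot is simple to reproduce by hand. A minimal numpy sketch mirroring the idea behind `librosa.feature.rms` (the 2048/512 frame and hop sizes are librosa's defaults; boundary padding is omitted, so values at the edges will differ slightly):

```python
import numpy as np

def frame_rms(y, frame_len=2048, hop=512):
    """Root-mean-square loudness of each analysis frame."""
    n = 1 + (len(y) - frame_len) // hop
    return np.array([
        np.sqrt(np.mean(y[i * hop:i * hop + frame_len] ** 2))
        for i in range(n)
    ])

rng = np.random.default_rng(1)
quiet = 0.05 * rng.standard_normal(8192)   # quiet passage
loud = 0.8 * rng.standard_normal(8192)     # loud passage
y = np.concatenate([quiet, loud])

rms = frame_rms(y)
# Dynamic range: ratio between loudest and quietest frame, in dB
dyn_range_db = 20 * np.log10(rms.max() / rms.min())
print(dyn_range_db > 12)  # True: clearly more than a 12 dB spread
```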
+ def plot_spectral_flatness(y, sr, output_dir):
+     """
+     20. SPECTRAL FLATNESS - Noise vs tonal content
+     High = noise-like (drums, percussion), Low = tonal (melody, chords).
+     """
+     print("Generating: Spectral Flatness (Noise vs Tone)...")
+     fig, ax = plt.subplots(figsize=(16, 4))
+
+     flatness = librosa.feature.spectral_flatness(y=y)[0]
+     frames = range(len(flatness))
+     times = librosa.frames_to_time(frames, sr=sr)
+
+     ax.fill_between(times, flatness, alpha=0.6, color='#9B5DE5')
+     ax.plot(times, flatness, color='#9B5DE5', linewidth=0.8)
+
+     ax.set_xlabel('Time (seconds)', fontsize=12)
+     ax.set_ylabel('Spectral Flatness', fontsize=12)
+     ax.set_title('Spectral Flatness - Noise vs Tonal Content\n(High = Noisy/percussive, Low = Tonal/melodic)',
+                  fontsize=14, fontweight='bold')
+     ax.set_xlim(0, times.max())
+     ax.set_ylim(0, 1)
+     ax.grid(True, alpha=0.3)
+
+     save_figure(fig, output_dir, '20_spectral_flatness')
+
+
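Spectral flatness is the geometric mean of the power spectrum divided by its arithmetic mean: white noise (energy spread evenly) scores near 1, a pure tone (one dominant peak) near 0. A quick numpy check of that definition:

```python
import numpy as np

def flatness(spectrum):
    """Geometric mean / arithmetic mean of a power spectrum (0..1)."""
    spectrum = spectrum + 1e-12  # avoid log(0)
    geo = np.exp(np.mean(np.log(spectrum)))
    return geo / np.mean(spectrum)

rng = np.random.default_rng(0)
noise = np.abs(np.fft.rfft(rng.standard_normal(4096))) ** 2
t = np.arange(4096)
# 440 cycles over 4096 samples -> a single FFT bin lights up
tone = np.abs(np.fft.rfft(np.sin(2 * np.pi * 440 * t / 4096))) ** 2

print(flatness(noise) > 0.3)   # True: noise is close to flat
print(flatness(tone) < 0.01)   # True: a pure tone is highly peaked
```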
+ def plot_combined_dashboard(y, sr, output_dir, base_path):
+     """
+     21. COMBINED DASHBOARD - All key visualizations in one view
+     A comprehensive overview combining multiple visualizations.
+     """
+     print("Generating: Combined Dashboard...")
+     fig = plt.figure(figsize=(20, 16))
+
+     # Create grid
+     gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.2)
+
+     # 1. Mel Spectrogram (top left)
+     ax1 = fig.add_subplot(gs[0, 0])
+     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=64)
+     S_db = librosa.power_to_db(S, ref=np.max)
+     librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='mel',
+                              ax=ax1, cmap=COLORMAP_MAIN)
+     ax1.set_title('Mel Spectrogram (Pitch Content)', fontweight='bold')
+
+     # 2. Chromagram (top right)
+     ax2 = fig.add_subplot(gs[0, 1])
+     chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
+     librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma',
+                              ax=ax2, cmap='YlOrRd')
+     ax2.set_title('Chromagram (Musical Notes)', fontweight='bold')
+
+     # 3. RMS Energy (second row left)
+     ax3 = fig.add_subplot(gs[1, 0])
+     rms = librosa.feature.rms(y=y)[0]
+     frames = range(len(rms))
+     times = librosa.frames_to_time(frames, sr=sr)
+     ax3.fill_between(times, rms, alpha=0.7, color='#F77F00')
+     ax3.set_xlim(0, times.max())
+     ax3.set_title('Volume/Energy Over Time', fontweight='bold')
+     ax3.set_xlabel('Time (s)')
+
+     # 4. Spectral Centroid (second row right)
+     ax4 = fig.add_subplot(gs[1, 1])
+     cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
+     times_c = librosa.frames_to_time(range(len(cent)), sr=sr)
+     ax4.fill_between(times_c, cent, alpha=0.6, color='#9B5DE5')
+     ax4.set_xlim(0, times_c.max())
+     ax4.set_title('Brightness Over Time', fontweight='bold')
+     ax4.set_xlabel('Time (s)')
+
+     # 5. Onset Strength with Beats (third row, full width)
+     ax5 = fig.add_subplot(gs[2, :])
+     onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+     times_o = librosa.frames_to_time(range(len(onset_env)), sr=sr)
+     _, beats = librosa.beat.beat_track(y=y, sr=sr)
+     beat_times = librosa.frames_to_time(beats, sr=sr)
+     ax5.fill_between(times_o, onset_env, alpha=0.5, color='#FF006E')
+     ax5.vlines(beat_times, 0, onset_env.max(), color='#3A0CA3', alpha=0.6, linewidth=1)
+     ax5.set_xlim(0, times_o.max())
+     ax5.set_title('Rhythm & Beats (vertical lines = beat positions)', fontweight='bold')
+     ax5.set_xlabel('Time (s)')
+
+     # 6. Frequency Bands (bottom, full width)
+     ax6 = fig.add_subplot(gs[3, :])
+     S_full = np.abs(librosa.stft(y))
+     freqs = librosa.fft_frequencies(sr=sr)
+     bands = [(60, 250), (250, 2000), (2000, 20000)]
+     band_names = ['Bass', 'Mid', 'Treble']
+     colors = ['#E63946', '#F4A261', '#2A9D8F']
+
+     times_f = librosa.frames_to_time(range(S_full.shape[1]), sr=sr)
+     for (low, high), name, color in zip(bands, band_names, colors):
+         mask = (freqs >= low) & (freqs < high)
+         if mask.sum() > 0:
+             energy = gaussian_filter1d(S_full[mask].mean(axis=0), sigma=5)
+             ax6.plot(times_f, energy / energy.max(), label=name, color=color, linewidth=1.5)
+
+     ax6.set_xlim(0, times_f.max())
+     ax6.set_title('Frequency Band Balance (Bass/Mid/Treble)', fontweight='bold')
+     ax6.set_xlabel('Time (s)')
+     ax6.legend(loc='upper right')
+
+     plt.suptitle(f'Audio Visualization Dashboard\n"{base_path.stem}"',
+                  fontsize=16, fontweight='bold', y=0.98)
+
+     save_figure(fig, output_dir, '21_combined_dashboard', tight=False)
+
+
+ def plot_3d_spectrogram(y, sr, output_dir):
+     """
+     22. 3D SPECTROGRAM - Frequency, time, and amplitude in 3D
+     A three-dimensional view of the audio.
+     """
+     print("Generating: 3D Spectrogram...")
+     from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the '3d' projection on older Matplotlib)
+
+     fig = plt.figure(figsize=(14, 10))
+     ax = fig.add_subplot(111, projection='3d')
+
+     # Compute spectrogram with reduced resolution for 3D
+     hop_length = 2048
+     S = np.abs(librosa.stft(y, hop_length=hop_length))
+     S_db = librosa.amplitude_to_db(S, ref=np.max)
+
+     # Downsample for visualization
+     step_t = max(1, S_db.shape[1] // 200)
+     step_f = max(1, S_db.shape[0] // 100)
+     S_down = S_db[::step_f, ::step_t]
+
+     # Create meshgrid
+     times = librosa.frames_to_time(range(S_db.shape[1]), sr=sr, hop_length=hop_length)
+     freqs = librosa.fft_frequencies(sr=sr)
+
+     times_down = times[::step_t]
+     freqs_down = freqs[::step_f]
+
+     T, F = np.meshgrid(times_down, freqs_down)
+
+     # Plot surface
+     surf = ax.plot_surface(T, F, S_down, cmap='magma',
+                            linewidth=0, antialiased=True, alpha=0.9)
+
+     ax.set_xlabel('Time (s)', fontsize=11)
+     ax.set_ylabel('Frequency (Hz)', fontsize=11)
+     ax.set_zlabel('Amplitude (dB)', fontsize=11)
+     ax.set_title('3D Spectrogram\n(Time × Frequency × Loudness)',
+                  fontsize=14, fontweight='bold')
+
+     ax.view_init(elev=30, azim=45)
+     fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10, label='Amplitude (dB)')
+
+     save_figure(fig, output_dir, '22_3d_spectrogram')
+
+
+ def create_visualization_guide(output_dir, duration, tempo, title):
+     """Create a text guide explaining all visualizations."""
+     guide_path = output_dir / "VISUALIZATION_GUIDE.txt"
+
+     # Handle tempo - it might be an array
+     if isinstance(tempo, np.ndarray):
+         tempo_val = float(tempo[0]) if len(tempo) > 0 else 0.0
+     else:
+         tempo_val = float(tempo)
+
+     guide_text = f"""
+ ================================================================================
+ AUDIO VISUALIZATION GUIDE
+ "{title}"
+ ================================================================================
+
+ Duration: {duration:.2f} seconds
+ Detected Tempo: {tempo_val:.1f} BPM
+
+ This folder contains 22 different visual representations of the audio file.
+ Each visualization shows a different aspect of how the music sounds.
+
+ --------------------------------------------------------------------------------
+ BASIC VISUALIZATIONS (Start Here)
+ --------------------------------------------------------------------------------
+
+ 01_waveform.png
+     What it shows: The raw audio signal over time
+     How to read it: Tall peaks = loud moments, flat areas = quiet moments
+     The shape shows the overall "texture" of the sound
+
+ 02_volume_envelope.png
+     What it shows: Overall loudness smoothed out over time
+     How to read it: Higher = louder, watch for crescendos (getting louder)
+     and decrescendos (getting quieter)
+
+ --------------------------------------------------------------------------------
+ FREQUENCY/PITCH VISUALIZATIONS
+ --------------------------------------------------------------------------------
+
+ 03_spectrogram.png
+     What it shows: ALL frequencies (pitches) over time
+     How to read it:
+       - Bottom = low/bass notes, Top = high/treble notes
+       - Brighter colors = louder at that frequency
+       - Horizontal lines = sustained notes
+       - Vertical patterns = rhythmic hits/drums
+
+ 04_mel_spectrogram.png
+     What it shows: Same as spectrogram but scaled to human hearing
+     How to read it: Spacing matches how we perceive pitch differences
+     Low notes spread apart, high notes compressed (like piano keys)
+
+ 05_chromagram.png
+     What it shows: The 12 musical notes (C, C#, D, D#, E, F, F#, G, G#, A, A#, B)
+     How to read it: Bright horizontal bands = that note is playing
+     Watch for patterns - these are chord progressions!
+
+ 06_tonnetz.png
+     What it shows: Musical harmony relationships
+     How to read it: Shows how notes relate to each other harmonically
+     Patterns indicate chord types and key changes
+
+ --------------------------------------------------------------------------------
+ SOUND CHARACTER VISUALIZATIONS
+ --------------------------------------------------------------------------------
+
+ 07_spectral_centroid.png
+     What it shows: "Brightness" of the sound
+     How to read it: High values = bright/sharp sound (like cymbal)
+     Low values = dark/mellow sound (like bass)
+
+ 08_spectral_bandwidth.png
+     What it shows: How "spread out" the frequencies are
+     How to read it: Wide = rich, complex sound (orchestra)
+     Narrow = pure, simple sound (flute solo)
+
+ 09_spectral_rolloff.png
+     What it shows: Where most of the sound energy is concentrated
+     How to read it: Higher = more high-frequency content
+
+ 15_mfcc.png
+     What it shows: The "character" or "color" of the sound (timbre)
+     How to read it: Different patterns = different instrument sounds
+     This is what makes a piano sound different from a trumpet
+
+ 16_spectral_contrast.png
+     What it shows: Difference between loud and quiet frequency bands
+     How to read it: High contrast = clear, distinct sounds
+     Low contrast = muddy, blended sounds
+
+ --------------------------------------------------------------------------------
+ RHYTHM & DYNAMICS VISUALIZATIONS
+ --------------------------------------------------------------------------------
+
+ 10_rms_energy.png
+     What it shows: Overall power/intensity over time
+     How to read it: Peaks = intense/powerful moments
+     Valleys = calmer sections
+
+ 12_onset_strength.png
+     What it shows: Where new notes/sounds begin
+     How to read it: Peaks and vertical lines = new notes starting
+     Great for seeing the rhythm and when instruments come in
+
+ 13_beat_tracking.png
+     What it shows: The detected beats/pulse of the music
+     How to read it: Red vertical lines = beat positions
+     The spacing shows the tempo and rhythm
+
+ 14_tempogram.png
+     What it shows: Rhythm patterns over time
+     How to read it: Bright horizontal bands = strong rhythmic patterns
+     at that tempo (BPM). Changes = tempo variations
+
+ 19_dynamic_range.png
+     What it shows: Volume variation with color coding
+     How to read it: Red = loud peaks, Green = quiet sections
+     Shows the dramatic contrast in the music
+
+ --------------------------------------------------------------------------------
+ TEXTURE VISUALIZATIONS
+ --------------------------------------------------------------------------------
+
+ 11_zero_crossing_rate.png
+     What it shows: Sound texture (smooth vs rough)
+     How to read it: High = noisy/percussive (drums, cymbals)
+     Low = smooth/tonal (sustained notes, vocals)
+
+ 20_spectral_flatness.png
+     What it shows: Noise vs tonal content
+     How to read it: High = noise-like (percussion, breath sounds)
+     Low = tonal/melodic (notes, chords)
+
+ --------------------------------------------------------------------------------
+ COMPONENT SEPARATION
+ --------------------------------------------------------------------------------
+
+ 17_harmonic_percussive.png
+     What it shows: The audio split into two parts
+       - HARMONIC: Sustained sounds (piano, brass, strings)
+       - PERCUSSIVE: Sharp attacks (drums, plucks)
+     How to read it: Top = original, Middle = melody/chords, Bottom = drums/hits
+
+ 18_frequency_bands.png
+     What it shows: Balance of bass, mid, and treble over time
+     How to read it: The colored areas show which frequencies dominate
+     at each moment. Watch how the balance shifts!
+
+ --------------------------------------------------------------------------------
+ OVERVIEW VISUALIZATIONS
+ --------------------------------------------------------------------------------
+
+ 21_combined_dashboard.png
+     What it shows: Multiple key visualizations in one view
+     How to read it: A comprehensive overview of the piece
+     Good for getting the overall picture quickly
+
+ 22_3d_spectrogram.png
+     What it shows: Time, frequency, and amplitude in 3D
+     How to read it: Peaks = loud frequencies, valleys = quiet
+     Gives a "landscape" view of the music
+
+ ================================================================================
+ TIPS FOR EXPERIENCING THE MUSIC
+ ================================================================================
+
+ 1. Start with the Combined Dashboard (21) to get an overview
+
+ 2. For MELODY and HARMONY: Focus on the Chromagram (05) and
+    Mel Spectrogram (04)
+
+ 3. For RHYTHM: Look at Beat Tracking (13), Onset Strength (12),
+    and Tempogram (14)
+
+ 4. For EMOTIONAL DYNAMICS: Watch the RMS Energy (10) and
+    Dynamic Range (19)
+
+ 5. For TEXTURE and SOUND CHARACTER: Explore MFCC (15) and
+    Spectral Centroid (07)
+
+ ================================================================================
+ """
+
+     with open(guide_path, 'w', encoding='utf-8') as f:
+         f.write(guide_text)
+
+     print("  Saved: VISUALIZATION_GUIDE.txt")
+
+
+ def main():
+     """Main function to run all visualizations."""
+     global FIGURE_DPI
+
+     parser = argparse.ArgumentParser(
+         description='Generate comprehensive visual representations of audio files. '
+                     'Creates 22 different visualizations to help experience music visually.'
+     )
+     parser.add_argument(
+         'file',
+         help='Path to the audio file (mp3, wav, flac, ogg, etc.)'
+     )
+     parser.add_argument(
+         '--output-dir', '-o',
+         help='Custom output directory (default: <filename>_visualizations in the same folder as the audio file)',
+         default=None
+     )
+     parser.add_argument(
+         '--dpi',
+         help=f'Figure resolution in DPI (default: {FIGURE_DPI})',
+         type=int,
+         default=None
+     )
+
+     args = parser.parse_args()
+
+     audio_path = Path(args.file)
+     if not audio_path.exists():
+         print(f"Error: File not found: {audio_path}")
+         sys.exit(1)
+
+     if args.dpi is not None:
+         FIGURE_DPI = args.dpi
+
+     title = audio_path.stem
+
+     print("=" * 60)
+     print("AUDIO VISUALIZER")
+     print("Let Claude hear your music")
+     print("=" * 60)
+     print()
+
+     # Load audio
+     y, sr = load_audio(audio_path)
+     duration = librosa.get_duration(y=y, sr=sr)
+
+     # Create output directory
+     if args.output_dir:
+         output_dir = Path(args.output_dir)
+         output_dir.mkdir(parents=True, exist_ok=True)
+         print(f"Output directory: {output_dir}")
+     else:
+         output_dir = create_output_dir(audio_path)
+
+     print()
+     print("Generating visualizations...")
+     print("-" * 40)
+
+     # Generate all visualizations
+     plot_waveform(y, sr, output_dir)
+     plot_waveform_envelope(y, sr, output_dir)
+     plot_spectrogram(y, sr, output_dir)
+     plot_mel_spectrogram(y, sr, output_dir)
+     plot_chromagram(y, sr, output_dir)
+     plot_tonnetz(y, sr, output_dir)
+     plot_spectral_centroid(y, sr, output_dir)
+     plot_spectral_bandwidth(y, sr, output_dir)
+     plot_spectral_rolloff(y, sr, output_dir)
+     plot_rms_energy(y, sr, output_dir)
+     plot_zero_crossing_rate(y, sr, output_dir)
+     plot_onset_strength(y, sr, output_dir)
+     plot_beat_track(y, sr, output_dir)
+     plot_tempogram(y, sr, output_dir)
+     plot_mfcc(y, sr, output_dir)
+     plot_spectral_contrast(y, sr, output_dir)
+     plot_harmonic_percussive(y, sr, output_dir)
+     plot_frequency_bands(y, sr, output_dir)
+     plot_dynamic_range(y, sr, output_dir)
+     plot_spectral_flatness(y, sr, output_dir)
+     plot_combined_dashboard(y, sr, output_dir, audio_path)
+     plot_3d_spectrogram(y, sr, output_dir)
+
+     # Get tempo for guide
+     tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+
+     # Create guide
+     print()
+     print("Creating visualization guide...")
+     create_visualization_guide(output_dir, duration, tempo, title)
+
+     print()
+     print("=" * 60)
+     print("COMPLETE!")
+     print("Generated 22 visualizations + guide in:")
+     print(f"  {output_dir}")
+     print("=" * 60)
+
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy
+ matplotlib
+ scipy
+ librosa
+ gradio