AVisualizer / app.py
Justin Davis
Add Audio Visualizer Gradio app
ad47dc1
#!/usr/bin/env python3
"""
Audio Visualizer — Gradio Web Interface for Hugging Face Spaces
Upload audio, get 22 visualizations + a zip download ready for Claude.
"""
import matplotlib
matplotlib.use('Agg') # MUST be before any pyplot/librosa import
import gc
import shutil
import tempfile
from pathlib import Path
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
import audio_visualizer
# Registry of the 22 plots, as (display label, plotting callable) pairs.
# generate_visualizations() walks this list top to bottom, uses the label in
# its progress text, and calls each callable with (y, sr, output_dir); the
# combined dashboard entry is additionally given base_path.
# The order mirrors the GUI list.
VISUALIZATIONS = [
    ("Waveform", audio_visualizer.plot_waveform),
    ("Volume Envelope", audio_visualizer.plot_waveform_envelope),
    ("Spectrogram", audio_visualizer.plot_spectrogram),
    ("Mel Spectrogram", audio_visualizer.plot_mel_spectrogram),
    ("Chromagram", audio_visualizer.plot_chromagram),
    ("Tonnetz", audio_visualizer.plot_tonnetz),
    ("Spectral Centroid", audio_visualizer.plot_spectral_centroid),
    ("Spectral Bandwidth", audio_visualizer.plot_spectral_bandwidth),
    ("Spectral Rolloff", audio_visualizer.plot_spectral_rolloff),
    ("RMS Energy", audio_visualizer.plot_rms_energy),
    ("Zero Crossing Rate", audio_visualizer.plot_zero_crossing_rate),
    ("Onset Strength", audio_visualizer.plot_onset_strength),
    ("Beat Tracking", audio_visualizer.plot_beat_track),
    ("Tempogram", audio_visualizer.plot_tempogram),
    ("MFCCs", audio_visualizer.plot_mfcc),
    ("Spectral Contrast", audio_visualizer.plot_spectral_contrast),
    ("Harmonic/Percussive", audio_visualizer.plot_harmonic_percussive),
    ("Frequency Bands", audio_visualizer.plot_frequency_bands),
    ("Dynamic Range", audio_visualizer.plot_dynamic_range),
    ("Spectral Flatness", audio_visualizer.plot_spectral_flatness),
    ("Combined Dashboard", audio_visualizer.plot_combined_dashboard),
    ("3D Spectrogram", audio_visualizer.plot_3d_spectrogram),
]
# Quality labels offered by the UI radio, mapped to the DPI value that
# generate_visualizations() assigns to audio_visualizer.FIGURE_DPI.
# Insertion order is the order the choices are listed in; the "Normal" label
# is also the radio's preselected value.
DPI_OPTIONS = {
    "Normal (150 DPI)": 150,
    "High (200 DPI)": 200,
    "Ultra (300 DPI)": 300,
}
def generate_visualizations(audio_path, quality, progress=gr.Progress()):
    """Generate all 22 visualizations and return gallery images + zip file.

    Args:
        audio_path: Filesystem path of the uploaded audio, or None when
            nothing has been uploaded.
        quality: A key of DPI_OPTIONS; any other value falls back to 150 DPI.
        progress: Gradio progress tracker; called with a 0-1 fraction and a
            description as each stage starts.

    Returns:
        A ``(image_paths, zip_path)`` tuple: the filename-sorted list of PNG
        ``Path`` objects found in the output directory, and the path string
        of a zip archive of that whole directory.

    Raises:
        gr.Error: If ``audio_path`` is None.
    """
    if audio_path is None:
        raise gr.Error("Please upload an audio file first.")

    # NOTE: this writes a module-level attribute on audio_visualizer, so the
    # chosen DPI is shared by anything else using that module.
    audio_visualizer.FIGURE_DPI = DPI_OPTIONS.get(quality, 150)

    progress(0, desc="Loading audio...")
    y, sr = audio_visualizer.load_audio(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)
    audio_file = Path(audio_path)
    title = audio_file.stem

    # Directories created so far; removed again only if a later step fails.
    temp_dirs = []
    try:
        output_dir = Path(tempfile.mkdtemp(prefix="avis_output_"))
        temp_dirs.append(output_dir)

        total = len(VISUALIZATIONS)
        for i, (name, func) in enumerate(VISUALIZATIONS):
            progress(i / total, desc=f"Generating: {name} ({i + 1}/{total})...")
            # The dashboard is the one plot that also takes the source path.
            if func is audio_visualizer.plot_combined_dashboard:
                func(y, sr, output_dir, base_path=audio_file)
            else:
                func(y, sr, output_dir)
            # Drop figures after every plot so memory does not accumulate
            # across the whole run.
            plt.close('all')
            gc.collect()

        progress(0.95, desc="Creating visualization guide...")
        # NOTE(review): depending on the librosa version, tempo may be a
        # NumPy array rather than a float -- confirm that
        # create_visualization_guide accepts both.
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        audio_visualizer.create_visualization_guide(output_dir, duration, tempo, title)

        # Sorted by filename so the gallery order is deterministic.
        image_paths = sorted(output_dir.glob("*.png"))

        progress(0.98, desc="Creating zip archive...")
        # The archive lives in its own directory so it is not zipped into
        # itself while output_dir is being archived.
        zip_dir = Path(tempfile.mkdtemp(prefix="avis_zip_"))
        temp_dirs.append(zip_dir)
        zip_path = shutil.make_archive(
            str(zip_dir / f"{title}_visualizations"), 'zip', output_dir
        )
    except Exception:
        # A failed run returns nothing that points at these files, so release
        # the figures and temp directories instead of leaking them.
        plt.close('all')
        for leftover in temp_dirs:
            shutil.rmtree(leftover, ignore_errors=True)
        raise

    # On success the directories are intentionally kept: the returned paths
    # must still exist after this function returns.
    progress(1.0, desc="Done!")
    return image_paths, zip_path
# --- Build the Gradio interface ---
# Layout: a row with the inputs on the left and the instructions on the right,
# followed by the results gallery and the zip download.
# The Markdown bodies are kept flush-left so the string literals carry no
# source-code indentation.
with gr.Blocks(
    title="Audio Visualizer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        """
# Audio Visualizer — Let Claude Hear Your Music
Upload any audio file to generate **22 visualizations** that translate sound into sight.
Download the zip and share it with Claude to let AI "listen" to your music.
"""
    )
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands generate_visualizations a path string
            # (or None when nothing was uploaded).
            audio_input = gr.Audio(
                type="filepath",
                label="Upload Audio File",
            )
            quality_radio = gr.Radio(
                choices=list(DPI_OPTIONS.keys()),
                value="Normal (150 DPI)",
                label="Quality",
            )
            generate_btn = gr.Button("Generate Visualizations", variant="primary")
        with gr.Column(scale=1):
            gr.Markdown(
                """
### How it works
1. **Upload** an MP3, WAV, FLAC, OGG, or other audio file
2. **Choose quality** — higher DPI = sharper images but slower
3. **Click Generate** and wait for all 22 visualizations
4. **Download the zip** and upload it to a Claude conversation
Claude can analyze these images to describe the music's rhythm,
melody, dynamics, and texture — even though it can't hear the
audio directly.
"""
            )
    gallery = gr.Gallery(
        label="Visualizations",
        columns=4,
        object_fit="contain",
        height="auto",
    )
    zip_download = gr.File(label="Download All (Zip)")
    # The two return values of generate_visualizations map, in order, onto
    # the gallery and the file download.
    generate_btn.click(
        fn=generate_visualizations,
        inputs=[audio_input, quality_radio],
        outputs=[gallery, zip_download],
    )

if __name__ == "__main__":
    demo.launch()