#!/usr/bin/env python3
"""
Audio Visualizer — Gradio Web Interface for Hugging Face Spaces
Upload audio, get 22 visualizations + a zip download ready for Claude.
"""

import matplotlib
matplotlib.use('Agg')  # MUST be before any pyplot/librosa import

import gc
import shutil
import tempfile
from pathlib import Path

import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np

import audio_visualizer

# All 22 visualization functions in order (mirrors the GUI list).
# Each entry is (display name used in progress messages, plotting callable).
VISUALIZATIONS = [
    ("Waveform", audio_visualizer.plot_waveform),
    ("Volume Envelope", audio_visualizer.plot_waveform_envelope),
    ("Spectrogram", audio_visualizer.plot_spectrogram),
    ("Mel Spectrogram", audio_visualizer.plot_mel_spectrogram),
    ("Chromagram", audio_visualizer.plot_chromagram),
    ("Tonnetz", audio_visualizer.plot_tonnetz),
    ("Spectral Centroid", audio_visualizer.plot_spectral_centroid),
    ("Spectral Bandwidth", audio_visualizer.plot_spectral_bandwidth),
    ("Spectral Rolloff", audio_visualizer.plot_spectral_rolloff),
    ("RMS Energy", audio_visualizer.plot_rms_energy),
    ("Zero Crossing Rate", audio_visualizer.plot_zero_crossing_rate),
    ("Onset Strength", audio_visualizer.plot_onset_strength),
    ("Beat Tracking", audio_visualizer.plot_beat_track),
    ("Tempogram", audio_visualizer.plot_tempogram),
    ("MFCCs", audio_visualizer.plot_mfcc),
    ("Spectral Contrast", audio_visualizer.plot_spectral_contrast),
    ("Harmonic/Percussive", audio_visualizer.plot_harmonic_percussive),
    ("Frequency Bands", audio_visualizer.plot_frequency_bands),
    ("Dynamic Range", audio_visualizer.plot_dynamic_range),
    ("Spectral Flatness", audio_visualizer.plot_spectral_flatness),
    ("Combined Dashboard", audio_visualizer.plot_combined_dashboard),
    ("3D Spectrogram", audio_visualizer.plot_3d_spectrogram),
]

# Maps the label shown in the "Quality" radio to the DPI value assigned to
# audio_visualizer.FIGURE_DPI.
DPI_OPTIONS = {
    "Normal (150 DPI)": 150,
    "High (200 DPI)": 200,
    "Ultra (300 DPI)": 300,
}


def generate_visualizations(audio_path, quality, progress=gr.Progress()):
    """Generate all 22 visualizations and return gallery images + zip file.

    Args:
        audio_path: Filesystem path of the uploaded audio, or None when
            nothing has been uploaded.
        quality: One of the DPI_OPTIONS keys; unknown values fall back to
            150 DPI.
        progress: Gradio progress tracker. The gr.Progress() default is how
            Gradio injects the tracker, so it must remain a default argument.

    Returns:
        A tuple (image_paths, zip_path): the sorted list of PNG paths written
        to a temporary output directory, and the path of a zip archive of
        that directory.

    Raises:
        gr.Error: If audio_path is None.
        Exception: Anything raised while loading, plotting or archiving is
            re-raised after the temporary directories have been removed.
    """
    if audio_path is None:
        raise gr.Error("Please upload an audio file first.")

    # Set DPI. This writes a module-level setting on audio_visualizer, so it
    # is shared by every call in this process.
    audio_visualizer.FIGURE_DPI = DPI_OPTIONS.get(quality, 150)

    # Load audio
    progress(0, desc="Loading audio...")
    y, sr = audio_visualizer.load_audio(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)
    audio_file = Path(audio_path)
    title = audio_file.stem

    # Create temp output directory. On success it is intentionally left in
    # place because the returned paths point into it.
    output_dir = Path(tempfile.mkdtemp(prefix="avis_output_"))
    zip_dir = None

    # The rendering loop owns the first 95% of the bar so that the later
    # fixed checkpoints (0.95, 0.98, 1.0) never move the bar backwards.
    render_share = 0.95

    try:
        # Generate each visualization
        total = len(VISUALIZATIONS)
        for i, (name, func) in enumerate(VISUALIZATIONS):
            progress(
                render_share * i / total,
                desc=f"Generating: {name} ({i + 1}/{total})...",
            )
            try:
                # The dashboard is the only plot that also takes the source
                # file path; compare by identity, not equality.
                if func is audio_visualizer.plot_combined_dashboard:
                    func(y, sr, output_dir, base_path=audio_file)
                else:
                    func(y, sr, output_dir)
            finally:
                # Release figures even when a plot raises, so a failed run
                # does not leave matplotlib figures alive in the process.
                plt.close('all')
                gc.collect()

        # Create visualization guide
        progress(0.95, desc="Creating visualization guide...")
        # NOTE(review): recent librosa releases may return tempo as a
        # one-element array rather than a scalar — confirm that
        # create_visualization_guide handles both.
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        audio_visualizer.create_visualization_guide(
            output_dir, duration, tempo, title
        )

        # Collect all PNG paths (sorted by filename for correct order)
        image_paths = sorted(output_dir.glob("*.png"))

        # Create zip file in its own directory so the archive is not written
        # into the directory being archived.
        progress(0.98, desc="Creating zip archive...")
        zip_dir = Path(tempfile.mkdtemp(prefix="avis_zip_"))
        zip_base = zip_dir / f"{title}_visualizations"
        zip_path = shutil.make_archive(str(zip_base), 'zip', output_dir)
    except Exception:
        # Nothing will be returned, so the partial output is useless:
        # remove it instead of leaking temp directories on every failure.
        shutil.rmtree(output_dir, ignore_errors=True)
        if zip_dir is not None:
            shutil.rmtree(zip_dir, ignore_errors=True)
        raise

    progress(1.0, desc="Done!")
    return image_paths, zip_path


# --- Build the Gradio interface ---

with gr.Blocks(
    title="Audio Visualizer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        """
        # Audio Visualizer — Let Claude Hear Your Music

        Upload any audio file to generate **22 visualizations** that translate sound into sight.
        Download the zip and share it with Claude to let AI "listen" to your music.
        """
    )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                type="filepath",
                label="Upload Audio File",
            )
            quality_radio = gr.Radio(
                choices=list(DPI_OPTIONS.keys()),
                value="Normal (150 DPI)",
                label="Quality",
            )
            generate_btn = gr.Button("Generate Visualizations", variant="primary")

        # Right column: static usage instructions.
        with gr.Column(scale=1):
            gr.Markdown(
                """
                ### How it works
                1. **Upload** an MP3, WAV, FLAC, OGG, or other audio file
                2. **Choose quality** — higher DPI = sharper images but slower
                3. **Click Generate** and wait for all 22 visualizations
                4. **Download the zip** and upload it to a Claude conversation

                Claude can analyze these images to describe the music's rhythm,
                melody, dynamics, and texture — even though it can't hear the audio directly.
                """
            )

    gallery = gr.Gallery(
        label="Visualizations",
        columns=4,
        object_fit="contain",
        height="auto",
    )
    zip_download = gr.File(label="Download All (Zip)")

    # Output order must match the (image_paths, zip_path) return tuple.
    generate_btn.click(
        fn=generate_visualizations,
        inputs=[audio_input, quality_radio],
        outputs=[gallery, zip_download],
    )


if __name__ == "__main__":
    demo.launch()