Spaces:

AnimalMonk
/

audio-mastering-suite

Running

App Files Files Community

AnimalMonk committed on Mar 10

Commit

9c754af

verified ·

1 Parent(s): 255dc57

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

.gitignore +4 -0
CHANGELOG.md +14 -0
analysis.py +300 -0
app.py +98 -1
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+__pycache__/
+*.pyc
+*.wav
+nul

CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,19 @@
 # Changelog — Audio Mastering Suite
 ## v3.2 — 2026-03-04
 ### Genre Expansion

 # Changelog — Audio Mastering Suite
+## v3.3 — 2026-03-10
+### AI Analysis (Gemini 2.5 Pro)
+- **AI Recommend button** — Analyzes uploaded audio (spectral profile, dynamics, stereo field) and recommends optimal mastering settings via Google Gemini 2.5 Pro
+- **Apply AI Settings** — One-click button to populate all 7 sliders with AI-recommended values
+- **Post-master AI report** — After mastering, Gemini compares original vs mastered audio and provides a quality assessment with actionable feedback
+- **Audio feature extraction** — New `analysis.py` module: spectral centroid, spectral rolloff, 6-band energy distribution, crest factor, dynamic range, stereo correlation
+- **Graceful degradation** — If `GOOGLE_API_KEY` is not set, AI features show a helpful message instead of crashing
+### Dependencies
+- Added `google-generativeai>=0.8.0`
+---
 ## v3.2 — 2026-03-04
 ### Genre Expansion

analysis.py ADDED Viewed

	@@ -0,0 +1,300 @@

+"""AI-powered audio analysis using Gemini Pro — feature extraction and recommendations."""
+import json
+import os
+import numpy as np
+from scipy.signal import welch
+from dsp import load_audio, map_compression
+from loudness import measure_loudness, measure_true_peak
+# ---------------------------------------------------------------------------
+# Audio feature extraction
+# ---------------------------------------------------------------------------
+# Frequency bands reported in the per-band energy breakdown.
+# Each entry is (label, lower edge, upper edge); extract_features compares the
+# edges against the Welch frequency axis (computed with fs=sample_rate) and
+# selects bins with lower <= f < upper, so adjacent bands never share a bin.
+_BANDS = [
+    ("Sub-bass", 20, 60),
+    ("Bass", 60, 250),
+    ("Low-Mids", 250, 500),
+    ("Mids", 500, 2000),
+    ("Upper-Mids", 2000, 6000),
+    ("Highs", 6000, 20000),
+]
+def extract_features(audio, sample_rate):
+    """Extract audio features for AI analysis.
+    Args:
+        audio: numpy array, shape (samples,) or (samples, channels).
+        sample_rate: int.
+    Returns:
+        dict with spectral, dynamic, and stereo measurements.
+    """
+    # Convert to mono for spectral analysis
+    if audio.ndim == 2:
+        mono = audio.mean(axis=1)
+    else:
+        mono = audio
+    # --- Spectral analysis via Welch ---
+    nperseg = min(8192, len(mono))
+    freqs, psd = welch(mono, fs=sample_rate, nperseg=nperseg)
+    # Spectral centroid
+    total_energy = np.sum(psd)
+    if total_energy > 0:
+        spectral_centroid = float(np.sum(freqs * psd) / total_energy)
+    else:
+        spectral_centroid = 0.0
+    # Spectral rolloff (85%)
+    cumulative = np.cumsum(psd)
+    if total_energy > 0:
+        rolloff_idx = np.searchsorted(cumulative, 0.85 * total_energy)
+        spectral_rolloff = float(freqs[min(rolloff_idx, len(freqs) - 1)])
+    else:
+        spectral_rolloff = 0.0
+    # Band energy distribution (dB)
+    band_energy = {}
+    for name, lo, hi in _BANDS:
+        mask = (freqs >= lo) & (freqs < hi)
+        band_rms = np.sqrt(np.mean(psd[mask])) if np.any(mask) else 0.0
+        if band_rms > 0:
+            band_energy[name] = round(20.0 * np.log10(band_rms), 1)
+        else:
+            band_energy[name] = -100.0
+    # --- Dynamics ---
+    rms = np.sqrt(np.mean(mono ** 2))
+    peak = np.max(np.abs(mono))
+    rms_db = round(20.0 * np.log10(rms), 1) if rms > 0 else -100.0
+    peak_db = round(20.0 * np.log10(peak), 1) if peak > 0 else -100.0
+    crest_factor = round(peak_db - rms_db, 1)
+    dynamic_range = crest_factor  # simplified: same as crest factor for full-file
+    # --- Stereo correlation ---
+    is_mono = audio.ndim == 1 or audio.shape[1] == 1
+    if not is_mono:
+        left = audio[:, 0]
+        right = audio[:, 1]
+        correlation = np.corrcoef(left, right)[0, 1]
+        stereo_correlation = round(float(correlation), 3)
+    else:
+        stereo_correlation = None
+    # --- Loudness (reuse existing functions) ---
+    lufs = measure_loudness(audio, sample_rate)
+    true_peak = measure_true_peak(audio, sample_rate)
+    return {
+        "spectral_centroid_hz": round(spectral_centroid, 1),
+        "spectral_rolloff_hz": round(spectral_rolloff, 1),
+        "band_energy": band_energy,
+        "rms_db": rms_db,
+        "peak_db": peak_db,
+        "crest_factor_db": crest_factor,
+        "dynamic_range_db": dynamic_range,
+        "stereo_correlation": stereo_correlation,
+        "lufs": round(lufs, 1) if not np.isinf(lufs) else -100.0,
+        "true_peak_dbtp": true_peak,
+        "is_mono": is_mono,
+    }
+# ---------------------------------------------------------------------------
+# Gemini API wrapper
+# ---------------------------------------------------------------------------
+def _get_gemini_model():
+    """Initialize and return the Gemini model, or None if no API key."""
+    api_key = os.environ.get("GOOGLE_API_KEY")
+    if not api_key:
+        return None
+    try:
+        import google.generativeai as genai
+        genai.configure(api_key=api_key)
+        return genai.GenerativeModel("gemini-2.5-pro")
+    except Exception:
+        return None
+def _call_gemini(system_prompt, user_prompt):
+    """Call Gemini and return the response text."""
+    model = _get_gemini_model()
+    if model is None:
+        return None
+    try:
+        response = model.generate_content(
+            [{"role": "user", "parts": [f"{system_prompt}\n\n{user_prompt}"]}]
+        )
+        return response.text
+    except Exception as e:
+        return f"*AI analysis unavailable: {e}*"
+# ---------------------------------------------------------------------------
+# Phase 1: AI-recommended settings
+# ---------------------------------------------------------------------------
+# Instruction text sent ahead of the measurements in recommend_settings().
+# The JSON keys and numeric ranges listed here are the same ones that
+# recommend_settings() reads back and clamps, so keep the two in sync.
+_RECOMMEND_SYSTEM = """You are an expert audio mastering engineer. Analyze the audio measurements below and recommend optimal mastering settings.
+You have access to these controls:
+- Lows: Low shelf at 200 Hz, range -3.0 to +3.0 dB, step 0.5 dB
+- Mids: Peak filter at 1.2 kHz (Q=1.0), range -3.0 to +3.0 dB, step 0.1 dB
+- Highs: High shelf at 10 kHz (Q=0.7), range -3.0 to +3.0 dB, step 0.5 dB
+- Bass Boost: Peak filter (Q=2.0), range 0 to +3.0 dB, step 0.5 dB
+- Bass Frequency: Center frequency for bass boost, range 40-100 Hz, step 1 Hz
+- Compression: 0 (light) to 100 (heavy). Maps to: threshold -14 to -22 dB, ratio 1.1:1 to 2.5:1, release 250ms to 100ms, fixed 30ms attack
+- Stereo Width: 80% (narrow) to 150% (wide). 100% = no change. M/S encoding above 200 Hz only.
+Return ONLY a valid JSON object with these exact keys and a "reasoning" field containing a brief markdown explanation (3-5 bullet points):
+{
+  "lows_db": number,
+  "mid_boost_db": number,
+  "highs_db": number,
+  "bass_boost_db": number,
+  "bass_freq_hz": integer,
+  "compression": integer,
+  "stereo_width": integer,
+  "reasoning": "markdown string"
+}
+Keep values within the valid ranges. Be conservative — subtle moves are better than aggressive ones."""
+def recommend_settings(audio_path):
+    """Analyze raw audio and return AI-recommended mastering settings.
+    Args:
+        audio_path: path to the uploaded audio file.
+    Returns:
+        dict with recommended slider values and reasoning markdown,
+        or None if AI is unavailable.
+    """
+    audio, sr = load_audio(audio_path)
+    features = extract_features(audio, sr)
+    user_prompt = f"""Analyze this audio and recommend mastering settings:
+**Audio Measurements:**
+- Integrated Loudness: {features['lufs']} LUFS
+- True Peak: {features['true_peak_dbtp']} dBTP
+- RMS Level: {features['rms_db']} dB
+- Crest Factor: {features['crest_factor_db']} dB
+- Spectral Centroid: {features['spectral_centroid_hz']} Hz
+- Spectral Rolloff (85%): {features['spectral_rolloff_hz']} Hz
+- Stereo Correlation: {features['stereo_correlation'] if features['stereo_correlation'] is not None else 'N/A (mono)'}
+- Mono: {features['is_mono']}
+**Band Energy (dB):**
+{chr(10).join(f'- {k}: {v} dB' for k, v in features['band_energy'].items())}
+Return the JSON object with recommended settings."""
+    response = _call_gemini(_RECOMMEND_SYSTEM, user_prompt)
+    if response is None:
+        return None
+    # Parse JSON from response (Gemini may wrap it in markdown code fence)
+    try:
+        text = response.strip()
+        if text.startswith("```"):
+            # Strip markdown code fence
+            lines = text.split("\n")
+            text = "\n".join(lines[1:-1])
+        result = json.loads(text)
+        # Clamp values to valid ranges
+        result["lows_db"] = max(-3.0, min(3.0, float(result.get("lows_db", 0))))
+        result["mid_boost_db"] = max(-3.0, min(3.0, float(result.get("mid_boost_db", 0))))
+        result["highs_db"] = max(-3.0, min(3.0, float(result.get("highs_db", 0))))
+        result["bass_boost_db"] = max(0, min(3.0, float(result.get("bass_boost_db", 0))))
+        result["bass_freq_hz"] = max(40, min(100, int(result.get("bass_freq_hz", 60))))
+        result["compression"] = max(0, min(100, int(result.get("compression", 50))))
+        result["stereo_width"] = max(80, min(150, int(result.get("stereo_width", 100))))
+        if "reasoning" not in result:
+            result["reasoning"] = "*No explanation provided.*"
+        return result
+    except (json.JSONDecodeError, KeyError, TypeError):
+        return {"reasoning": response, "parse_error": True}
+# ---------------------------------------------------------------------------
+# Phase 2: Post-master comparison report
+# ---------------------------------------------------------------------------
+# Instruction text sent ahead of the before/after measurements in
+# compare_master(); it fixes the markdown section layout of the returned report.
+_COMPARE_SYSTEM = """You are an expert audio mastering engineer reviewing a completed master. Compare the original and mastered audio measurements below. Assess whether the mastering improved the audio quality.
+Format your response as markdown with these sections:
+### Overall Assessment
+(1-2 sentences)
+### What Worked Well
+(bullet points)
+### Suggested Improvements
+(bullet points with specific slider recommendations if applicable)
+### Technical Notes
+(any concerns about dynamics, phase, or frequency balance)
+Be concise and specific. Reference actual measurement changes."""
+def compare_master(original, mastered, sample_rate, settings_dict):
+    """Compare original vs mastered audio and return AI quality report.
+    Args:
+        original: numpy array of original audio.
+        mastered: numpy array of mastered audio.
+        sample_rate: int.
+        settings_dict: dict with the mastering settings that were applied.
+    Returns:
+        str: markdown-formatted comparison report, or fallback message.
+    """
+    orig_features = extract_features(original, sample_rate)
+    mast_features = extract_features(mastered, sample_rate)
+    # Build the compression details from slider value
+    comp_val = settings_dict.get("compression", 50)
+    threshold, ratio, attack, release = map_compression(comp_val)
+    user_prompt = f"""Compare the original and mastered audio:
+**ORIGINAL Audio:**
+- Loudness: {orig_features['lufs']} LUFS | True Peak: {orig_features['true_peak_dbtp']} dBTP
+- RMS: {orig_features['rms_db']} dB | Crest Factor: {orig_features['crest_factor_db']} dB
+- Spectral Centroid: {orig_features['spectral_centroid_hz']} Hz | Rolloff: {orig_features['spectral_rolloff_hz']} Hz
+- Stereo Correlation: {orig_features['stereo_correlation'] if orig_features['stereo_correlation'] is not None else 'N/A (mono)'}
+- Band Energy: {json.dumps(orig_features['band_energy'])}
+**MASTERED Audio:**
+- Loudness: {mast_features['lufs']} LUFS | True Peak: {mast_features['true_peak_dbtp']} dBTP
+- RMS: {mast_features['rms_db']} dB | Crest Factor: {mast_features['crest_factor_db']} dB
+- Spectral Centroid: {mast_features['spectral_centroid_hz']} Hz | Rolloff: {mast_features['spectral_rolloff_hz']} Hz
+- Stereo Correlation: {mast_features['stereo_correlation'] if mast_features['stereo_correlation'] is not None else 'N/A (mono)'}
+- Band Energy: {json.dumps(mast_features['band_energy'])}
+**Settings Applied:**
+- Lows (200 Hz shelf): {settings_dict.get('lows_db', 0)} dB
+- Mids (1.2 kHz peak): {settings_dict.get('mid_boost_db', 0)} dB
+- Highs (10 kHz shelf): {settings_dict.get('highs_db', 0)} dB
+- Bass Boost: {settings_dict.get('bass_boost_db', 0)} dB @ {settings_dict.get('bass_freq_hz', 60)} Hz
+- Compression: slider {comp_val} → threshold {threshold:.1f} dB, ratio {ratio:.1f}:1, attack {attack:.0f} ms, release {release:.0f} ms
+- Stereo Width: {settings_dict.get('stereo_width', 100)}%
+- Target LUFS: {settings_dict.get('target_lufs', -14)}"""
+    response = _call_gemini(_COMPARE_SYSTEM, user_prompt)
+    if response is None:
+        return "*Set GOOGLE_API_KEY to enable AI comparison report.*"
+    return response

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import gradio as gr
 from dsp import master_audio
 from presets import PRESETS
 from visualization import plot_waveform_comparison, plot_spectrum_comparison
 # ---------------------------------------------------------------------------
@@ -35,6 +36,49 @@ def toggle_custom_lufs(target_choice):
     return gr.update(visible=(target_choice == "Custom"))
 def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
             comp_val, width, target_choice, custom_lufs):
     """Run the mastering pipeline and return all outputs."""
@@ -69,11 +113,25 @@ def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_fre
         f"{mono_note}"
     )
     return (
         output_path,
         waveform_fig, spectrum_fig,
         stats_md,
         gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
     )
@@ -93,13 +151,14 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
         '</div></div>'
     )
-    # --- Preset & Target LUFS (side by side) ---
     with gr.Row():
         preset_dropdown = gr.Dropdown(
             label="Preset",
             choices=list(PRESETS.keys()),
             value="-- None --",
         )
         target_dropdown = gr.Dropdown(
             label="Target LUFS",
             choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
@@ -156,6 +215,19 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
                 minimum=80, maximum=150, value=100, step=1,
             )
     # --- Playback ---
     ab_player = gr.Audio(label="Mastered", interactive=False)
     download_file = gr.DownloadButton("Download Mastered File", visible=False)
@@ -166,6 +238,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
         spectrum_plot = gr.Plot(label="Spectrum Comparison")
     stats_display = gr.Markdown()
     # --- Event wiring ---
     preset_dropdown.change(
@@ -182,6 +255,29 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
         outputs=[custom_lufs_input],
     )
     master_btn.click(
         process,
         inputs=[
@@ -194,6 +290,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
             ab_player,
             waveform_plot, spectrum_plot,
             stats_display, download_file,
         ],
     )

 from dsp import master_audio
 from presets import PRESETS
 from visualization import plot_waveform_comparison, plot_spectrum_comparison
+from analysis import recommend_settings, compare_master
 # ---------------------------------------------------------------------------
     return gr.update(visible=(target_choice == "Custom"))
+def ai_recommend(audio_path):
+    """Analyze raw audio and return AI-recommended settings + reasoning."""
+    if audio_path is None:
+        raise gr.Error("Please upload an audio file first.")
+    result = recommend_settings(audio_path)
+    if result is None:
+        return (
+            gr.update(), gr.update(), gr.update(),
+            gr.update(), gr.update(), gr.update(),
+            gr.update(),
+            "*Set GOOGLE_API_KEY to enable AI recommendations.*",
+            gr.update(visible=False),
+        )
+    if result.get("parse_error"):
+        return (
+            gr.update(), gr.update(), gr.update(),
+            gr.update(), gr.update(), gr.update(),
+            gr.update(),
+            result.get("reasoning", "*Could not parse AI response.*"),
+            gr.update(visible=False),
+        )
+    return (
+        result["lows_db"],
+        result["mid_boost_db"],
+        result["highs_db"],
+        result["bass_boost_db"],
+        result["bass_freq_hz"],
+        result["compression"],
+        result["stereo_width"],
+        result.get("reasoning", ""),
+        gr.update(visible=True),
+    )
+def apply_ai(ai_lows, ai_mids, ai_highs, ai_bass, ai_freq, ai_comp, ai_width):
+    """Populate sliders with AI-recommended values stored in State.
+    Returns the seven inputs unchanged, as a tuple in the same order they
+    were received; the click handler maps them positionally onto its outputs.
+    """
+    return ai_lows, ai_mids, ai_highs, ai_bass, ai_freq, ai_comp, ai_width
 def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
             comp_val, width, target_choice, custom_lufs):
     """Run the mastering pipeline and return all outputs."""
         f"{mono_note}"
     )
+    # AI comparison report
+    settings_dict = {
+        "lows_db": lows_db,
+        "mid_boost_db": mid_boost_db,
+        "highs_db": highs_db,
+        "bass_boost_db": bass_boost_db,
+        "bass_freq_hz": bass_freq_hz,
+        "compression": comp_val,
+        "stereo_width": width,
+        "target_lufs": target,
+    }
+    ai_report = compare_master(original, mastered, sr, settings_dict)
     return (
         output_path,
         waveform_fig, spectrum_fig,
         stats_md,
         gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
+        ai_report,
     )
         '</div></div>'
     )
+    # --- Preset, AI Recommend & Target LUFS ---
     with gr.Row():
         preset_dropdown = gr.Dropdown(
             label="Preset",
             choices=list(PRESETS.keys()),
             value="-- None --",
         )
+        ai_recommend_btn = gr.Button("AI Recommend", variant="secondary")
         target_dropdown = gr.Dropdown(
             label="Target LUFS",
             choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
                 minimum=80, maximum=150, value=100, step=1,
             )
+    # --- AI Recommendations ---
+    ai_reasoning_display = gr.Markdown(value="", visible=True)
+    apply_ai_btn = gr.Button("Apply AI Settings", variant="secondary", visible=False)
+    # --- Hidden states for AI-recommended values ---
+    ai_lows_state = gr.State(0.0)
+    ai_mids_state = gr.State(0.0)
+    ai_highs_state = gr.State(0.0)
+    ai_bass_state = gr.State(0.0)
+    ai_freq_state = gr.State(60)
+    ai_comp_state = gr.State(50)
+    ai_width_state = gr.State(100)
     # --- Playback ---
     ab_player = gr.Audio(label="Mastered", interactive=False)
     download_file = gr.DownloadButton("Download Mastered File", visible=False)
         spectrum_plot = gr.Plot(label="Spectrum Comparison")
     stats_display = gr.Markdown()
+    ai_report_display = gr.Markdown(value="", visible=True)
     # --- Event wiring ---
     preset_dropdown.change(
         outputs=[custom_lufs_input],
     )
+    ai_recommend_btn.click(
+        ai_recommend,
+        inputs=[audio_input],
+        outputs=[
+            ai_lows_state, ai_mids_state, ai_highs_state,
+            ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
+            ai_reasoning_display, apply_ai_btn,
+        ],
+    )
+    apply_ai_btn.click(
+        apply_ai,
+        inputs=[
+            ai_lows_state, ai_mids_state, ai_highs_state,
+            ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
+        ],
+        outputs=[
+            lows_slider, mid_boost_slider, highs_slider,
+            bass_boost_slider, bass_freq_slider,
+            comp_slider, width_slider,
+        ],
+    )
     master_btn.click(
         process,
         inputs=[
             ab_player,
             waveform_plot, spectrum_plot,
             stats_display, download_file,
+            ai_report_display,
         ],
     )

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ numpy>=1.24.0
 soundfile>=0.12.0
 matplotlib>=3.7.0
 scipy>=1.10.0

 soundfile>=0.12.0
 matplotlib>=3.7.0
 scipy>=1.10.0
+google-generativeai>=0.8.0