Upload folder using huggingface_hub
Browse files- .gitignore +4 -0
- CHANGELOG.md +14 -0
- analysis.py +300 -0
- app.py +98 -1
- requirements.txt +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.wav
|
| 4 |
+
nul
|
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
| 1 |
# Changelog — Audio Mastering Suite
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
## v3.2 — 2026-03-04
|
| 4 |
|
| 5 |
### Genre Expansion
|
|
|
|
| 1 |
# Changelog — Audio Mastering Suite
|
| 2 |
|
| 3 |
+
## v3.3 — 2026-03-10
|
| 4 |
+
|
| 5 |
+
### AI Analysis (Gemini Pro 3.1)
|
| 6 |
+
- **AI Recommend button** — Analyzes uploaded audio (spectral profile, dynamics, stereo field) and recommends optimal mastering settings via Google Gemini Pro 3.1
|
| 7 |
+
- **Apply AI Settings** — One-click button to populate all 7 sliders with AI-recommended values
|
| 8 |
+
- **Post-master AI report** — After mastering, Gemini compares original vs mastered audio and provides a quality assessment with actionable feedback
|
| 9 |
+
- **Audio feature extraction** — New `analysis.py` module: spectral centroid, spectral rolloff, 6-band energy distribution, crest factor, dynamic range, stereo correlation
|
| 10 |
+
- **Graceful degradation** — If `GOOGLE_API_KEY` is not set, AI features show a helpful message instead of crashing
|
| 11 |
+
|
| 12 |
+
### Dependencies
|
| 13 |
+
- Added `google-generativeai>=0.8.0`
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
## v3.2 — 2026-03-04
|
| 18 |
|
| 19 |
### Genre Expansion
|
analysis.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AI-powered audio analysis using Gemini Pro — feature extraction and recommendations."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import numpy as np
|
| 6 |
+
from scipy.signal import welch
|
| 7 |
+
|
| 8 |
+
from dsp import load_audio, map_compression
|
| 9 |
+
from loudness import measure_loudness, measure_true_peak
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ---------------------------------------------------------------------------
|
| 13 |
+
# Audio feature extraction
|
| 14 |
+
# ---------------------------------------------------------------------------
|
| 15 |
+
|
| 16 |
+
# Analysis bands as (label, low_hz, high_hz). extract_features() treats each
# band as the half-open interval [low_hz, high_hz) when masking Welch
# frequency bins, so adjacent bands never share a bin.
_BANDS = [
    ("Sub-bass", 20, 60),
    ("Bass", 60, 250),
    ("Low-Mids", 250, 500),
    ("Mids", 500, 2000),
    ("Upper-Mids", 2000, 6000),
    ("Highs", 6000, 20000),
]


def _to_db(amplitude):
    """Convert a linear amplitude to dB (1 decimal), flooring silence at -100.0."""
    amplitude = float(amplitude)
    if amplitude > 0:
        # float(...) keeps the result a plain Python float (JSON-serializable).
        return round(20.0 * float(np.log10(amplitude)), 1)
    return -100.0


def extract_features(audio, sample_rate):
    """Extract audio features for AI analysis.

    Args:
        audio: numpy array, shape (samples,) or (samples, channels).
        sample_rate: int, sampling rate in Hz.

    Returns:
        dict with spectral, dynamic, and stereo measurements:
        ``spectral_centroid_hz``, ``spectral_rolloff_hz``, ``band_energy``
        (label -> dB), ``rms_db``, ``peak_db``, ``crest_factor_db``,
        ``dynamic_range_db``, ``stereo_correlation`` (None for mono input),
        ``lufs``, ``true_peak_dbtp`` and ``is_mono``. All values computed
        here are plain Python floats/bools so the dict can be passed to
        ``json.dumps``; ``true_peak_dbtp`` is returned exactly as
        ``measure_true_peak`` produced it.

    Raises:
        ValueError: if ``audio`` contains no samples.
    """
    audio = np.asarray(audio)
    if audio.size == 0:
        raise ValueError("Cannot extract features from empty audio.")

    # Convert to mono for spectral analysis. Working in float64 avoids integer
    # overflow in ``mono ** 2`` and float32 scalars leaking into the result.
    mono = audio.mean(axis=1) if audio.ndim == 2 else audio
    mono = np.asarray(mono, dtype=np.float64)

    # --- Spectral analysis via Welch ---
    nperseg = min(8192, len(mono))
    freqs, psd = welch(mono, fs=sample_rate, nperseg=nperseg)

    total_energy = float(np.sum(psd))
    if total_energy > 0:
        # Centroid: PSD-weighted mean frequency.
        spectral_centroid = float(np.sum(freqs * psd) / total_energy)
        # Rolloff: first bin at which the cumulative PSD reaches 85% of total.
        rolloff_idx = int(np.searchsorted(np.cumsum(psd), 0.85 * total_energy))
        spectral_rolloff = float(freqs[min(rolloff_idx, len(freqs) - 1)])
    else:
        spectral_centroid = 0.0
        spectral_rolloff = 0.0

    # Band energy distribution (dB) from the mean PSD inside each band.
    band_energy = {}
    for name, lo, hi in _BANDS:
        mask = (freqs >= lo) & (freqs < hi)
        band_rms = np.sqrt(np.mean(psd[mask])) if np.any(mask) else 0.0
        band_energy[name] = _to_db(band_rms)

    # --- Dynamics ---
    rms_db = _to_db(np.sqrt(np.mean(mono ** 2)))
    peak_db = _to_db(np.max(np.abs(mono)))
    crest_factor = round(peak_db - rms_db, 1)
    dynamic_range = crest_factor  # simplified: same as crest factor for full-file

    # --- Stereo correlation (first two channels) ---
    is_mono = bool(audio.ndim == 1 or audio.shape[1] == 1)
    if is_mono:
        stereo_correlation = None
    else:
        left = np.asarray(audio[:, 0], dtype=np.float64)
        right = np.asarray(audio[:, 1], dtype=np.float64)
        # A constant (e.g. silent) channel has zero variance, which makes the
        # Pearson coefficient NaN; report 0.0 instead of leaking NaN downstream.
        with np.errstate(divide="ignore", invalid="ignore"):
            correlation = float(np.corrcoef(left, right)[0, 1])
        stereo_correlation = round(correlation, 3) if np.isfinite(correlation) else 0.0

    # --- Loudness (reuse existing functions) ---
    lufs = measure_loudness(audio, sample_rate)
    true_peak = measure_true_peak(audio, sample_rate)

    return {
        "spectral_centroid_hz": round(spectral_centroid, 1),
        "spectral_rolloff_hz": round(spectral_rolloff, 1),
        "band_energy": band_energy,
        "rms_db": rms_db,
        "peak_db": peak_db,
        "crest_factor_db": crest_factor,
        "dynamic_range_db": dynamic_range,
        "stereo_correlation": stereo_correlation,
        # Non-finite loudness (e.g. -inf for silence) is reported as the floor.
        "lufs": round(float(lufs), 1) if np.isfinite(lufs) else -100.0,
        "true_peak_dbtp": true_peak,
        "is_mono": is_mono,
    }
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
# Gemini API wrapper
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
|
| 113 |
+
def _get_gemini_model():
    """Initialize and return the Gemini model, or None if it is unavailable.

    Returns:
        A ``google.generativeai.GenerativeModel`` for ``"gemini-2.5-pro"``, or
        None when ``GOOGLE_API_KEY`` is unset/blank or the SDK cannot be
        imported or configured.
    """
    api_key = (os.environ.get("GOOGLE_API_KEY") or "").strip()
    if not api_key:
        return None

    import logging

    try:
        # Imported lazily so the module still loads without the optional SDK.
        import google.generativeai as genai

        genai.configure(api_key=api_key)
        # NOTE(review): the changelog advertises "Gemini Pro 3.1" — confirm
        # that this model id is the intended one.
        return genai.GenerativeModel("gemini-2.5-pro")
    except Exception:
        # Best-effort by design (AI features degrade gracefully), but leave a
        # trace so a missing SDK is not mistaken for a missing API key.
        logging.getLogger(__name__).warning(
            "Gemini model could not be initialized", exc_info=True
        )
        return None
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def _call_gemini(system_prompt, user_prompt):
    """Send one prompt to Gemini and return the reply text.

    Args:
        system_prompt: instruction text placed first in the message.
        user_prompt: request text appended after a blank line.

    Returns:
        The response text; None when no model is available; or a markdown
        ``*AI analysis unavailable: ...*`` message when the call raises.
    """
    gemini = _get_gemini_model()
    if gemini is None:
        return None

    # Both prompts travel together as a single user turn.
    combined_prompt = f"{system_prompt}\n\n{user_prompt}"
    message = {"role": "user", "parts": [combined_prompt]}
    try:
        return gemini.generate_content([message]).text
    except Exception as exc:
        # Errors are reported as text so callers can display them as-is.
        return f"*AI analysis unavailable: {exc}*"
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# ---------------------------------------------------------------------------
|
| 142 |
+
# Phase 1: AI-recommended settings
|
| 143 |
+
# ---------------------------------------------------------------------------
|
| 144 |
+
|
| 145 |
+
# System prompt for the settings recommendation request. The JSON keys listed
# here are the ones recommend_settings() reads back and clamps.
_RECOMMEND_SYSTEM = """You are an expert audio mastering engineer. Analyze the audio measurements below and recommend optimal mastering settings.

You have access to these controls:
- Lows: Low shelf at 200 Hz, range -3.0 to +3.0 dB, step 0.5 dB
- Mids: Peak filter at 1.2 kHz (Q=1.0), range -3.0 to +3.0 dB, step 0.1 dB
- Highs: High shelf at 10 kHz (Q=0.7), range -3.0 to +3.0 dB, step 0.5 dB
- Bass Boost: Peak filter (Q=2.0), range 0 to +3.0 dB, step 0.5 dB
- Bass Frequency: Center frequency for bass boost, range 40-100 Hz, step 1 Hz
- Compression: 0 (light) to 100 (heavy). Maps to: threshold -14 to -22 dB, ratio 1.1:1 to 2.5:1, release 250ms to 100ms, fixed 30ms attack
- Stereo Width: 80% (narrow) to 150% (wide). 100% = no change. M/S encoding above 200 Hz only.

Return ONLY a valid JSON object with these exact keys and a "reasoning" field containing a brief markdown explanation (3-5 bullet points):
{
"lows_db": number,
"mid_boost_db": number,
"highs_db": number,
"bass_boost_db": number,
"bass_freq_hz": integer,
"compression": integer,
"stereo_width": integer,
"reasoning": "markdown string"
}

Keep values within the valid ranges. Be conservative — subtle moves are better than aggressive ones."""
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
# Slider key -> (minimum, maximum, default, is_integer); mirrors the ranges
# stated in the recommendation system prompt.
_SETTING_LIMITS = {
    "lows_db": (-3.0, 3.0, 0.0, False),
    "mid_boost_db": (-3.0, 3.0, 0.0, False),
    "highs_db": (-3.0, 3.0, 0.0, False),
    "bass_boost_db": (0.0, 3.0, 0.0, False),
    "bass_freq_hz": (40, 100, 60, True),
    "compression": (0, 100, 50, True),
    "stereo_width": (80, 150, 100, True),
}


def _parse_recommendation(response):
    """Parse the model reply into a settings dict with clamped slider values.

    Raises ValueError, TypeError, KeyError or OverflowError when the reply is
    not a usable JSON object.
    """
    text = response.strip()
    # The model may wrap the JSON in a markdown fence or surround it with
    # prose, so take the outermost {...} span rather than trusting line layout.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]

    result = json.loads(text)
    if not isinstance(result, dict):
        raise TypeError("expected a JSON object")
    # Guards against unrelated JSON (e.g. an API error body embedded in an
    # error message) being mistaken for a recommendation made of defaults.
    if not any(key in result for key in _SETTING_LIMITS):
        raise KeyError("no mastering settings in response")

    for key, (lo, hi, default, is_integer) in _SETTING_LIMITS.items():
        number = float(result.get(key, default))
        if not np.isfinite(number):
            # max()/min() would silently turn NaN into a range bound.
            raise ValueError(f"non-finite value for {key}")
        number = max(lo, min(hi, number))
        result[key] = int(round(number)) if is_integer else number

    reasoning = result.get("reasoning")
    if isinstance(reasoning, list):
        reasoning = "\n".join(f"- {item}" for item in reasoning)
    result["reasoning"] = str(reasoning) if reasoning else "*No explanation provided.*"
    return result


def recommend_settings(audio_path):
    """Analyze raw audio and return AI-recommended mastering settings.

    Args:
        audio_path: path to the uploaded audio file.

    Returns:
        dict with the seven recommended slider values (clamped to their valid
        ranges) and a ``reasoning`` markdown string; a dict
        ``{"reasoning": <raw response>, "parse_error": True}`` when the reply
        cannot be parsed; or None if AI is unavailable.
    """
    audio, sr = load_audio(audio_path)
    features = extract_features(audio, sr)

    correlation = features["stereo_correlation"]
    correlation_text = correlation if correlation is not None else "N/A (mono)"
    band_lines = "\n".join(
        f"- {name}: {level} dB" for name, level in features["band_energy"].items()
    )

    user_prompt = f"""Analyze this audio and recommend mastering settings:

**Audio Measurements:**
- Integrated Loudness: {features['lufs']} LUFS
- True Peak: {features['true_peak_dbtp']} dBTP
- RMS Level: {features['rms_db']} dB
- Crest Factor: {features['crest_factor_db']} dB
- Spectral Centroid: {features['spectral_centroid_hz']} Hz
- Spectral Rolloff (85%): {features['spectral_rolloff_hz']} Hz
- Stereo Correlation: {correlation_text}
- Mono: {features['is_mono']}

**Band Energy (dB):**
{band_lines}

Return the JSON object with recommended settings."""

    response = _call_gemini(_RECOMMEND_SYSTEM, user_prompt)
    if response is None:
        return None

    try:
        return _parse_recommendation(response)
    except (ValueError, TypeError, KeyError, OverflowError):
        # json.JSONDecodeError is a ValueError. Surface the raw reply (which
        # may be an error message from the API wrapper) instead of crashing.
        return {"reasoning": response, "parse_error": True}
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# ---------------------------------------------------------------------------
|
| 232 |
+
# Phase 2: Post-master comparison report
|
| 233 |
+
# ---------------------------------------------------------------------------
|
| 234 |
+
|
| 235 |
+
# System prompt for the post-master comparison report.
_COMPARE_SYSTEM = """You are an expert audio mastering engineer reviewing a completed master. Compare the original and mastered audio measurements below. Assess whether the mastering improved the audio quality.

Format your response as markdown with these sections:
### Overall Assessment
(1-2 sentences)

### What Worked Well
(bullet points)

### Suggested Improvements
(bullet points with specific slider recommendations if applicable)

### Technical Notes
(any concerns about dynamics, phase, or frequency balance)

Be concise and specific. Reference actual measurement changes."""


def _describe_features(features):
    """Format one extract_features() result as the prompt's measurement bullets."""
    correlation = features["stereo_correlation"]
    correlation_text = correlation if correlation is not None else "N/A (mono)"
    # default=float lets NumPy scalar values serialize instead of raising.
    bands = json.dumps(features["band_energy"], default=float)
    return (
        f"- Loudness: {features['lufs']} LUFS | True Peak: {features['true_peak_dbtp']} dBTP\n"
        f"- RMS: {features['rms_db']} dB | Crest Factor: {features['crest_factor_db']} dB\n"
        f"- Spectral Centroid: {features['spectral_centroid_hz']} Hz | Rolloff: {features['spectral_rolloff_hz']} Hz\n"
        f"- Stereo Correlation: {correlation_text}\n"
        f"- Band Energy: {bands}"
    )


def compare_master(original, mastered, sample_rate, settings_dict):
    """Compare original vs mastered audio and return AI quality report.

    Args:
        original: numpy array of original audio.
        mastered: numpy array of mastered audio.
        sample_rate: int.
        settings_dict: dict with the mastering settings that were applied;
            missing keys fall back to neutral defaults and None is treated
            as an empty dict.

    Returns:
        str: markdown-formatted comparison report, or fallback message.
    """
    settings = settings_dict or {}
    orig_features = extract_features(original, sample_rate)
    mast_features = extract_features(mastered, sample_rate)

    # Build the compression details from slider value
    comp_val = settings.get("compression", 50)
    threshold, ratio, attack, release = map_compression(comp_val)

    user_prompt = f"""Compare the original and mastered audio:

**ORIGINAL Audio:**
{_describe_features(orig_features)}

**MASTERED Audio:**
{_describe_features(mast_features)}

**Settings Applied:**
- Lows (200 Hz shelf): {settings.get('lows_db', 0)} dB
- Mids (1.2 kHz peak): {settings.get('mid_boost_db', 0)} dB
- Highs (10 kHz shelf): {settings.get('highs_db', 0)} dB
- Bass Boost: {settings.get('bass_boost_db', 0)} dB @ {settings.get('bass_freq_hz', 60)} Hz
- Compression: slider {comp_val} → threshold {threshold:.1f} dB, ratio {ratio:.1f}:1, attack {attack:.0f} ms, release {release:.0f} ms
- Stereo Width: {settings.get('stereo_width', 100)}%
- Target LUFS: {settings.get('target_lufs', -14)}"""

    response = _call_gemini(_COMPARE_SYSTEM, user_prompt)
    if response is None:
        return "*Set GOOGLE_API_KEY to enable AI comparison report.*"
    return response
|
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
from dsp import master_audio
|
| 5 |
from presets import PRESETS
|
| 6 |
from visualization import plot_waveform_comparison, plot_spectrum_comparison
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
# ---------------------------------------------------------------------------
|
|
@@ -35,6 +36,49 @@ def toggle_custom_lufs(target_choice):
|
|
| 35 |
return gr.update(visible=(target_choice == "Custom"))
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
|
| 39 |
comp_val, width, target_choice, custom_lufs):
|
| 40 |
"""Run the mastering pipeline and return all outputs."""
|
|
@@ -69,11 +113,25 @@ def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_fre
|
|
| 69 |
f"{mono_note}"
|
| 70 |
)
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return (
|
| 73 |
output_path,
|
| 74 |
waveform_fig, spectrum_fig,
|
| 75 |
stats_md,
|
| 76 |
gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
|
|
|
|
| 77 |
)
|
| 78 |
|
| 79 |
|
|
@@ -93,13 +151,14 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
|
|
| 93 |
'</div></div>'
|
| 94 |
)
|
| 95 |
|
| 96 |
-
# --- Preset & Target LUFS
|
| 97 |
with gr.Row():
|
| 98 |
preset_dropdown = gr.Dropdown(
|
| 99 |
label="Preset",
|
| 100 |
choices=list(PRESETS.keys()),
|
| 101 |
value="-- None --",
|
| 102 |
)
|
|
|
|
| 103 |
target_dropdown = gr.Dropdown(
|
| 104 |
label="Target LUFS",
|
| 105 |
choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
|
|
@@ -156,6 +215,19 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
|
|
| 156 |
minimum=80, maximum=150, value=100, step=1,
|
| 157 |
)
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
# --- Playback ---
|
| 160 |
ab_player = gr.Audio(label="Mastered", interactive=False)
|
| 161 |
download_file = gr.DownloadButton("Download Mastered File", visible=False)
|
|
@@ -166,6 +238,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
|
|
| 166 |
spectrum_plot = gr.Plot(label="Spectrum Comparison")
|
| 167 |
|
| 168 |
stats_display = gr.Markdown()
|
|
|
|
| 169 |
|
| 170 |
# --- Event wiring ---
|
| 171 |
preset_dropdown.change(
|
|
@@ -182,6 +255,29 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
|
|
| 182 |
outputs=[custom_lufs_input],
|
| 183 |
)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
master_btn.click(
|
| 186 |
process,
|
| 187 |
inputs=[
|
|
@@ -194,6 +290,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
|
|
| 194 |
ab_player,
|
| 195 |
waveform_plot, spectrum_plot,
|
| 196 |
stats_display, download_file,
|
|
|
|
| 197 |
],
|
| 198 |
)
|
| 199 |
|
|
|
|
| 4 |
from dsp import master_audio
|
| 5 |
from presets import PRESETS
|
| 6 |
from visualization import plot_waveform_comparison, plot_spectrum_comparison
|
| 7 |
+
from analysis import recommend_settings, compare_master
|
| 8 |
|
| 9 |
|
| 10 |
# ---------------------------------------------------------------------------
|
|
|
|
| 36 |
return gr.update(visible=(target_choice == "Custom"))
|
| 37 |
|
| 38 |
|
| 39 |
+
def ai_recommend(audio_path):
    """Request AI mastering settings for the uploaded file.

    Returns a 9-tuple matching the click handler's outputs: the seven slider
    values to store, the reasoning markdown, and an update controlling the
    visibility of the "Apply AI Settings" button.

    Raises:
        gr.Error: if no audio file has been uploaded.
    """
    if audio_path is None:
        raise gr.Error("Please upload an audio file first.")

    def unchanged(message):
        # Seven no-op updates leave the stored values alone; the apply button
        # stays hidden because there is nothing to apply.
        return (
            *(gr.update() for _ in range(7)),
            message,
            gr.update(visible=False),
        )

    recommendation = recommend_settings(audio_path)
    if recommendation is None:
        return unchanged("*Set GOOGLE_API_KEY to enable AI recommendations.*")
    if recommendation.get("parse_error"):
        return unchanged(
            recommendation.get("reasoning", "*Could not parse AI response.*")
        )

    slider_keys = (
        "lows_db", "mid_boost_db", "highs_db",
        "bass_boost_db", "bass_freq_hz",
        "compression", "stereo_width",
    )
    return (
        *(recommendation[key] for key in slider_keys),
        recommendation.get("reasoning", ""),
        gr.update(visible=True),
    )
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def apply_ai(ai_lows, ai_mids, ai_highs, ai_bass, ai_freq, ai_comp, ai_width):
    """Forward the stored AI-recommended values to the seven sliders, in order."""
    stored_values = (
        ai_lows, ai_mids, ai_highs,
        ai_bass, ai_freq,
        ai_comp, ai_width,
    )
    return stored_values
|
| 80 |
+
|
| 81 |
+
|
| 82 |
def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
|
| 83 |
comp_val, width, target_choice, custom_lufs):
|
| 84 |
"""Run the mastering pipeline and return all outputs."""
|
|
|
|
| 113 |
f"{mono_note}"
|
| 114 |
)
|
| 115 |
|
| 116 |
+
# AI comparison report
|
| 117 |
+
settings_dict = {
|
| 118 |
+
"lows_db": lows_db,
|
| 119 |
+
"mid_boost_db": mid_boost_db,
|
| 120 |
+
"highs_db": highs_db,
|
| 121 |
+
"bass_boost_db": bass_boost_db,
|
| 122 |
+
"bass_freq_hz": bass_freq_hz,
|
| 123 |
+
"compression": comp_val,
|
| 124 |
+
"stereo_width": width,
|
| 125 |
+
"target_lufs": target,
|
| 126 |
+
}
|
| 127 |
+
ai_report = compare_master(original, mastered, sr, settings_dict)
|
| 128 |
+
|
| 129 |
return (
|
| 130 |
output_path,
|
| 131 |
waveform_fig, spectrum_fig,
|
| 132 |
stats_md,
|
| 133 |
gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
|
| 134 |
+
ai_report,
|
| 135 |
)
|
| 136 |
|
| 137 |
|
|
|
|
| 151 |
'</div></div>'
|
| 152 |
)
|
| 153 |
|
| 154 |
+
# --- Preset, AI Recommend & Target LUFS ---
|
| 155 |
with gr.Row():
|
| 156 |
preset_dropdown = gr.Dropdown(
|
| 157 |
label="Preset",
|
| 158 |
choices=list(PRESETS.keys()),
|
| 159 |
value="-- None --",
|
| 160 |
)
|
| 161 |
+
ai_recommend_btn = gr.Button("AI Recommend", variant="secondary")
|
| 162 |
target_dropdown = gr.Dropdown(
|
| 163 |
label="Target LUFS",
|
| 164 |
choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
|
|
|
|
| 215 |
minimum=80, maximum=150, value=100, step=1,
|
| 216 |
)
|
| 217 |
|
| 218 |
+
# --- AI Recommendations ---
|
| 219 |
+
ai_reasoning_display = gr.Markdown(value="", visible=True)
|
| 220 |
+
apply_ai_btn = gr.Button("Apply AI Settings", variant="secondary", visible=False)
|
| 221 |
+
|
| 222 |
+
# --- Hidden states for AI-recommended values ---
|
| 223 |
+
ai_lows_state = gr.State(0.0)
|
| 224 |
+
ai_mids_state = gr.State(0.0)
|
| 225 |
+
ai_highs_state = gr.State(0.0)
|
| 226 |
+
ai_bass_state = gr.State(0.0)
|
| 227 |
+
ai_freq_state = gr.State(60)
|
| 228 |
+
ai_comp_state = gr.State(50)
|
| 229 |
+
ai_width_state = gr.State(100)
|
| 230 |
+
|
| 231 |
# --- Playback ---
|
| 232 |
ab_player = gr.Audio(label="Mastered", interactive=False)
|
| 233 |
download_file = gr.DownloadButton("Download Mastered File", visible=False)
|
|
|
|
| 238 |
spectrum_plot = gr.Plot(label="Spectrum Comparison")
|
| 239 |
|
| 240 |
stats_display = gr.Markdown()
|
| 241 |
+
ai_report_display = gr.Markdown(value="", visible=True)
|
| 242 |
|
| 243 |
# --- Event wiring ---
|
| 244 |
preset_dropdown.change(
|
|
|
|
| 255 |
outputs=[custom_lufs_input],
|
| 256 |
)
|
| 257 |
|
| 258 |
+
ai_recommend_btn.click(
|
| 259 |
+
ai_recommend,
|
| 260 |
+
inputs=[audio_input],
|
| 261 |
+
outputs=[
|
| 262 |
+
ai_lows_state, ai_mids_state, ai_highs_state,
|
| 263 |
+
ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
|
| 264 |
+
ai_reasoning_display, apply_ai_btn,
|
| 265 |
+
],
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
apply_ai_btn.click(
|
| 269 |
+
apply_ai,
|
| 270 |
+
inputs=[
|
| 271 |
+
ai_lows_state, ai_mids_state, ai_highs_state,
|
| 272 |
+
ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
|
| 273 |
+
],
|
| 274 |
+
outputs=[
|
| 275 |
+
lows_slider, mid_boost_slider, highs_slider,
|
| 276 |
+
bass_boost_slider, bass_freq_slider,
|
| 277 |
+
comp_slider, width_slider,
|
| 278 |
+
],
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
master_btn.click(
|
| 282 |
process,
|
| 283 |
inputs=[
|
|
|
|
| 290 |
ab_player,
|
| 291 |
waveform_plot, spectrum_plot,
|
| 292 |
stats_display, download_file,
|
| 293 |
+
ai_report_display,
|
| 294 |
],
|
| 295 |
)
|
| 296 |
|
requirements.txt
CHANGED
|
@@ -5,3 +5,4 @@ numpy>=1.24.0
|
|
| 5 |
soundfile>=0.12.0
|
| 6 |
matplotlib>=3.7.0
|
| 7 |
scipy>=1.10.0
|
|
|
|
|
|
| 5 |
soundfile>=0.12.0
|
| 6 |
matplotlib>=3.7.0
|
| 7 |
scipy>=1.10.0
|
| 8 |
+
google-generativeai>=0.8.0
|