AnimalMonk committed on
Commit
9c754af
·
verified ·
1 Parent(s): 255dc57

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .gitignore +4 -0
  2. CHANGELOG.md +14 -0
  3. analysis.py +300 -0
  4. app.py +98 -1
  5. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.wav
4
+ nul
CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
  # Changelog — Audio Mastering Suite
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ## v3.2 — 2026-03-04
4
 
5
  ### Genre Expansion
 
1
  # Changelog — Audio Mastering Suite
2
 
3
+ ## v3.3 — 2026-03-10
4
+
5
+ ### AI Analysis (Gemini Pro 3.1)
6
+ - **AI Recommend button** — Analyzes uploaded audio (spectral profile, dynamics, stereo field) and recommends optimal mastering settings via Google Gemini Pro 3.1
7
+ - **Apply AI Settings** — One-click button to populate all 7 sliders with AI-recommended values
8
+ - **Post-master AI report** — After mastering, Gemini compares original vs mastered audio and provides a quality assessment with actionable feedback
9
+ - **Audio feature extraction** — New `analysis.py` module: spectral centroid, spectral rolloff, 6-band energy distribution, crest factor, dynamic range, stereo correlation
10
+ - **Graceful degradation** — If `GOOGLE_API_KEY` is not set, AI features show a helpful message instead of crashing
11
+
12
+ ### Dependencies
13
+ - Added `google-generativeai>=0.8.0`
14
+
15
+ ---
16
+
17
  ## v3.2 — 2026-03-04
18
 
19
  ### Genre Expansion
analysis.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AI-powered audio analysis using Gemini Pro — feature extraction and recommendations."""
2
+
3
+ import json
4
+ import os
5
+ import numpy as np
6
+ from scipy.signal import welch
7
+
8
+ from dsp import load_audio, map_compression
9
+ from loudness import measure_loudness, measure_true_peak
10
+
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # Audio feature extraction
14
+ # ---------------------------------------------------------------------------
15
+
16
# Analysis bands as (label, lower edge in Hz, upper edge in Hz).
# The bands are contiguous: each band's upper edge is the next band's lower
# edge, so they are derived from a single list of edges.
_BAND_LABELS = ("Sub-bass", "Bass", "Low-Mids", "Mids", "Upper-Mids", "Highs")
_BAND_EDGES_HZ = (20, 60, 250, 500, 2000, 6000, 20000)
_BANDS = list(zip(_BAND_LABELS, _BAND_EDGES_HZ[:-1], _BAND_EDGES_HZ[1:]))
24
+
25
+
26
def extract_features(audio, sample_rate):
    """Extract audio features for AI analysis.

    Args:
        audio: numpy array, shape (samples,) or (samples, channels).
        sample_rate: int.

    Returns:
        dict with spectral, dynamic, and stereo measurements:
            spectral_centroid_hz, spectral_rolloff_hz: floats (0.0 when the
                Welch PSD carries no energy).
            band_energy: dict mapping each ``_BANDS`` label to a dB value
                (-100.0 when the band is empty or silent).
            rms_db, peak_db: level of the mono mix in dB (-100.0 for silence).
            crest_factor_db, dynamic_range_db: ``peak_db - rms_db``.
            stereo_correlation: correlation of the first two channels, or
                None for mono input.
            lufs: result of ``measure_loudness`` rounded to 0.1, or -100.0
                when that result is infinite.
            true_peak_dbtp: result of ``measure_true_peak``, passed through.
            is_mono: True for 1-D input or a single-column 2-D input.

    Raises:
        ValueError: if ``audio`` contains no samples.
    """
    # Fail early with a clear message instead of an obscure error from
    # welch()/np.max() further down.
    if audio.size == 0:
        raise ValueError("Cannot extract features from empty audio.")

    # Convert to mono for spectral analysis
    mono = audio.mean(axis=1) if audio.ndim == 2 else audio

    # --- Spectral analysis via Welch ---
    nperseg = min(8192, len(mono))
    freqs, psd = welch(mono, fs=sample_rate, nperseg=nperseg)

    total_energy = np.sum(psd)
    if total_energy > 0:
        # Spectral centroid: PSD-weighted mean frequency
        spectral_centroid = float(np.sum(freqs * psd) / total_energy)
        # Spectral rolloff: first frequency bin where cumulative PSD reaches 85%
        rolloff_idx = np.searchsorted(np.cumsum(psd), 0.85 * total_energy)
        spectral_rolloff = float(freqs[min(rolloff_idx, len(freqs) - 1)])
    else:
        spectral_centroid = 0.0
        spectral_rolloff = 0.0

    # Band energy distribution (dB). Values are converted to plain Python
    # floats so the dict can be passed to json.dumps even when the PSD is a
    # NumPy float32 array.
    band_energy = {}
    for name, lo, hi in _BANDS:
        mask = (freqs >= lo) & (freqs < hi)
        band_rms = np.sqrt(np.mean(psd[mask])) if np.any(mask) else 0.0
        if band_rms > 0:
            band_energy[name] = round(float(20.0 * np.log10(band_rms)), 1)
        else:
            band_energy[name] = -100.0

    # --- Dynamics ---
    rms = np.sqrt(np.mean(mono ** 2))
    peak = np.max(np.abs(mono))

    rms_db = round(float(20.0 * np.log10(rms)), 1) if rms > 0 else -100.0
    peak_db = round(float(20.0 * np.log10(peak)), 1) if peak > 0 else -100.0
    crest_factor = round(peak_db - rms_db, 1)
    dynamic_range = crest_factor  # simplified: same as crest factor for full-file

    # --- Stereo correlation ---
    is_mono = audio.ndim == 1 or audio.shape[1] == 1
    if is_mono:
        stereo_correlation = None
    else:
        left = audio[:, 0]
        right = audio[:, 1]
        # np.corrcoef divides by each channel's standard deviation, so a
        # constant (e.g. silent) channel yields NaN. Report 0.0 in that case
        # rather than leaking NaN into the prompt text.
        if np.ptp(left) > 0 and np.ptp(right) > 0:
            correlation = float(np.corrcoef(left, right)[0, 1])
        else:
            correlation = 0.0
        stereo_correlation = round(correlation, 3) if np.isfinite(correlation) else 0.0

    # --- Loudness (reuse existing functions) ---
    lufs = measure_loudness(audio, sample_rate)
    true_peak = measure_true_peak(audio, sample_rate)

    return {
        "spectral_centroid_hz": round(spectral_centroid, 1),
        "spectral_rolloff_hz": round(spectral_rolloff, 1),
        "band_energy": band_energy,
        "rms_db": rms_db,
        "peak_db": peak_db,
        "crest_factor_db": crest_factor,
        "dynamic_range_db": dynamic_range,
        "stereo_correlation": stereo_correlation,
        "lufs": round(lufs, 1) if not np.isinf(lufs) else -100.0,
        "true_peak_dbtp": true_peak,
        "is_mono": is_mono,
    }
107
+
108
+
109
+ # ---------------------------------------------------------------------------
110
+ # Gemini API wrapper
111
+ # ---------------------------------------------------------------------------
112
+
113
def _get_gemini_model():
    """Initialize and return the Gemini model, or None if it is unavailable.

    Returns:
        A ``genai.GenerativeModel`` instance, or None when ``GOOGLE_API_KEY``
        is unset/blank or when the client library cannot be imported or
        configured. Initialization failures are logged (not raised) so the
        AI features stay optional.
    """
    import logging

    # A whitespace-only key is as good as no key at all.
    api_key = (os.environ.get("GOOGLE_API_KEY") or "").strip()
    if not api_key:
        return None
    try:
        import google.generativeai as genai
        genai.configure(api_key=api_key)
        # NOTE(review): the changelog for this release mentions "Gemini Pro 3.1"
        # while the model requested here is gemini-2.5-pro — confirm which is intended.
        return genai.GenerativeModel("gemini-2.5-pro")
    except Exception:
        # Best-effort: keep returning None, but leave a trace so a missing
        # package or bad configuration is not mistaken for a missing API key.
        logging.getLogger(__name__).warning(
            "Gemini model initialization failed; AI features disabled.",
            exc_info=True,
        )
        return None
124
+
125
+
126
def _call_gemini(system_prompt, user_prompt):
    """Send both prompts to Gemini as a single user message.

    Returns:
        The response text; None when no model is available; or a markdown
        "unavailable" notice containing the error if the request fails.
    """
    gemini = _get_gemini_model()
    if gemini is None:
        return None

    try:
        # The system prompt is prepended to the user prompt, separated by a
        # blank line, and sent as one "user" turn.
        message = {"role": "user", "parts": [f"{system_prompt}\n\n{user_prompt}"]}
        reply = gemini.generate_content([message])
        return reply.text
    except Exception as e:
        return f"*AI analysis unavailable: {e}*"
139
+
140
+
141
+ # ---------------------------------------------------------------------------
142
+ # Phase 1: AI-recommended settings
143
+ # ---------------------------------------------------------------------------
144
+
145
# System prompt for the settings recommendation: describes the seven controls
# with their ranges and the exact JSON shape that recommend_settings() parses.
_RECOMMEND_SYSTEM = """You are an expert audio mastering engineer. Analyze the audio measurements below and recommend optimal mastering settings.

You have access to these controls:
- Lows: Low shelf at 200 Hz, range -3.0 to +3.0 dB, step 0.5 dB
- Mids: Peak filter at 1.2 kHz (Q=1.0), range -3.0 to +3.0 dB, step 0.1 dB
- Highs: High shelf at 10 kHz (Q=0.7), range -3.0 to +3.0 dB, step 0.5 dB
- Bass Boost: Peak filter (Q=2.0), range 0 to +3.0 dB, step 0.5 dB
- Bass Frequency: Center frequency for bass boost, range 40-100 Hz, step 1 Hz
- Compression: 0 (light) to 100 (heavy). Maps to: threshold -14 to -22 dB, ratio 1.1:1 to 2.5:1, release 250ms to 100ms, fixed 30ms attack
- Stereo Width: 80% (narrow) to 150% (wide). 100% = no change. M/S encoding above 200 Hz only.

Return ONLY a valid JSON object with these exact keys and a "reasoning" field containing a brief markdown explanation (3-5 bullet points):
{
  "lows_db": number,
  "mid_boost_db": number,
  "highs_db": number,
  "bass_boost_db": number,
  "bass_freq_hz": integer,
  "compression": integer,
  "stereo_width": integer,
  "reasoning": "markdown string"
}

Keep values within the valid ranges. Be conservative — subtle moves are better than aggressive ones."""
169
+
170
+
171
def _clamp_setting(raw, low, high, as_int=False):
    """Coerce *raw* to a finite number and clamp it to ``[low, high]``.

    Raises ValueError/TypeError when *raw* is not numeric or not finite.
    """
    number = float(raw)
    # NaN would slip through max()/min() and come out as the upper bound.
    if not np.isfinite(number):
        raise ValueError(f"non-finite setting value: {raw!r}")
    if as_int:
        number = int(number)
    return max(low, min(high, number))


def _parse_recommendation(response):
    """Turn the raw model reply into a dict of clamped slider values.

    Returns the parsed dict (with a "reasoning" entry), or
    ``{"reasoning": response, "parse_error": True}`` when the reply is not a
    JSON object with numeric settings.
    """
    # (key, low, high, default, integer-valued)
    specs = (
        ("lows_db", -3.0, 3.0, 0, False),
        ("mid_boost_db", -3.0, 3.0, 0, False),
        ("highs_db", -3.0, 3.0, 0, False),
        ("bass_boost_db", 0, 3.0, 0, False),
        ("bass_freq_hz", 40, 100, 60, True),
        ("compression", 0, 100, 50, True),
        ("stereo_width", 80, 150, 100, True),
    )
    try:
        text = response.strip()
        # The model may wrap the JSON in a markdown code fence or surround it
        # with prose; keep only the outermost {...} span.
        start, end = text.find("{"), text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]
        result = json.loads(text)
        if not isinstance(result, dict):
            raise TypeError("expected a JSON object")

        # Clamp values to valid ranges
        for key, low, high, default, as_int in specs:
            result[key] = _clamp_setting(result.get(key, default), low, high, as_int)

        if "reasoning" not in result:
            result["reasoning"] = "*No explanation provided.*"
        return result
    except (ValueError, TypeError, AttributeError):
        # json.JSONDecodeError is a ValueError; non-numeric values raise
        # ValueError/TypeError. Fall back to showing the raw reply.
        return {"reasoning": response, "parse_error": True}


def recommend_settings(audio_path):
    """Analyze raw audio and return AI-recommended mastering settings.

    Args:
        audio_path: path to the uploaded audio file.

    Returns:
        dict with recommended slider values and reasoning markdown;
        ``{"reasoning": <raw reply>, "parse_error": True}`` when the reply
        cannot be parsed; or None if AI is unavailable.
    """
    audio, sr = load_audio(audio_path)
    features = extract_features(audio, sr)

    correlation = features['stereo_correlation']
    if correlation is None:
        correlation = 'N/A (mono)'
    band_lines = "\n".join(
        f'- {k}: {v} dB' for k, v in features['band_energy'].items()
    )

    user_prompt = f"""Analyze this audio and recommend mastering settings:

**Audio Measurements:**
- Integrated Loudness: {features['lufs']} LUFS
- True Peak: {features['true_peak_dbtp']} dBTP
- RMS Level: {features['rms_db']} dB
- Crest Factor: {features['crest_factor_db']} dB
- Spectral Centroid: {features['spectral_centroid_hz']} Hz
- Spectral Rolloff (85%): {features['spectral_rolloff_hz']} Hz
- Stereo Correlation: {correlation}
- Mono: {features['is_mono']}

**Band Energy (dB):**
{band_lines}

Return the JSON object with recommended settings."""

    response = _call_gemini(_RECOMMEND_SYSTEM, user_prompt)
    if response is None:
        return None

    return _parse_recommendation(response)
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # Phase 2: Post-master comparison report
233
+ # ---------------------------------------------------------------------------
234
+
235
# System prompt for the post-master report: fixes the four markdown sections
# the model is asked to produce.
_COMPARE_SYSTEM = "\n".join((
    "You are an expert audio mastering engineer reviewing a completed master. Compare the original and mastered audio measurements below. Assess whether the mastering improved the audio quality.",
    "",
    "Format your response as markdown with these sections:",
    "### Overall Assessment",
    "(1-2 sentences)",
    "",
    "### What Worked Well",
    "(bullet points)",
    "",
    "### Suggested Improvements",
    "(bullet points with specific slider recommendations if applicable)",
    "",
    "### Technical Notes",
    "(any concerns about dynamics, phase, or frequency balance)",
    "",
    "Be concise and specific. Reference actual measurement changes.",
))
251
+
252
+
253
def _format_feature_block(title, features):
    """Render one measurement set as the markdown bullet list used in the comparison prompt."""
    correlation = features['stereo_correlation']
    if correlation is None:
        correlation = 'N/A (mono)'
    # default=float: band values may be NumPy scalars (e.g. float32), which
    # json.dumps cannot serialize on its own.
    bands = json.dumps(features['band_energy'], default=float)
    return "\n".join([
        f"**{title} Audio:**",
        f"- Loudness: {features['lufs']} LUFS | True Peak: {features['true_peak_dbtp']} dBTP",
        f"- RMS: {features['rms_db']} dB | Crest Factor: {features['crest_factor_db']} dB",
        f"- Spectral Centroid: {features['spectral_centroid_hz']} Hz | Rolloff: {features['spectral_rolloff_hz']} Hz",
        f"- Stereo Correlation: {correlation}",
        f"- Band Energy: {bands}",
    ])


def compare_master(original, mastered, sample_rate, settings_dict):
    """Compare original vs mastered audio and return AI quality report.

    Args:
        original: numpy array of original audio.
        mastered: numpy array of mastered audio.
        sample_rate: int.
        settings_dict: dict with the mastering settings that were applied.
            Missing keys fall back to neutral defaults in the prompt.

    Returns:
        str: markdown-formatted comparison report, or fallback message.
    """
    orig_features = extract_features(original, sample_rate)
    mast_features = extract_features(mastered, sample_rate)

    # Build the compression details from slider value
    comp_val = settings_dict.get("compression", 50)
    threshold, ratio, attack, release = map_compression(comp_val)

    settings_block = "\n".join([
        "**Settings Applied:**",
        f"- Lows (200 Hz shelf): {settings_dict.get('lows_db', 0)} dB",
        f"- Mids (1.2 kHz peak): {settings_dict.get('mid_boost_db', 0)} dB",
        f"- Highs (10 kHz shelf): {settings_dict.get('highs_db', 0)} dB",
        f"- Bass Boost: {settings_dict.get('bass_boost_db', 0)} dB @ {settings_dict.get('bass_freq_hz', 60)} Hz",
        f"- Compression: slider {comp_val} → threshold {threshold:.1f} dB, ratio {ratio:.1f}:1, attack {attack:.0f} ms, release {release:.0f} ms",
        f"- Stereo Width: {settings_dict.get('stereo_width', 100)}%",
        f"- Target LUFS: {settings_dict.get('target_lufs', -14)}",
    ])

    # Sections are separated by one blank line.
    user_prompt = "\n\n".join([
        "Compare the original and mastered audio:",
        _format_feature_block("ORIGINAL", orig_features),
        _format_feature_block("MASTERED", mast_features),
        settings_block,
    ])

    response = _call_gemini(_COMPARE_SYSTEM, user_prompt)
    if response is None:
        return "*Set GOOGLE_API_KEY to enable AI comparison report.*"
    return response
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  from dsp import master_audio
5
  from presets import PRESETS
6
  from visualization import plot_waveform_comparison, plot_spectrum_comparison
 
7
 
8
 
9
  # ---------------------------------------------------------------------------
@@ -35,6 +36,49 @@ def toggle_custom_lufs(target_choice):
35
  return gr.update(visible=(target_choice == "Custom"))
36
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
39
  comp_val, width, target_choice, custom_lufs):
40
  """Run the mastering pipeline and return all outputs."""
@@ -69,11 +113,25 @@ def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_fre
69
  f"{mono_note}"
70
  )
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  return (
73
  output_path,
74
  waveform_fig, spectrum_fig,
75
  stats_md,
76
  gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
 
77
  )
78
 
79
 
@@ -93,13 +151,14 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
93
  '</div></div>'
94
  )
95
 
96
- # --- Preset & Target LUFS (side by side) ---
97
  with gr.Row():
98
  preset_dropdown = gr.Dropdown(
99
  label="Preset",
100
  choices=list(PRESETS.keys()),
101
  value="-- None --",
102
  )
 
103
  target_dropdown = gr.Dropdown(
104
  label="Target LUFS",
105
  choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
@@ -156,6 +215,19 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
156
  minimum=80, maximum=150, value=100, step=1,
157
  )
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # --- Playback ---
160
  ab_player = gr.Audio(label="Mastered", interactive=False)
161
  download_file = gr.DownloadButton("Download Mastered File", visible=False)
@@ -166,6 +238,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
166
  spectrum_plot = gr.Plot(label="Spectrum Comparison")
167
 
168
  stats_display = gr.Markdown()
 
169
 
170
  # --- Event wiring ---
171
  preset_dropdown.change(
@@ -182,6 +255,29 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
182
  outputs=[custom_lufs_input],
183
  )
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  master_btn.click(
186
  process,
187
  inputs=[
@@ -194,6 +290,7 @@ with gr.Blocks(title="Audio Mastering Suite", theme=gr.themes.Soft()) as demo:
194
  ab_player,
195
  waveform_plot, spectrum_plot,
196
  stats_display, download_file,
 
197
  ],
198
  )
199
 
 
4
  from dsp import master_audio
5
  from presets import PRESETS
6
  from visualization import plot_waveform_comparison, plot_spectrum_comparison
7
+ from analysis import recommend_settings, compare_master
8
 
9
 
10
  # ---------------------------------------------------------------------------
 
36
  return gr.update(visible=(target_choice == "Custom"))
37
 
38
 
39
def ai_recommend(audio_path):
    """Run the AI recommendation for the uploaded file.

    Returns a 9-tuple: seven slider values, the reasoning markdown, and an
    update for the "Apply AI Settings" button. When no usable recommendation
    exists, the seven values are no-op ``gr.update()`` objects and the button
    is hidden.

    Raises:
        gr.Error: if no audio file has been uploaded.
    """
    if audio_path is None:
        raise gr.Error("Please upload an audio file first.")

    def _message_only(markdown):
        # Leave the seven stored values untouched, show the message, hide the button.
        untouched = [gr.update() for _ in range(7)]
        return (*untouched, markdown, gr.update(visible=False))

    result = recommend_settings(audio_path)

    if result is None:
        return _message_only("*Set GOOGLE_API_KEY to enable AI recommendations.*")

    if result.get("parse_error"):
        return _message_only(result.get("reasoning", "*Could not parse AI response.*"))

    slider_keys = (
        "lows_db", "mid_boost_db", "highs_db",
        "bass_boost_db", "bass_freq_hz",
        "compression", "stereo_width",
    )
    recommended = [result[key] for key in slider_keys]
    return (*recommended, result.get("reasoning", ""), gr.update(visible=True))
75
+
76
+
77
def apply_ai(ai_lows, ai_mids, ai_highs, ai_bass, ai_freq, ai_comp, ai_width):
    """Pass the seven AI-recommended values through unchanged, in slider order."""
    recommended = (
        ai_lows, ai_mids, ai_highs,
        ai_bass, ai_freq,
        ai_comp, ai_width,
    )
    return recommended
80
+
81
+
82
  def process(audio_path, lows_db, mid_boost_db, highs_db, bass_boost_db, bass_freq_hz,
83
  comp_val, width, target_choice, custom_lufs):
84
  """Run the mastering pipeline and return all outputs."""
 
113
  f"{mono_note}"
114
  )
115
 
116
+ # AI comparison report
117
+ settings_dict = {
118
+ "lows_db": lows_db,
119
+ "mid_boost_db": mid_boost_db,
120
+ "highs_db": highs_db,
121
+ "bass_boost_db": bass_boost_db,
122
+ "bass_freq_hz": bass_freq_hz,
123
+ "compression": comp_val,
124
+ "stereo_width": width,
125
+ "target_lufs": target,
126
+ }
127
+ ai_report = compare_master(original, mastered, sr, settings_dict)
128
+
129
  return (
130
  output_path,
131
  waveform_fig, spectrum_fig,
132
  stats_md,
133
  gr.DownloadButton("Download Mastered File", value=output_path, visible=True),
134
+ ai_report,
135
  )
136
 
137
 
 
151
  '</div></div>'
152
  )
153
 
154
+ # --- Preset, AI Recommend & Target LUFS ---
155
  with gr.Row():
156
  preset_dropdown = gr.Dropdown(
157
  label="Preset",
158
  choices=list(PRESETS.keys()),
159
  value="-- None --",
160
  )
161
+ ai_recommend_btn = gr.Button("AI Recommend", variant="secondary")
162
  target_dropdown = gr.Dropdown(
163
  label="Target LUFS",
164
  choices=["-14 (Streaming)", "-11 (CD)", "Custom"],
 
215
  minimum=80, maximum=150, value=100, step=1,
216
  )
217
 
218
+ # --- AI Recommendations ---
219
+ ai_reasoning_display = gr.Markdown(value="", visible=True)
220
+ apply_ai_btn = gr.Button("Apply AI Settings", variant="secondary", visible=False)
221
+
222
+ # --- Hidden states for AI-recommended values ---
223
+ ai_lows_state = gr.State(0.0)
224
+ ai_mids_state = gr.State(0.0)
225
+ ai_highs_state = gr.State(0.0)
226
+ ai_bass_state = gr.State(0.0)
227
+ ai_freq_state = gr.State(60)
228
+ ai_comp_state = gr.State(50)
229
+ ai_width_state = gr.State(100)
230
+
231
  # --- Playback ---
232
  ab_player = gr.Audio(label="Mastered", interactive=False)
233
  download_file = gr.DownloadButton("Download Mastered File", visible=False)
 
238
  spectrum_plot = gr.Plot(label="Spectrum Comparison")
239
 
240
  stats_display = gr.Markdown()
241
+ ai_report_display = gr.Markdown(value="", visible=True)
242
 
243
  # --- Event wiring ---
244
  preset_dropdown.change(
 
255
  outputs=[custom_lufs_input],
256
  )
257
 
258
+ ai_recommend_btn.click(
259
+ ai_recommend,
260
+ inputs=[audio_input],
261
+ outputs=[
262
+ ai_lows_state, ai_mids_state, ai_highs_state,
263
+ ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
264
+ ai_reasoning_display, apply_ai_btn,
265
+ ],
266
+ )
267
+
268
+ apply_ai_btn.click(
269
+ apply_ai,
270
+ inputs=[
271
+ ai_lows_state, ai_mids_state, ai_highs_state,
272
+ ai_bass_state, ai_freq_state, ai_comp_state, ai_width_state,
273
+ ],
274
+ outputs=[
275
+ lows_slider, mid_boost_slider, highs_slider,
276
+ bass_boost_slider, bass_freq_slider,
277
+ comp_slider, width_slider,
278
+ ],
279
+ )
280
+
281
  master_btn.click(
282
  process,
283
  inputs=[
 
290
  ab_player,
291
  waveform_plot, spectrum_plot,
292
  stats_display, download_file,
293
+ ai_report_display,
294
  ],
295
  )
296
 
requirements.txt CHANGED
@@ -5,3 +5,4 @@ numpy>=1.24.0
5
  soundfile>=0.12.0
6
  matplotlib>=3.7.0
7
  scipy>=1.10.0
 
 
5
  soundfile>=0.12.0
6
  matplotlib>=3.7.0
7
  scipy>=1.10.0
8
+ google-generativeai>=0.8.0