SaltProphet committed on
Commit
e428beb
·
verified ·
1 Parent(s): cda2d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +714 -467
app.py CHANGED
@@ -1,22 +1,22 @@
1
  import gradio as gr
2
  import os
 
 
 
 
3
  import shutil
4
  import zipfile
5
- import librosa
6
- import numpy as np
7
- import soundfile as sf
8
- from pydub import AudioSegment
9
- from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip, ColorClip
10
  import subprocess
11
  from pathlib import Path
12
- import sys
 
 
 
13
  import yt_dlp
14
  import pyloudnorm as pyln
15
- import time
16
- import hashlib
17
- import json
18
 
19
- # --- OPTIONAL: MIDI IMPORT ---
20
  try:
21
  from basic_pitch.inference import predict_and_save
22
  MIDI_AVAILABLE = True
@@ -24,115 +24,134 @@ except ImportError:
24
  MIDI_AVAILABLE = False
25
  print("WARNING: 'basic-pitch' not installed. MIDI extraction will be disabled.")
26
 
27
- # --- PATCH FOR PILLOW ---
28
- import PIL.Image
29
- if not hasattr(PIL.Image, 'ANTIALIAS'):
30
- PIL.Image.ANTIALIAS = PIL.Image.LANCZOS
31
 
32
- # --- CONFIGURATION ---
 
 
 
 
33
  OUTPUT_DIR = Path("nightpulse_output")
34
- TEMP_DIR = Path("temp_processing")
35
- CACHE_FILE = TEMP_DIR / "process_cache.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # ==========================================
38
- # 1. SYSTEM UTILITIES & SECURITY
39
- # ==========================================
40
 
41
def get_file_hash(filepath):
    """Return the hex SHA-256 digest of the file at ``filepath``.

    The file is read in 8 KiB blocks so that large audio files are never
    loaded into memory in one piece.
    """
    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
48
 
49
def check_system():
    """Probe the host for FFmpeg and a CUDA-capable torch install.

    Returns:
        tuple[bool, bool]: ``(ffmpeg_ok, cuda_ok)``. ``ffmpeg_ok`` is True when
        an ``ffmpeg`` executable is on PATH; ``cuda_ok`` is True when torch
        imports and reports CUDA as available.
    """
    has_ffmpeg = shutil.which("ffmpeg") is not None
    has_cuda = False

    try:
        import torch
        cuda_visible = torch.cuda.is_available()
        if not cuda_visible:
            print("⚠️ CUDA NOT DETECTED. Demucs will run on CPU (Slow).")
        else:
            has_cuda = True
            print(f"CUDA DETECTED: {torch.cuda.get_device_name(0)}")
    except ImportError:
        # torch is optional here; without it CUDA simply stays disabled.
        print("⚠️ Torch not installed.")

    return has_ffmpeg, has_cuda
65
 
66
- FFMPEG_OK, CUDA_OK = check_system()
 
67
 
68
- # ==========================================
69
- # 2. AUDIO PROCESSING CORE
70
- # ==========================================
71
 
72
def wipe_dir(p: Path):
    """Best-effort recursive delete of directory ``p``.

    Does nothing when ``p`` does not exist, and never raises: any error while
    checking or deleting is swallowed on purpose.
    """
    try:
        if not p.exists():
            return
        shutil.rmtree(p, ignore_errors=True)
    except Exception:
        return
78
 
79
def download_from_url(url):
    """Download the audio of ``url`` with yt-dlp and return the WAV path.

    Args:
        url: An http(s) link. Surrounding whitespace is ignored.

    Returns:
        The path (``str``) of the extracted ``.wav`` inside
        ``TEMP_DIR / "downloads"``, or ``None`` when ``url`` is empty/blank.

    Raises:
        gr.Error: if the URL does not start with ``http://`` or ``https://``.
    """
    # Strip first: a pasted link with a leading space would otherwise be
    # rejected by the scheme check below.
    url = str(url).strip() if url else ""
    if not url:
        return None

    # Sanitize URL for safety (basic check)
    if not url.startswith(("http://", "https://")):
        raise gr.Error("Invalid URL protocol.")

    download_dir = TEMP_DIR / "downloads"
    wipe_dir(download_dir)
    download_dir.mkdir(parents=True, exist_ok=True)

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": str(download_dir / "%(title)s.%(ext)s"),
        "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "wav", "preferredquality": "192"}],
        "quiet": True,
        "no_warnings": True,
        # A "watch?v=...&list=..." link would otherwise pull the whole playlist,
        # and prepare_filename() would then describe the playlist, not the track.
        "noplaylist": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)

    # The FFmpegExtractAudio post-processor replaces the original extension.
    return str(Path(filename).with_suffix(".wav"))
103
 
104
def ensure_wav(input_path: str) -> str:
    """Return a WAV path for ``input_path``, converting when necessary.

    A file whose extension is already ``.wav`` (any case) is returned as-is.
    Anything else is decoded with pydub and exported to
    ``TEMP_DIR / "converted" / "<stem>.wav"``.
    """
    source = Path(input_path)
    if source.suffix.lower() != ".wav":
        target_dir = TEMP_DIR / "converted"
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / f"{source.stem}.wav"

        decoded = AudioSegment.from_file(str(source))
        decoded.export(str(target), format="wav")
        return str(target)
    return str(source)
116
 
117
- def detect_key_and_bpm(audio_path):
118
- """Estimates musical key and BPM with range correction."""
119
- try:
120
- y, sr = librosa.load(str(audio_path), sr=None, duration=120)
121
-
122
- # BPM Detection
123
- onset_env = librosa.onset.onset_strength(y=y, sr=sr)
124
- tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
125
- bpm = float(tempo) if np.ndim(tempo) == 0 else float(tempo[0])
126
-
127
- # Producer Logic: Constrain BPM to 70-170 range
128
- # Often librosa catches half-time (e.g. 70 instead of 140) or double-time.
129
- while bpm < 70: bpm *= 2
130
- while bpm > 180: bpm /= 2
131
- bpm = int(round(bpm))
 
 
132
 
133
- # Key Detection
 
 
134
  chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
135
  chroma_vals = np.sum(chroma, axis=1)
 
136
  maj_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
137
  min_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
138
  pitches = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
@@ -142,459 +161,687 @@ def detect_key_and_bpm(audio_path):
142
  for i in range(12):
143
  score_maj = np.corrcoef(chroma_vals, np.roll(maj_profile, i))[0, 1]
144
  score_min = np.corrcoef(chroma_vals, np.roll(min_profile, i))[0, 1]
145
- if score_maj > best_score:
146
- best_score, best_key = score_maj, f"{pitches[i]}maj"
147
- if score_min > best_score:
148
- best_score, best_key = score_min, f"{pitches[i]}min"
 
 
 
 
 
149
 
150
- return bpm, best_key
151
- except Exception as e:
152
- print(f"Analysis Error: {e}")
153
- return 120, "Cmaj"
154
 
155
- # ==========================================
156
- # 3. LOOPING ENGINE (UPGRADED)
157
- # ==========================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
def snap_to_zero_crossing(audio_segment, intended_ms, window_ms=30):
    """Return the position (ms) of the quietest frame near ``intended_ms``.

    Searches ``intended_ms ± window_ms`` (clamped to the audio) for the frame
    whose largest absolute sample across channels is smallest, so a cut made
    there is least likely to click.

    Args:
        audio_segment: pydub-style segment; must support ``len()`` in ms,
            millisecond slicing, ``get_array_of_samples()``, ``frame_rate``
            and ``channels``.
        intended_ms: desired cut position in milliseconds.
        window_ms: half-width of the search window in milliseconds.

    Returns:
        int: cut position in milliseconds, within the search window. Falls
        back to the window start when the window holds no audio.
    """
    start_search = max(0, intended_ms - window_ms)
    end_search = min(len(audio_segment), intended_ms + window_ms)
    if end_search <= start_search:
        return start_search

    chunk = audio_segment[start_search:end_search]
    samples = chunk.get_array_of_samples()

    # get_array_of_samples() is interleaved per channel, so group into frames.
    channels = max(1, int(getattr(chunk, "channels", 1) or 1))
    frame_rate = getattr(chunk, "frame_rate", 0)
    frame_count = len(samples) // channels
    if frame_count == 0 or not frame_rate:
        return start_search

    def frame_peak(frame_index):
        base = frame_index * channels
        return max(abs(samples[base + ch]) for ch in range(channels))

    # min() keeps the first of equally quiet frames, like the original scan.
    best_frame = min(range(frame_count), key=frame_peak)

    # The index counts frames, not milliseconds: convert before adding it to
    # a millisecond position (adding it raw moved the cut by up to seconds).
    offset_ms = int(round(best_frame * 1000.0 / frame_rate))
    return min(start_search + offset_ms, end_search)
181
 
182
def apply_loudness(seg: AudioSegment, mode: str, target: float = -14.0) -> AudioSegment:
    """Normalize ``seg`` according to ``mode`` and return the result.

    Modes (case-insensitive, surrounding whitespace ignored):
        ``"none"``/empty/unknown: return ``seg`` untouched.
        ``"peak"``: ``seg.normalize()``.
        ``"rms"``: gain so that ``seg.dBFS`` lands on ``target``.
        ``"lufs"``: gain towards ``target`` integrated loudness measured with
            pyloudnorm, limited to ±20 dB.

    Silent input, and any failure during the LUFS measurement, returns ``seg``
    unchanged.
    """
    selected = (mode or "none").lower().strip()

    if selected == "peak":
        return seg.normalize()

    if selected == "rms":
        # RMS Normalization (Simple but effective)
        if seg.dBFS == float("-inf"):
            return seg
        return seg.apply_gain(target - seg.dBFS)

    if selected == "lufs":
        # LUFS Normalization (Broadcast Standard)
        try:
            pcm = np.array(seg.get_array_of_samples())
            if seg.channels > 1:
                pcm = pcm.reshape((-1, seg.channels))

            # Scale integer PCM to floats in [-1.0, 1.0).
            full_scale = float(2 ** (8 * seg.sample_width - 1))
            normalized = pcm.astype(np.float64) / full_scale

            measured = pyln.Meter(seg.frame_rate).integrated_loudness(normalized)
            if measured == -float('inf'):
                return seg

            # Clamp so a nearly silent clip is not boosted without limit.
            gain_db = max(min(target - measured, 20.0), -20.0)
            return seg.apply_gain(gain_db)
        except Exception:
            return seg

    return seg
215
-
216
def make_quantized_loops(
    stem_path, stem_name, bpm, key,
    bar_starts_ms, bar_lengths, hop_bars, loops_per,
    top_k, fade_ms, loop_seam, seam_ms, min_bar_gap,
    loudness_mode, target_dbfs, out_dir
):
    """Cut the loudest bar-aligned loops out of one stem and export them as WAV.

    Args:
        stem_path: WAV file of the stem; a missing file yields ``[]``.
        stem_name: label used in the exported file names.
        bpm: tempo used for the bar length (``240000 / bpm`` ms per bar).
        key: key label used in the exported file names.
        bar_starts_ms: bar start positions in ms; when empty, an evenly spaced
            grid is derived from ``bpm``.
        bar_lengths: loop lengths, in bars, to try.
        hop_bars: step (in grid entries) between candidate start positions.
        loops_per: maximum number of loops to export.
        top_k: when > 0, keep only this many highest-scoring candidates.
        fade_ms: fade-in/out length applied to each loop (0 disables).
        loop_seam, seam_ms: accepted but not referenced in this function.
        min_bar_gap: minimum distance, in grid indices, between chosen loops.
        loudness_mode, target_dbfs: forwarded to ``apply_loudness``.
        out_dir: output directory (``Path``); created if missing.

    Returns:
        list[Path]: the exported loop files, in selection order.
    """
    stem_path = Path(stem_path)
    if not stem_path.exists(): return []

    audio = AudioSegment.from_wav(str(stem_path))
    # Four beats per bar: 60000 ms/min * 4 / bpm.
    ms_per_bar = (240000.0 / max(1, bpm))

    # If no grid provided, make a mathematical one
    if not bar_starts_ms:
        bar_starts_ms = [int(i * ms_per_bar) for i in range(int(len(audio)/ms_per_bar))]

    candidates = []

    # 1. Candidate Generation
    for bar_len in bar_lengths:
        t_dur = int(ms_per_bar * bar_len)

        # Step through the grid
        for i in range(0, len(bar_starts_ms), int(hop_bars)):
            start_ms = bar_starts_ms[i]

            # Safety check: skip loops that would run past the end of the audio
            if start_ms + t_dur > len(audio): continue

            # Extract temporary segment for analysis
            seg = audio[start_ms:start_ms + t_dur]

            # Score by Energy (RMS) - Filter out silence
            if seg.rms < 100: continue

            candidates.append({
                'score': seg.rms,
                'start_ms': start_ms,
                'duration': t_dur,
                'bar_len': bar_len,
                'grid_index': i
            })

    # 2. Filtering & Selection: loudest first, optionally truncated to top_k
    candidates.sort(key=lambda x: x['score'], reverse=True)
    if top_k > 0: candidates = candidates[:int(top_k)]

    selected = []
    used_indices = []

    for c in candidates:
        # De-duplication: Don't pick loops too close to each other
        # (the comparison is on grid index only, regardless of bar length)
        if any(abs(c['grid_index'] - u) < min_bar_gap for u in used_indices):
            continue

        selected.append(c)
        used_indices.append(c['grid_index'])
        if len(selected) >= loops_per: break

    exported_paths = []
    out_dir.mkdir(parents=True, exist_ok=True)

    # 3. Export with Audio Engineering Polish
    for i, item in enumerate(selected, 1):
        start = item['start_ms']
        dur = item['duration']

        # PRODUCER TRICK: Snap start to zero crossing to prevent click
        safe_start = snap_to_zero_crossing(audio, start)

        # Grab audio
        loop = audio[safe_start : safe_start + dur]

        # Fades (Only necessary if not using zero crossing, but safe to keep light)
        if fade_ms > 0:
            loop = loop.fade_in(int(fade_ms)).fade_out(int(fade_ms))

        # Loudness Normalization
        loop = apply_loudness(loop, loudness_mode, target_dbfs)

        fname = f"{bpm}BPM_{key}_{stem_name}_L{item['bar_len']}bars_{i:02d}.wav"
        out_path = out_dir / fname
        loop.export(out_path, format="wav")
        exported_paths.append(out_path)

    return exported_paths
 
 
 
 
 
 
 
 
 
 
302
 
303
- # ==========================================
304
- # 4. MAIN ORCHESTRATION
305
- # ==========================================
 
 
 
306
 
307
def run_phase_1(file_in, url_in, mode, manual_bpm):
    """Ingest a track, analyse it, and split it into stems with Demucs.

    Args:
        file_in: path of an uploaded audio file (used when no URL is given).
        url_in: optional http(s) link; a non-blank URL wins over ``file_in``.
        mode: ``"6stem"`` selects the ``htdemucs_6s`` model; ``"2stem"`` runs
            ``htdemucs`` with ``--two-stems vocals``; anything else runs
            plain ``htdemucs``.
        manual_bpm: a value > 0 overrides tempo detection (key is then
            reported as ``"Unknown"``).

    Returns:
        A 10-tuple in the order wired to ``btn1.click``: drums, bass and
        vocals preview paths (``None`` when that stem was not produced), the
        info markdown, bpm, key, the stem folder, the mode, and two
        ``gr.update`` objects populating the stem checkbox groups.

    Raises:
        gr.Error: no audio source, Demucs failed, or Demucs left no output.
    """
    # 1. Ingestion
    fpath = download_from_url(url_in) if (url_in and str(url_in).strip()) else file_in
    if not fpath:
        raise gr.Error("No Audio Source.")

    fpath = ensure_wav(fpath)

    # 2. Analysis
    if manual_bpm and float(manual_bpm) > 0:
        bpm, key = int(manual_bpm), "Unknown"
    else:
        bpm, key = detect_key_and_bpm(fpath)

    # 3. Separation
    model_name = "htdemucs_6s" if mode == "6stem" else "htdemucs"
    device = "cuda" if CUDA_OK else "cpu"

    cmd = [
        sys.executable, "-m", "demucs",
        "--device", device,
        "-n", model_name,
        "--out", str(TEMP_DIR),
        fpath
    ]
    if mode == "2stem":
        cmd += ["--two-stems", "vocals"]

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as err:
        # Raise a gr.Error so the UI shows a readable message.
        raise gr.Error(f"Demucs failed with exit code {err.returncode}.") from err

    # Demucs names the track folder after the input file; if that guess is
    # wrong, fall back to the most recently modified folder for this model.
    model_dir = TEMP_DIR / model_name
    track_dir = model_dir / Path(fpath).stem
    if not track_dir.exists():
        candidates = []
        if model_dir.exists():
            candidates = sorted(
                (p for p in model_dir.iterdir() if p.is_dir()),
                key=lambda p: p.stat().st_mtime,
                reverse=True,
            )
        if not candidates:
            raise gr.Error("Demucs produced no output folder.")
        track_dir = candidates[0]

    # 4. Prep Stems
    stem_map = {
        "Drums": track_dir / "drums.wav", "Bass": track_dir / "bass.wav",
        "Vocals": track_dir / "vocals.wav", "Other": track_dir / "other.wav",
        "Piano": track_dir / "piano.wav", "Guitar": track_dir / "guitar.wav",
    }

    # Create Instrumental by summing every non-vocal stem that exists.
    mix = None
    for k in ["Drums", "Bass", "Other", "Piano", "Guitar"]:
        if stem_map[k].exists():
            seg = AudioSegment.from_wav(str(stem_map[k]))
            mix = seg if mix is None else mix.overlay(seg)

    inst_path = track_dir / "instrumental.wav"
    if mix is not None:
        mix.export(str(inst_path), format="wav")
    else:
        # Two-stem runs produce only vocals.wav / no_vocals.wav, so there is
        # nothing to sum; reuse Demucs' own accompaniment instead.
        no_vocals = track_dir / "no_vocals.wav"
        if no_vocals.exists():
            shutil.copy(no_vocals, inst_path)
    stem_map["Instrumental"] = inst_path

    valid_stems = [k for k, v in stem_map.items() if v.exists()]

    def preview(name):
        # Only hand the player a path that really exists; a missing file
        # (e.g. drums in two-stem mode) must be None.
        path = stem_map[name]
        return str(path) if path.exists() else None

    # Return UI updates
    info_text = f"### 🎵 Analysis Complete\n**BPM:** {bpm} | **Key:** {key} | **Engine:** {device.upper()}"

    return (
        preview("Drums"),
        preview("Bass"),
        preview("Vocals"),
        info_text, bpm, key, str(track_dir), mode,
        gr.update(choices=valid_stems, value=valid_stems),  # Export options
        gr.update(choices=valid_stems, value=[x for x in valid_stems if x != "Vocals"])  # Loop options
    )
392
 
393
def run_phase_2(
    track_folder, bpm, key, stem_mode, art,
    ex_stems, loop_stems, do_midi, do_oneshots, do_vocal_chops,
    loops_per, bars, hop, topk, fadems, seam, seamms, mingap,
    l_mode, l_target, vid_fmt
):
    """Build the sample pack (stems, MIDI, loops, promo video) and zip it.

    Args:
        track_folder: folder holding the separated stems (from phase 1).
        bpm, key: tempo and key labels used in file names and loop cutting.
        stem_mode, do_oneshots: accepted but not referenced in this function.
        art: artwork image path; a video is rendered only when it is set.
        ex_stems: stem names to copy into ``Stems/``.
        loop_stems: stem names to cut loops from.
        do_midi: run basic-pitch MIDI extraction (needs ``MIDI_AVAILABLE``).
        do_vocal_chops: when true, "Vocals" is skipped by the loop step.
        loops_per, bars, hop, topk, fadems, seam, seamms, mingap, l_mode,
        l_target: forwarded to ``make_quantized_loops``.
        vid_fmt: label containing "9:16" or "16:9"; anything else is square.

    Returns:
        tuple: ``(zip_path, video_path)``; ``video_path`` is ``None`` when no
        video was attempted.

    Raises:
        gr.Error: if ``track_folder`` is empty.
    """
    if not track_folder: raise gr.Error("Please run Phase 1 first.")

    # Start from a clean output tree on every run.
    wipe_dir(OUTPUT_DIR)
    for d in ["Stems", "Loops", "MIDI", "OneShots", "Vocal_Chops"]:
        (OUTPUT_DIR / d).mkdir(parents=True, exist_ok=True)

    t_dir = Path(track_folder)

    # 1. Map Stems
    stems = {
        "Drums": t_dir / "drums.wav", "Bass": t_dir / "bass.wav",
        "Vocals": t_dir / "vocals.wav", "Other": t_dir / "other.wav",
        "Piano": t_dir / "piano.wav", "Guitar": t_dir / "guitar.wav",
        "Instrumental": t_dir / "instrumental.wav"
    }

    # 2. Export Raw Stems
    for s in ex_stems:
        if stems.get(s) and stems[s].exists():
            shutil.copy(stems[s], OUTPUT_DIR / "Stems" / f"{bpm}BPM_{key}_{s}.wav")

    # 3. Generate MIDI
    if do_midi and MIDI_AVAILABLE:
        for s in ["Bass", "Piano", "Guitar", "Other", "Vocals"]:
            if stems.get(s) and stems[s].exists():
                out_midi = OUTPUT_DIR / "MIDI" / f"{bpm}BPM_{key}_{s}.mid"
                try:
                    predict_and_save(
                        audio_path_list=[str(stems[s])],
                        output_directory=str(out_midi.parent),
                        save_midi=True, save_model_outputs=False, save_notes=False, sonify_midi=False
                    )
                    # Rename the weird file Basic Pitch generates
                    gen_file = out_midi.parent / f"{stems[s].stem}_basic_pitch.mid"
                    if gen_file.exists(): shutil.move(str(gen_file), str(out_midi))
                except Exception as e:
                    # One failed stem must not abort the whole pack.
                    print(f"MIDI Fail {s}: {e}")

    # 4. Generate Loops
    # Smart Grid: Use Drums for transient detection to align the grid
    grid_source = stems.get("Drums") if stems.get("Drums", Path("x")).exists() else stems.get("Instrumental")

    # Fallback Grid: stays empty unless beat tracking finds more than 4 beats
    bar_starts = []
    if grid_source and grid_source.exists():
        y, sr = librosa.load(str(grid_source), sr=22050, duration=180)
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)
        # Convert to ms
        if len(beat_times) > 4:
            # approximate bar starts every 4 beats
            bar_starts = [int(t*1000) for t in beat_times[::4]]

    # Process Loop Stems
    all_loop_paths = {}
    # Default to 4- and 8-bar loops when no lengths were supplied.
    bar_ints = sorted([int(b) for b in (bars or [])]) or [4, 8]

    for s in loop_stems:
        if s == "Vocals" and do_vocal_chops: continue  # Special handling for vox
        if stems.get(s) and stems[s].exists():
            paths = make_quantized_loops(
                stems[s], s, int(bpm), str(key), bar_starts, bar_ints,
                hop, loops_per, topk, fadems, seam, seamms, mingap,
                l_mode, float(l_target), OUTPUT_DIR / "Loops"
            )
            all_loop_paths[s] = paths

    # 5. Video Render
    vid_path = None
    if art and any(all_loop_paths.values()):
        # Find a suitable audio track for the video (prioritize instrumental/melodic)
        audio_src = None
        for k in ["Instrumental", "Piano", "Other", "Drums"]:
            if all_loop_paths.get(k):
                audio_src = all_loop_paths[k][0]
                break

        if audio_src:
            try:
                clip = AudioFileClip(str(audio_src))
                w, h = (1080, 1920) if "9:16" in vid_fmt else ((1920, 1080) if "16:9" in vid_fmt else (1080, 1080))

                bg = ImageClip(art)
                # Aspect Ratio Crop logic: scale to cover the frame, then centre-crop
                img_ratio = bg.w / bg.h
                tgt_ratio = w / h
                if img_ratio > tgt_ratio:
                    bg = bg.resize(height=h)
                    bg = bg.crop(x1=(bg.w - w)//2, width=w)
                else:
                    bg = bg.resize(width=w)
                    bg = bg.crop(y1=(bg.h - h)//2, height=h)

                bg = bg.set_duration(clip.duration)

                # Add a "Now Playing" bar that slides in from the left over the clip
                bar = ColorClip(size=(w, 20), color=(255, 255, 255)).set_opacity(0.8)
                bar = bar.set_position(lambda t: (int(-w + w * (t / clip.duration)), h - 100)).set_duration(clip.duration)

                final = CompositeVideoClip([bg, bar], size=(w,h))
                final.audio = clip
                # NOTE(review): vid_path is set before the render, so it is
                # still returned if write_videofile raises.
                vid_path = str(OUTPUT_DIR / "Promo_Video.mp4")
                final.write_videofile(vid_path, fps=24, codec="libx264", audio_codec="aac", logger=None)
            except Exception as e:
                print(f"Video Error: {e}")

    # 6. Zip It (archive is written relative to the current working directory)
    z_path = "NightPulse_Pack.zip"
    with zipfile.ZipFile(z_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for r, _, fs in os.walk(OUTPUT_DIR):
            for f in fs:
                full = Path(r) / f
                zf.write(str(full), str(full.relative_to(OUTPUT_DIR)))

    return z_path, vid_path
514
-
515
- # ==========================================
516
- # 5. GRADIO UI
517
- # ==========================================
518
-
519
- with gr.Blocks(title="Night Pulse | Studio Ultimate", theme=gr.themes.Base()) as app:
520
- gr.Markdown("# 🎹 Night Pulse | Studio Ultimate V2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
 
522
- # States
523
- folder_st = gr.State()
524
- bpm_st = gr.State()
525
- key_st = gr.State()
526
- mode_st = gr.State()
527
-
528
  with gr.Row():
529
  with gr.Column():
530
- gr.Markdown("### 1. Ingestion & Analysis")
531
  with gr.Tabs():
532
- with gr.Tab("URL"):
533
- url = gr.Textbox(label="YouTube/SoundCloud Link")
534
- with gr.Tab("Upload"):
535
- file = gr.Audio(type="filepath", label="Drop File Here")
536
 
537
- sep_mode = gr.Dropdown(
538
- [("2 Stems (Vox/Inst)", "2stem"), ("6 Stems (Pro)", "6stem")],
539
- value="6stem", label="Model"
540
- )
541
- mbpm = gr.Number(label="Force BPM (0 = Auto)")
542
- btn1 = gr.Button("🔥 Analyze & Separate", variant="primary")
543
 
544
- info = gr.Markdown("Ready.")
545
-
546
  with gr.Column():
547
- gr.Markdown("### 2. Preview Stems")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  with gr.Row():
549
- p_drums = gr.Audio(label="Drums", interactive=False)
550
- p_bass = gr.Audio(label="Bass", interactive=False)
551
- p_vox = gr.Audio(label="Vocals", interactive=False)
552
 
553
  gr.Markdown("---")
554
 
555
  with gr.Row():
556
  with gr.Column():
557
- gr.Markdown("### 3. Loop Engine")
558
- with gr.Group():
559
- ex_stems = gr.CheckboxGroup(label="Export Raw Stems")
560
- loop_stems = gr.CheckboxGroup(label="Generate Loops From")
561
-
562
- with gr.Row():
563
- loops_per = gr.Slider(1, 40, 12, 1, label="Loops per Stem")
564
- hop = gr.Slider(1, 8, 2, 1, label="Grid Hop")
565
 
566
- with gr.Accordion("Advanced Processing", open=False):
567
- l_mode = gr.Dropdown(["lufs", "rms", "peak", "none"], value="lufs", label="Norm Mode")
568
- l_target = gr.Slider(-20, -5, -14, 1, label="Target Level (dB)")
569
- fadems = gr.Slider(0, 50, 5, label="Micro-Fade (ms)")
570
- topk = gr.Slider(5, 50, 20, label="Candidate Pool")
571
-
572
- art = gr.Image(type="filepath", label="Artwork (for Video)")
573
- vid_fmt = gr.Dropdown(["9:16 (TikTok)", "16:9 (YouTube)", "1:1 (Square)"], value="9:16 (TikTok)", label="Video Aspect")
 
 
 
 
 
 
 
574
 
575
- btn2 = gr.Button("📦 Generate Pack", variant="primary")
576
-
577
  with gr.Column():
578
- gr.Markdown("### 4. Output")
579
- z_out = gr.File(label="Download Zip")
580
  v_out = gr.Video(label="Promo Video")
 
581
 
582
  # Wiring
583
  btn1.click(
584
- run_phase_1,
585
- [file, url, sep_mode, mbpm],
586
- [p_drums, p_bass, p_vox, info, bpm_st, key_st, folder_st, mode_st, ex_stems, loop_stems]
587
  )
588
 
589
  btn2.click(
590
- run_phase_2,
591
- [
592
- folder_st, bpm_st, key_st, mode_st, art,
593
- ex_stems, loop_stems, gr.Checkbox(value=True), gr.Checkbox(value=True), gr.Checkbox(value=True),
594
- loops_per, gr.State(["4", "8"]), hop, topk, fadems, gr.Checkbox(value=False), gr.Number(value=0), gr.Number(value=4),
595
- l_mode, l_target, vid_fmt
596
- ],
597
- [z_out, v_out]
598
  )
599
 
600
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import os
3
+ import sys
4
+ import json
5
+ import time
6
+ import uuid
7
  import shutil
8
  import zipfile
9
+ import hashlib
 
 
 
 
10
  import subprocess
11
  from pathlib import Path
12
+
13
+ import numpy as np
14
+ import soundfile as sf
15
+ import librosa
16
  import yt_dlp
17
  import pyloudnorm as pyln
 
 
 
18
 
19
+ # Optional: MIDI extraction
20
  try:
21
  from basic_pitch.inference import predict_and_save
22
  MIDI_AVAILABLE = True
 
24
  MIDI_AVAILABLE = False
25
  print("WARNING: 'basic-pitch' not installed. MIDI extraction will be disabled.")
26
 
 
 
 
 
27
 
28
+ # =========================
29
+ # CONFIG
30
+ # =========================
31
# Working directories, all relative to the process's current directory.
RUNS_DIR = Path("runs")
CACHE_DIR = Path("cache")  # cache_paths_for_hash() builds per-hash folders under this
OUTPUT_DIR = Path("nightpulse_output")
# Full path of ffmpeg when it is on PATH, otherwise the bare command name.
FFMPEG_BIN = shutil.which("ffmpeg") or "ffmpeg"

# Create the run and cache roots at import time (no-op when they already exist).
RUNS_DIR.mkdir(parents=True, exist_ok=True)
CACHE_DIR.mkdir(parents=True, exist_ok=True)
38
+
39
+
40
+ # =========================
41
+ # UTIL
42
+ # =========================
43
def now_job_id() -> str:
    """Return a job id of the form ``YYYYMMDD_HHMMSS_<8 hex chars>``.

    The timestamp is local time; the suffix is the first 8 hex digits of a
    random UUID4.
    """
    stamp = time.strftime("%Y%m%d_%H%M%S")
    suffix = uuid.uuid4().hex[:8]
    return "_".join((stamp, suffix))
47
+
48
+
49
def wipe_dir(p: Path):
    """Remove directory ``p`` and everything below it, ignoring all errors.

    A missing directory is a no-op; the function never raises.
    """
    try:
        present = p.exists()
        if present:
            shutil.rmtree(p, ignore_errors=True)
    except Exception:
        # Best-effort cleanup: swallow failures on purpose.
        pass
55
+
56
 
57
def ensure_dir(p: Path):
    """Create directory ``p`` (and missing parents) if needed; return ``p``."""
    p.mkdir(exist_ok=True, parents=True)
    return p
60
 
61
+
62
def sha256_file(path: Path, chunk_size: int = 1024 * 1024) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read ``chunk_size`` bytes at a time (1 MiB by default), so
    memory use does not grow with file size.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # An empty read marks end of file and ends the loop.
        while block := stream.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()
71
 
72
+
73
def check_ffmpeg() -> bool:
    """Return True when ``FFMPEG_BIN -version`` runs and exits with code 0.

    Any failure to launch the process (for example a missing binary) is
    reported as False rather than raised.
    """
    try:
        probe = subprocess.run([FFMPEG_BIN, "-version"], capture_output=True, text=True)
    except Exception:
        return False
    return probe.returncode == 0
79
+
80
+
81
def check_torch_cuda() -> bool:
    """Report whether torch can use CUDA, printing a one-line diagnostic.

    Returns False, after printing a warning, when torch cannot be imported or
    the CUDA probe itself raises.
    """
    try:
        import torch
        available = torch.cuda.is_available()
        if not available:
            print(f"WARNING: CUDA NOT available to torch. torch={torch.__version__}. Demucs will run on CPU.")
            return available
        print(f"CUDA OK: {torch.cuda.get_device_name(0)} | torch {torch.__version__} | cuda {torch.version.cuda}")
        return available
    except Exception as e:
        print(f"WARNING: torch import failed: {e}. Demucs may run on CPU.")
        return False
93
 
 
94
 
95
# Probe the runtime once at import time; ensure_wav() reads FFMPEG_OK and
# run_demucs() reads CUDA_OK.
FFMPEG_OK = check_ffmpeg()
CUDA_OK = check_torch_cuda()

# Maximum number of characters log_append() keeps (it retains the tail of the log).
LOG_TAIL_MAX = 8000
 
 
 
 
 
100
 
101
def log_append(log_text: str, msg: str) -> str:
    """Append ``msg`` as a line to ``log_text`` and return the new log.

    ``msg`` is converted with ``str()`` and gets a trailing newline if it has
    none. When the result exceeds ``LOG_TAIL_MAX`` characters, only the last
    ``LOG_TAIL_MAX`` characters are kept.
    """
    line = str(msg)
    line = line if line.endswith("\n") else line + "\n"
    merged = (log_text or "") + line
    return merged[-LOG_TAIL_MAX:] if len(merged) > LOG_TAIL_MAX else merged
109
+
110
+
111
def safe_stem(name: str) -> str:
    """Return ``name`` with every unsafe character replaced by ``_``.

    Alphanumeric characters and ``.``, ``_``, ``-`` are kept as they are.
    """
    allowed_punct = "._-"
    cleaned = []
    for ch in name:
        keep = ch.isalnum() or ch in allowed_punct
        cleaned.append(ch if keep else "_")
    return "".join(cleaned)
113
 
 
 
114
 
115
def download_from_url(url: str, out_dir: Path) -> Path:
    """Download the audio of ``url`` into ``out_dir`` and return the WAV path.

    Args:
        url: link understood by yt-dlp; surrounding whitespace is ignored.
        out_dir: destination directory, created if missing.

    Returns:
        Path of the ``.wav`` produced by the FFmpegExtractAudio post-processor.
    """
    ensure_dir(out_dir)
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": str(out_dir / "%(title)s.%(ext)s"),
        "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "wav", "preferredquality": "192"}],
        "quiet": True,
        "no_warnings": True,
        # A "watch?v=...&list=..." link would otherwise pull the whole playlist,
        # and prepare_filename() would then describe the playlist, not the track.
        "noplaylist": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # Pasted links often carry stray whitespace.
        info = ydl.extract_info(url.strip(), download=True)
        filename = ydl.prepare_filename(info)

    # The post-processor replaces the source extension with .wav.
    return Path(filename).with_suffix(".wav")
129
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
def ensure_wav(in_path: Path, out_path: Path) -> Path:
    """Return a WAV version of ``in_path``, converting with FFmpeg if needed.

    A ``.wav`` input (any case) is returned untouched and ``out_path`` is not
    written. Other inputs are transcoded to 16-bit PCM, 44.1 kHz, stereo at
    ``out_path``.

    Raises:
        gr.Error: FFmpeg is unavailable, or the conversion exits non-zero.
    """
    already_wav = in_path.suffix.lower() == ".wav"
    if already_wav:
        return in_path
    if not FFMPEG_OK:
        raise gr.Error("FFmpeg not found. Install FFmpeg or provide WAV input.")

    ensure_dir(out_path.parent)
    result = subprocess.run(
        [
            FFMPEG_BIN, "-y",
            "-i", str(in_path),
            "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
            str(out_path),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        return out_path
    # Only the tail of stderr is shown, to keep the UI message readable.
    raise gr.Error(f"FFmpeg convert error:\n{result.stderr[-2000:]}")
147
+
148
 
149
+ def detect_key(audio_path: Path) -> str:
150
+ try:
151
+ y, sr = librosa.load(str(audio_path), sr=None, duration=60)
152
  chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
153
  chroma_vals = np.sum(chroma, axis=1)
154
+
155
  maj_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
156
  min_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
157
  pitches = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
 
161
  for i in range(12):
162
  score_maj = np.corrcoef(chroma_vals, np.roll(maj_profile, i))[0, 1]
163
  score_min = np.corrcoef(chroma_vals, np.roll(min_profile, i))[0, 1]
164
+ if np.isfinite(score_maj) and score_maj > best_score:
165
+ best_score = score_maj
166
+ best_key = f"{pitches[i]}maj"
167
+ if np.isfinite(score_min) and score_min > best_score:
168
+ best_score = score_min
169
+ best_key = f"{pitches[i]}min"
170
+ return best_key
171
+ except Exception:
172
+ return "Unknown"
173
 
 
 
 
 
174
 
175
def run_demucs(input_wav: Path, model_name: str, out_dir: Path, two_stems_vocals: bool) -> Path:
    """Run Demucs on ``input_wav`` and return the folder holding its stems.

    Args:
        input_wav: audio file to separate.
        model_name: value passed to ``-n``; also the sub-folder Demucs writes to.
        out_dir: value passed to ``--out``.
        two_stems_vocals: add ``--two-stems vocals`` to the command.

    Returns:
        The most recently modified directory inside ``out_dir / model_name``.

    Raises:
        gr.Error: Demucs exits non-zero, or the expected folders are missing.
    """
    command = [
        sys.executable, "-m", "demucs",
        "--device", "cuda" if CUDA_OK else "cpu",
        "-n", model_name,
        "--out", str(out_dir),
        str(input_wav),
    ]
    if two_stems_vocals:
        command.extend(["--two-stems", "vocals"])

    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        raise gr.Error(f"Demucs Error:\n{proc.stderr[-2000:]}")

    model_dir = out_dir / model_name
    if not model_dir.exists():
        raise gr.Error(f"Demucs did not produce expected folder: {model_dir}")

    track_dirs = [entry for entry in model_dir.iterdir() if entry.is_dir()]
    if not track_dirs:
        raise gr.Error(f"Demucs produced no track folder in: {model_dir}")

    # The newest folder is taken to be the one this run just wrote.
    return max(track_dirs, key=lambda entry: entry.stat().st_mtime)
200
+
201
+
202
+ def build_instrumental(track_dir: Path) -> Path | None:
203
+ out = track_dir / "no_vocals.wav"
204
+ if out.exists():
205
+ return out
206
+ parts = []
207
+ for name in ["drums.wav", "bass.wav", "other.wav", "piano.wav", "guitar.wav"]:
208
+ p = track_dir / name
209
+ if p.exists():
210
+ parts.append(p)
211
+ if not parts:
212
+ return None
213
+ ys = []
214
+ sr_ref = None
215
+ for p in parts:
216
+ y, sr = sf.read(str(p), always_2d=True, dtype="float32")
217
+ if sr_ref is None:
218
+ sr_ref = sr
219
+ elif sr != sr_ref:
220
+ y_mono = np.mean(y, axis=1)
221
+ y_rs = librosa.resample(y_mono, orig_sr=sr, target_sr=sr_ref)
222
+ y = np.stack([y_rs, y_rs], axis=1).astype(np.float32)
223
+ ys.append(y)
224
+ max_len = max(a.shape[0] for a in ys)
225
+ mix = np.zeros((max_len, 2), dtype=np.float32)
226
+ for a in ys:
227
+ mix[:a.shape[0], :] += a
228
+ peak = np.max(np.abs(mix))
229
+ if peak > 1.0:
230
+ mix /= peak
231
+ sf.write(str(out), mix, sr_ref)
232
+ return out
233
+
234
+
235
def cache_paths_for_hash(h: str) -> dict:
    """Return the cache locations for the entry identified by ``h``.

    Keys: ``"base"`` (``CACHE_DIR / h``), ``"meta"`` (``meta.json``),
    ``"stems_dir"`` (``stems``) and ``"input_wav"`` (``input.wav``), the last
    three inside ``base``. Nothing is created on disk.
    """
    root = CACHE_DIR / h
    return dict(
        base=root,
        meta=root / "meta.json",
        stems_dir=root / "stems",
        input_wav=root / "input.wav",
    )
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
def copy_tree(src: Path, dst: Path):
    """Recursively copy the directory tree ``src`` into ``dst``.

    Sub-directories (including empty ones) are recreated under ``dst`` and
    files are copied with ``shutil.copy2``; existing files are overwritten.
    """
    ensure_dir(dst)
    for current, _subdirs, filenames in os.walk(src):
        current_dir = Path(current)
        # Mirror the position of the walked directory relative to src.
        mirrored = dst / current_dir.relative_to(src)
        ensure_dir(mirrored)
        for filename in filenames:
            shutil.copy2(current_dir / filename, mirrored / filename)
253
+
254
+
255
+ # =========================
256
+ # AUDIO PROCESSING
257
+ # =========================
258
def peak_normalize(y: np.ndarray, peak_target: float = 0.98) -> np.ndarray:
    """Scale ``y`` so its largest absolute value equals ``peak_target``.

    Input whose peak is at most 1e-9 (effectively silent) is returned
    unchanged, which avoids dividing by zero.
    """
    loudest = np.max(np.abs(y))
    if loudest <= 1e-9:
        return y
    return y * (peak_target / loudest)
264
+
265
+
266
def apply_loudness_np(y: np.ndarray, sr: int, mode: str, target: float) -> np.ndarray:
    """Normalize the sample array ``y`` according to ``mode``.

    Modes (case-insensitive, surrounding whitespace ignored):
        ``"none"``/empty/unknown: return ``y`` unchanged.
        ``"peak"``: delegate to ``peak_normalize``.
        ``"rms"``: gain so the RMS level in dB equals ``target``.
        ``"lufs"``: gain towards ``target`` integrated loudness measured with
            pyloudnorm at sample rate ``sr``, limited to ±20 dB.

    A silent LUFS reading, or any error while measuring, returns ``y``
    unchanged.
    """
    choice = (mode or "none").lower().strip()

    if choice == "peak":
        return peak_normalize(y)

    if choice == "rms":
        # The 1e-12 floor keeps log10 finite for all-zero input.
        rms_db = 20.0 * np.log10(np.sqrt(np.mean(y ** 2)) + 1e-12)
        return y * 10 ** ((float(target) - rms_db) / 20.0)

    if choice == "lufs":
        try:
            measured = pyln.Meter(sr).integrated_loudness(y.astype(np.float64))
            if measured == -float("inf"):
                return y
            delta_db = max(min(float(target) - measured, 20.0), -20.0)
            return y * 10 ** (delta_db / 20.0)
        except Exception:
            return y

    return y
290
+
291
+
292
def crossfade_loop_seam(seg: np.ndarray, seam_samps: int) -> np.ndarray:
    """Blend the last ``seam_samps`` samples of ``seg`` into its first ones.

    The first ``seam`` samples become ``head * (1 - fade) + tail * fade``,
    where ``fade`` rises linearly from 0 to 1. The rest of the segment and its
    length are unchanged.

    Args:
        seg: audio shaped ``(samples,)`` or ``(samples, channels, ...)``; the
            ramp is applied along axis 0 and broadcast over the other axes.
        seam_samps: seam length in samples.

    Returns:
        A new array with the blended head, or ``seg`` itself when the seam is
        not positive or is at least half the segment length.
    """
    n = seg.shape[0]
    seam = int(seam_samps)
    if seam <= 0 or seam * 2 >= n:
        return seg

    out = seg.copy()
    fade = np.linspace(0.0, 1.0, seam, dtype=np.float32)
    # Add trailing unit axes so the ramp lines up with the sample axis; a bare
    # 1-D ramp would be matched against the channel axis of multi-channel audio.
    fade = fade.reshape((seam,) + (1,) * (seg.ndim - 1))

    head = out[:seam].copy()
    tail = out[-seam:]
    # NOTE(review): out[0] keeps the original first sample, so the jump from
    # the last sample back to the first is not altered — confirm the intended
    # blend direction with the caller.
    out[:seam] = head * (1.0 - fade) + tail * fade
    return out
303
+
304
+
305
def fade_edges(seg: np.ndarray, fade_samps: int) -> np.ndarray:
    """Apply a linear fade-in and fade-out of ``fade_samps`` samples.

    Args:
        seg: audio shaped ``(samples,)`` or ``(samples, channels, ...)``; the
            ramp is applied along axis 0 and broadcast over the other axes.
        fade_samps: fade length in samples at each end.

    Returns:
        A faded copy, or ``seg`` itself when the fade is not positive or the
        two fades together would cover the whole segment.
    """
    n = seg.shape[0]
    f = int(fade_samps)
    if f <= 0 or f * 2 >= n:
        return seg

    out = seg.copy()
    fade = np.linspace(0.0, 1.0, f, dtype=np.float32)
    # Add trailing unit axes so the ramp lines up with the sample axis; a bare
    # 1-D ramp would be matched against the channel axis of multi-channel audio.
    fade = fade.reshape((f,) + (1,) * (seg.ndim - 1))

    out[:f] *= fade
    out[-f:] *= fade[::-1]
    return out
315
+
316
+
317
def compute_segment_features(y: np.ndarray, sr: int) -> dict:
    """Return simple descriptors of the audio segment ``y``.

    Returns:
        dict with ``"rms"`` (root-mean-square level plus a 1e-12 floor),
        ``"onset"`` (mean librosa onset strength) and ``"centroid"`` (mean
        spectral centroid). The last two are 0.0 when librosa fails or
        returns nothing.
    """
    energy = float(np.sqrt(np.mean(y ** 2)) + 1e-12)

    onset = 0.0
    try:
        envelope = librosa.onset.onset_strength(y=y, sr=sr)
        if envelope.size:
            onset = float(np.mean(envelope))
    except Exception:
        onset = 0.0

    centroid = 0.0
    try:
        spectrum_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        if spectrum_centroid.size:
            centroid = float(np.mean(spectrum_centroid))
    except Exception:
        centroid = 0.0

    return {"rms": energy, "onset": onset, "centroid": centroid}
330
+
331
+
332
def normalize01(x: np.ndarray) -> np.ndarray:
    """Min-max rescale ``x`` into the range [0, 1].

    Empty input is returned as-is; input whose values span less than 1e-12
    maps to all zeros.
    """
    if x.size == 0:
        return x

    lowest = float(np.min(x))
    highest = float(np.max(x))
    if highest - lowest < 1e-12:
        return np.zeros_like(x)

    return (x - lowest) / (highest - lowest)
337
+
338
+
339
def _fixed_bar_grid(total: int, sr: int, bpm: int, offset: int = 0) -> list[int]:
    """Return evenly spaced bar starts (4/4 at ``bpm``) phase-aligned to ``offset``."""
    ms_per_bar = 240000.0 / max(1, bpm)
    samps_per_bar = max(1, int(sr * (ms_per_bar / 1000.0)))
    return list(range(offset % samps_per_bar, total, samps_per_bar))


def build_bar_grid_samples(grid_src_wav: Path, bpm: int, sr_target: int = 44100, duration_sec: int = 240) -> tuple[list[int], int]:
    """Build a list of bar start positions (in samples) for a reference stem.

    Three strategies are tried in order:

    1. Beat tracking: every fourth tracked beat is taken as a bar start
       (assumes 4/4 with the first tracked beat on a downbeat).
    2. Onset fallback: a fixed-length bar grid derived from ``bpm`` whose
       phase is aligned to the first detected onset.
    3. Pure math: a fixed-length bar grid derived from ``bpm`` starting at 0.

    Args:
        grid_src_wav: Audio file used to derive the grid.
        bpm: Tempo used for the fixed-length grids.
        sr_target: Sample rate the audio is loaded at.
        duration_sec: Only this many seconds from the start are analysed.

    Returns:
        ``(bar_starts, sr)``; ``bar_starts`` is never empty. Audio shorter
        than one second yields ``([0], sr)``.
    """
    y, sr = librosa.load(str(grid_src_wav), sr=sr_target, mono=True, duration=duration_sec)
    if y.size < sr:
        return [0], sr

    # 1) Beat track
    try:
        _, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)
        if beat_times.size >= 8:
            bar_times = beat_times[::4]  # assume 4/4
            bar_samps = sorted({int(t * sr) for t in bar_times if int(t * sr) >= 0})
            if len(bar_samps) >= 2:
                return bar_samps, sr
    except Exception:
        pass

    # 2) Onset fallback
    try:
        oenv = librosa.onset.onset_strength(y=y, sr=sr)
        onsets = librosa.onset.onset_detect(onset_envelope=oenv, sr=sr, backtrack=True, units="time")
        on_samps = np.array([int(t * sr) for t in onsets], dtype=np.int64)
        on_samps = on_samps[(on_samps >= 0) & (on_samps < y.size)]
        if on_samps.size >= 8:
            # Previously the detected onsets were discarded and this tier
            # returned exactly the tier-3 grid; now the first onset sets the
            # phase of the grid while it still covers the whole file.
            bar_samps = _fixed_bar_grid(y.size, sr, bpm, offset=int(on_samps[0]))
            if len(bar_samps) >= 2:
                return bar_samps, sr
    except Exception:
        pass

    # 3) Pure math
    bar_samps = _fixed_bar_grid(y.size, sr, bpm)
    if not bar_samps:
        bar_samps = [0]
    return bar_samps, sr
383
+
384
+
385
def make_ranked_loops_numpy(
    stem_wav: Path, stem_name: str, bpm: int, key: str,
    bar_starts: list[int], sr_grid: int, bar_lengths: list[int],
    hop_bars: int, loops_per: int, top_k: int, fade_ms: int,
    seamless: bool, seam_ms: int, min_bar_gap: int,
    loud_mode: str, loud_target: float, out_dir: Path,
):
    """Cut, rank and export the best bar-aligned loops from one stem.

    Candidate segments start on every ``hop_bars``-th entry of ``bar_starts``
    and last ``bl`` bars (bar duration derived from ``bpm``) for each ``bl`` in
    ``bar_lengths``. Each candidate is scored as
    ``0.40 * rms + 0.40 * onset + 0.20 * centroid`` with every feature
    min-max normalised across all candidates.

    Args:
        stem_wav: Stem audio file to slice.
        stem_name: Label used in the exported file names.
        bpm: Tempo; used for the bar duration and in file names.
        key: Musical key label used in file names.
        bar_starts: Bar start positions in samples at ``sr_grid``.
        sr_grid: Sample rate of ``bar_starts``; the stem is loaded at this rate.
        bar_lengths: Loop lengths in bars.
        hop_bars: Use every n-th bar start as a candidate start.
        loops_per: Maximum number of loops to export.
        top_k: Keep only the ``top_k`` highest-scoring candidates before
            selection (values <= 0 keep all).
        fade_ms: Edge fade length when ``seamless`` is off.
        seamless: Crossfade the loop seam instead of fading the edges.
        seam_ms: Seam crossfade length in milliseconds.
        min_bar_gap: Minimum distance, in bar indices, between selected loops.
        loud_mode: Loudness mode passed to ``apply_loudness_np``.
        loud_target: Loudness target passed to ``apply_loudness_np``.
        out_dir: Directory the WAV files are written to.

    Returns:
        List of written file paths (empty when the stem is shorter than one
        second or no candidate fits).
    """
    y, sr = librosa.load(str(stem_wav), sr=sr_grid, mono=True)
    if y.size < sr:
        return []

    ms_per_bar = 240000.0 / max(1, bpm)
    samps_per_bar = int(sr * (ms_per_bar / 1000.0))

    bar_starts = [b for b in bar_starts if 0 <= b < y.size]
    if not bar_starts:
        bar_starts = [0]

    # Every candidate start is taken from bar_starts, so its bar index can be
    # looked up directly instead of scanning all bars per candidate. The first
    # occurrence wins, matching what an argmin over distances would return.
    bar_index: dict[int, int] = {}
    for idx, b in enumerate(bar_starts):
        bar_index.setdefault(int(b), idx)

    grid = bar_starts[:: max(1, int(hop_bars))]

    candidates = []
    for bl in bar_lengths:
        dur = int(samps_per_bar * int(bl))
        for start in grid:
            end = start + dur
            if end > y.size:
                continue
            feats = compute_segment_features(y[start:end].astype(np.float32), sr)
            candidates.append({
                "start": int(start), "bl": int(bl), "dur": int(dur),
                "rms": feats["rms"], "onset": feats["onset"], "centroid": feats["centroid"],
            })

    if not candidates:
        return []

    rms_n = normalize01(np.array([c["rms"] for c in candidates]))
    ons_n = normalize01(np.array([c["onset"] for c in candidates]))
    cen_n = normalize01(np.array([c["centroid"] for c in candidates]))

    for i, c in enumerate(candidates):
        # Weighted score: heavily favor Rhythm (Onset) and Energy (RMS)
        c["score"] = float(0.40 * rms_n[i] + 0.40 * ons_n[i] + 0.20 * cen_n[i])

    candidates.sort(key=lambda d: d["score"], reverse=True)
    if top_k > 0:
        candidates = candidates[: int(top_k)]

    # Greedy pick in score order, skipping anything too close to a loop that
    # has already been chosen.
    gap = int(min_bar_gap)
    wanted = int(loops_per)
    used_bar_idx = []
    selected = []
    for c in candidates:
        bidx = bar_index[c["start"]]
        if any(abs(bidx - u) < gap for u in used_bar_idx):
            continue
        selected.append(c)
        used_bar_idx.append(bidx)
        if len(selected) >= wanted:
            break

    ensure_dir(out_dir)
    exported = []
    fade_samps = int((int(fade_ms) / 1000.0) * sr)
    seam_samps = int((int(seam_ms) / 1000.0) * sr)

    for i, c in enumerate(selected, 1):
        start, dur, bl = c["start"], c["dur"], c["bl"]
        seg = y[start:start + dur].astype(np.float32)

        if seamless and seam_samps > 0:
            seg = crossfade_loop_seam(seg, seam_samps)
        else:
            seg = fade_edges(seg, fade_samps)

        seg = apply_loudness_np(seg, sr, loud_mode, loud_target)
        # Loudness gain can push peaks past full scale; clamp before writing.
        seg = np.clip(seg, -1.0, 1.0).astype(np.float32)

        fname = f"{bpm}BPM_{key}_{stem_name}_L{bl}bars_{i:02d}.wav"
        out_path = out_dir / fname
        sf.write(str(out_path), seg, sr)
        exported.append(out_path)
    return exported
461
+
462
+
463
def export_vocal_chops(
    vocals_wav: Path, bpm: int, key: str, chop_mode: str,
    loud_mode: str, loud_target: float, out_dir: Path
):
    """Slice a vocal stem at detected onsets and export short chops.

    Each chop starts at a detected onset (back-tracked) and lasts a fixed
    500 ms. At most the first 32 chops that fit inside the audio are written.

    Args:
        vocals_wav: Vocal stem audio file.
        bpm: Tempo label used in file names.
        key: Musical key label used in file names.
        chop_mode: Accepted for interface compatibility; this implementation
            always slices on onsets and does not read it.
        loud_mode: Loudness mode passed to ``apply_loudness_np``.
        loud_target: Loudness target passed to ``apply_loudness_np``.
        out_dir: Directory the WAV files are written to.

    Returns:
        List of written file paths (empty when the stem is shorter than one
        second).
    """
    y, sr = librosa.load(str(vocals_wav), sr=44100, mono=True)
    if y.size < sr:
        return []

    oenv = librosa.onset.onset_strength(y=y, sr=sr)
    onsets = librosa.onset.onset_detect(onset_envelope=oenv, sr=sr, backtrack=True, units="time")

    chop_len = int(0.5 * sr)  # Default 500ms slice
    chops = []
    for t in onsets:
        s = int(t * sr)
        e = s + chop_len
        # <= so a chop that ends exactly on the last sample is kept.
        if e <= y.size:
            chops.append((s, e))

    ensure_dir(out_dir)
    exported = []
    for i, (s, e) in enumerate(chops[:32], 1):
        seg = y[s:e].astype(np.float32)
        seg = fade_edges(seg, 200)  # 200-sample ramp on each edge
        seg = apply_loudness_np(seg, sr, loud_mode, loud_target)
        # Clamp like the loop exporter does, so normalisation gain cannot
        # leave samples outside [-1, 1].
        seg = np.clip(seg, -1.0, 1.0).astype(np.float32)
        out_path = out_dir / f"{bpm}BPM_{key}_VoxChop_{i:02d}.wav"
        sf.write(str(out_path), seg, sr)
        exported.append(out_path)
    return exported
495
+
496
+
497
def extract_midi(audio_path: Path, out_path: Path):
    """Transcribe ``audio_path`` to MIDI with Basic Pitch and store it at ``out_path``.

    Does nothing when Basic Pitch is unavailable. Failures are printed and
    swallowed so a transcription problem does not abort the caller.
    """
    if not MIDI_AVAILABLE:
        return

    target_dir = out_path.parent
    ensure_dir(target_dir)
    try:
        # NOTE(review): some basic-pitch releases may also require a model
        # argument here -- verify against the installed version.
        predict_and_save(
            [str(audio_path)],
            output_directory=str(target_dir),
            save_midi=True,
            sonify_midi=False,
            save_model_outputs=False,
            save_notes=False,
        )
        # Basic Pitch is expected to write "<input stem>_basic_pitch.mid";
        # rename it to the requested output name when it is there.
        generated = target_dir / f"{audio_path.stem}_basic_pitch.mid"
        if generated.exists():
            shutil.move(str(generated), str(out_path))
    except Exception as e:
        print(f"MIDI Error: {e}")
513
+
514
+
515
+ # =========================
516
+ # VIDEO
517
+ # =========================
518
def render_video_ffmpeg(art_path: Path, audio_path: Path, out_path: Path, fmt: str) -> Path:
    """Render a still-image promo video for ``audio_path`` with FFmpeg.

    The cover art is scaled and cropped to the target resolution, slowly
    zoomed, overlaid with a white bar near the bottom edge, and muxed with the
    audio as H.264/AAC.

    Args:
        art_path: Cover image.
        audio_path: Audio track.
        out_path: Output video file.
        fmt: Aspect-ratio label; unknown labels fall back to 1080x1920.

    Returns:
        ``out_path``.

    Raises:
        gr.Error: If FFmpeg is unavailable or exits with a non-zero status.
    """
    if not FFMPEG_OK:
        raise gr.Error("FFmpeg not found.")

    resolutions = {
        "9:16 (TikTok/Reels)": (1080, 1920),
        "16:9 (YouTube)": (1920, 1080),
        "1:1 (Square)": (1080, 1080),
    }
    w, h = resolutions.get(fmt, (1080, 1920))

    # Audio duration scales the bar width; assume 30 s if it cannot be read.
    try:
        meta = sf.info(str(audio_path))
        dur = meta.frames / meta.samplerate
    except Exception:
        dur = 30.0

    zoom_expr = "min(zoom+0.00035,1.08)"
    # Safe drawbox that doesn't rely on system fonts
    # NOTE(review): inside drawbox expressions `t` may refer to the box
    # thickness rather than the frame timestamp, in which case this bar would
    # not animate -- confirm against the FFmpeg drawbox documentation.
    drawbox = f"drawbox=x=0:y={h}-40:w='(t/{max(1.0, dur)})*{w}':h=20:color=white@0.8:t=fill"

    filter_chain = [
        f"scale={w}:{h}:force_original_aspect_ratio=increase",
        f"crop={w}:{h}",
        f"zoompan=z='{zoom_expr}':d=1:s={w}x{h}:fps=24",
        drawbox,
        "format=yuv420p",
    ]
    vf = ",".join(filter_chain)

    cmd = [FFMPEG_BIN, "-y"]
    cmd += ["-loop", "1", "-i", str(art_path)]
    cmd += ["-i", str(audio_path)]
    cmd += ["-shortest", "-r", "24", "-vf", vf]
    cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p"]
    cmd += ["-c:a", "aac", "-b:a", "192k"]
    cmd.append(str(out_path))

    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        # Only the tail of stderr is surfaced to keep the message readable.
        raise gr.Error(f"Video Error: {proc.stderr[-2000:]}")
    return out_path
556
+
557
+
558
+ # =========================
559
+ # PHASE 1
560
+ # =========================
561
def phase1_analyze(file_in, url_in, mode, manual_bpm, rerun):
    """Phase 1: ingest audio, estimate BPM/key and separate it into stems.

    Args:
        file_in: Path of an uploaded audio file (or falsy).
        url_in: URL to download audio from; takes precedence over ``file_in``.
        mode: Separation mode; ``"6stem"`` selects the ``htdemucs_6s`` model,
            anything else ``htdemucs``.
        manual_bpm: BPM override; values <= 0 (or empty) trigger detection.
        rerun: When true, ignore cached stems and run separation again.

    Returns:
        A 9-tuple: drums/bass/vocals stem paths (``None`` when missing), a
        status markdown string, BPM, key, the job directory as a string, and
        two ``gr.update`` objects carrying the available stem names.

    Raises:
        gr.Error: If neither a URL nor a file was supplied.
    """
    job_id = now_job_id()
    job_dir = ensure_dir(RUNS_DIR / job_id)
    in_dir = ensure_dir(job_dir / "input")

    # Input handling
    if url_in and str(url_in).strip():
        in_path = download_from_url(str(url_in).strip(), in_dir)
    elif file_in:
        uploaded = Path(file_in)
        in_path = in_dir / uploaded.name
        shutil.copy2(uploaded, in_path)
    else:
        raise gr.Error("No audio source.")

    wav_path = ensure_wav(in_path, in_dir / f"{in_path.stem}.wav")

    # BPM / Key
    if manual_bpm and float(manual_bpm) > 0:
        bpm = int(manual_bpm)
    else:
        y60, sr60 = librosa.load(str(wav_path), sr=22050, duration=60)
        tempo, _ = librosa.beat.beat_track(y=y60, sr=sr60)
        # beat_track may return the tempo as a scalar or a 1-element array.
        bpm = int(tempo[0] if np.ndim(tempo) > 0 else tempo)

    key = detect_key(wav_path)

    # Separation
    stems_dir = ensure_dir(job_dir / "stems")
    model_name = "htdemucs_6s" if mode == "6stem" else "htdemucs"
    # NOTE(review): "2stem" and "4stem" both resolve to the same model and the
    # same run_demucs arguments here -- confirm whether that is intended.

    # The cache is keyed on the audio hash AND the model, so stems produced by
    # one model are never served for a request that asked for the other.
    cache = cache_paths_for_hash(sha256_file(wav_path))
    cache_stems = cache["stems_dir"] / model_name

    if cache_stems.exists() and any(cache_stems.glob("*.wav")) and not rerun:
        copy_tree(cache_stems, stems_dir)
        source_msg = "Fetched from Cache"
    else:
        # Run Demucs
        track_dir = run_demucs(wav_path, model_name, job_dir / "demucs_tmp", False)
        build_instrumental(track_dir)
        for wav in track_dir.glob("*.wav"):
            shutil.copy2(wav, stems_dir / wav.name)

        # Save to Cache
        ensure_dir(cache["stems_dir"])
        wipe_dir(cache_stems)
        ensure_dir(cache_stems)
        for wav in stems_dir.glob("*.wav"):
            shutil.copy2(wav, cache_stems / wav.name)
        source_msg = "Ran Demucs (Saved to Cache)"

    # Sorted so the checkbox order is stable across runs and filesystems.
    valid_stems = sorted(f.stem.capitalize() for f in stems_dir.glob("*.wav"))
    stem_map = {
        "Drums": stems_dir / "drums.wav",
        "Bass": stems_dir / "bass.wav",
        "Vocals": stems_dir / "vocals.wav"
    }

    return (
        stem_map["Drums"] if stem_map["Drums"].exists() else None,
        stem_map["Bass"] if stem_map["Bass"].exists() else None,
        stem_map["Vocals"] if stem_map["Vocals"].exists() else None,
        f"✅ **Ready**\n- ID: `{job_id}`\n- Source: {source_msg}",
        bpm, key, str(job_dir),
        gr.update(choices=valid_stems, value=valid_stems),
        gr.update(choices=valid_stems, value=[s for s in valid_stems if s != "Vocals"])
    )
631
 
632
+ # =========================
633
+ # PHASE 2
634
+ # =========================
635
def phase2_export(
    job_dir_in, bpm, key, art, ex_stems, loop_stems,
    do_midi, do_oneshots, do_vocal_chops,
    loops_per, bars, loud_target, make_video, log_hist
):
    """Phase 2: build the sample pack (stems, loops, one-shots, chops, MIDI, video).

    Everything is written under ``<job_dir>/export`` and mirrored into
    ``OUTPUT_DIR``; the export folder is then zipped.

    Args:
        job_dir_in: Job directory produced by phase 1.
        bpm: Tempo; truncated to an integer for processing and file names.
        key: Musical key label used in file names.
        art: Cover image path for the optional video.
        ex_stems: Stem names to export in full.
        loop_stems: Stem names to generate loops from.
        do_midi: Extract MIDI from the bass/piano/other stems.
        do_oneshots: Slice drum one-shots.
        do_vocal_chops: Slice vocal chops.
        loops_per: Maximum loops per stem.
        bars: Loop lengths in bars (values convertible to ``int``).
        loud_target: Loudness target for loops.
        make_video: Render the promo video when ``art`` is provided.
        log_hist: Existing log text to append to.

    Returns:
        ``(zip_path, video_path_or_None, log_text)``.

    Raises:
        gr.Error: If no job is loaded, the BPM is missing, or the job has no
            stems.
    """
    log = log_hist or ""
    if not job_dir_in:
        raise gr.Error("No job loaded.")
    if bpm is None:
        raise gr.Error("No BPM set.")

    # One integer BPM everywhere, so stems, loops and the zip share a prefix.
    bpm = int(bpm)
    ex_stems = ex_stems or []
    loop_stems = loop_stems or []
    bars = bars or []

    job_dir = Path(job_dir_in)
    stems_dir = job_dir / "stems"
    # Fail with a readable message instead of a bare StopIteration later on.
    if next(stems_dir.glob("*.wav"), None) is None:
        raise gr.Error("No stems found for this job.")

    export_dir = ensure_dir(job_dir / "export")
    wipe_dir(export_dir)
    wipe_dir(OUTPUT_DIR)

    # Folders
    for d in ["Stems", "Loops", "MIDI", "OneShots", "Vocal_Chops", "Video"]:
        ensure_dir(export_dir / d)
        ensure_dir(OUTPUT_DIR / d)

    log = log_append(log, f"Starting Export: {bpm} BPM | {key}")

    # 1. Stems
    for stem_name in ex_stems:
        src = stems_dir / f"{stem_name.lower()}.wav"
        if src.exists():
            dst = export_dir / "Stems" / f"{bpm}BPM_{key}_{stem_name}.wav"
            shutil.copy2(src, dst)
            shutil.copy2(dst, OUTPUT_DIR / "Stems" / dst.name)

    # 2. Loops
    loop_sources = [(name, stems_dir / f"{name.lower()}.wav") for name in loop_stems]
    loop_sources = [(name, path) for name, path in loop_sources if path.exists()]
    if loop_sources:
        # The bar grid is only needed, and only computed, when loops are made.
        drums = stems_dir / "drums.wav"
        grid_src = drums if drums.exists() else next(stems_dir.glob("*.wav"))
        bar_samps, sr_grid = build_bar_grid_samples(grid_src, bpm)

        for stem_name, src in loop_sources:
            log = log_append(log, f"Looping {stem_name}...")
            loops = make_ranked_loops_numpy(
                src, stem_name, bpm, key, bar_samps, sr_grid,
                [int(b) for b in bars], 1, loops_per, 50,
                10, True, 25, 4, "lufs", float(loud_target), export_dir / "Loops"
            )
            for loop_path in loops:
                shutil.copy2(loop_path, OUTPUT_DIR / "Loops" / loop_path.name)

    # 3. One Shots (Improved Transient Preservation)
    if do_oneshots and (stems_dir / "drums.wav").exists():
        log = log_append(log, "Slicing Drums...")
        y, sr = librosa.load(str(stems_dir / "drums.wav"), sr=44100, mono=True)
        # Use simple energy based onset
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, backtrack=False)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr)

        shots = []
        for t in onset_times:
            # PRE-ROLL: Start 15ms before detected onset to catch the 'click'
            s = max(0, int((t - 0.015) * sr))
            e = min(y.size, s + int(0.4 * sr))
            seg = y[s:e]
            # Filter silence
            if seg.size and np.sqrt(np.mean(seg ** 2)) > 0.02:
                shots.append(seg)

        # Top 32 loudest
        shots = sorted(shots, key=lambda x: np.max(np.abs(x)), reverse=True)[:32]

        for i, shot in enumerate(shots, 1):
            shot = fade_edges(shot, 100)  # 100-sample ramp on each edge
            shot = apply_loudness_np(shot, sr, "peak", -1.0)  # Normalize hard
            dst = export_dir / "OneShots" / f"DrumShot_{i:02d}.wav"
            sf.write(str(dst), shot, sr)
            shutil.copy2(dst, OUTPUT_DIR / "OneShots" / dst.name)

    # 4. Vocal Chops
    if do_vocal_chops and (stems_dir / "vocals.wav").exists():
        log = log_append(log, "Chopping Vocals...")
        export_vocal_chops(
            stems_dir / "vocals.wav", bpm, key, "hybrid", "lufs", -14.0,
            export_dir / "Vocal_Chops"
        )
        for f in (export_dir / "Vocal_Chops").glob("*.wav"):
            shutil.copy2(f, OUTPUT_DIR / "Vocal_Chops" / f.name)

    # 5. MIDI
    if do_midi and MIDI_AVAILABLE:
        log = log_append(log, "Extracting MIDI...")
        for s in ["bass", "piano", "other"]:
            src = stems_dir / f"{s}.wav"
            if src.exists():
                midi_dst = export_dir / "MIDI" / f"{bpm}BPM_{key}_{s.capitalize()}.mid"
                extract_midi(src, midi_dst)
                # Mirror to OUTPUT_DIR like every other artefact; the file may
                # be absent because extract_midi swallows failures.
                if midi_dst.exists():
                    shutil.copy2(midi_dst, OUTPUT_DIR / "MIDI" / midi_dst.name)

    # 6. Video
    vid_path = None
    if make_video and art:
        log = log_append(log, "Rendering Video...")
        # Find audio for video: first exported loop, else the instrumental.
        audio_src = None
        if (export_dir / "Loops").exists():
            audio_src = next((export_dir / "Loops").glob("*.wav"), None)
        if not audio_src and (stems_dir / "no_vocals.wav").exists():
            audio_src = stems_dir / "no_vocals.wav"

        if audio_src:
            out_vid = export_dir / "Video" / "Promo.mp4"
            render_video_ffmpeg(Path(art), audio_src, out_vid, "9:16 (TikTok/Reels)")
            vid_path = str(out_vid)
            shutil.copy2(out_vid, OUTPUT_DIR / "Video" / out_vid.name)

    # Zip
    zip_path = export_dir / f"NightPulse_{bpm}_{key}.zip"
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(export_dir):
            for f in files:
                full = Path(root) / f
                # The archive lives inside the tree being walked; skip it.
                if full != zip_path:
                    zf.write(full, full.relative_to(export_dir))

    log = log_append(log, "✅ Done.")
    return str(zip_path), vid_path, log
754
+
755
+
756
+ # =========================
757
+ # UI
758
+ # =========================
759
with gr.Blocks(title="NightPulse Ultimate", theme=gr.themes.Base()) as app:
    gr.Markdown("## 🎹 Night Pulse | Ultimate v2")

    # State: job directory produced by phase 1, consumed by phase 2.
    job_state = gr.State()

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. Source & Separate")
            with gr.Tabs():
                with gr.Tab("Link"):
                    url = gr.Textbox(label="URL", placeholder="YouTube/SoundCloud...")
                with gr.Tab("File"):
                    file = gr.Audio(type="filepath", label="Upload")

            with gr.Row():
                mode = gr.Dropdown(["6stem", "4stem", "2stem"], value="6stem", label="Quality")
                mbpm = gr.Number(label="Manual BPM Override", value=0)

            rerun = gr.Checkbox(label="Force Re-Process (Ignore Cache)", value=False)
            btn1 = gr.Button("🚀 Analyze & Split", variant="primary")

        with gr.Column():
            gr.Markdown("### 2. Verify")
            status = gr.Markdown("Waiting for input...")

            with gr.Row():
                bpm_box = gr.Number(label="Detected BPM")
                key_box = gr.Textbox(label="Detected Key")

            with gr.Row():
                btn_half = gr.Button("½ Halve BPM")
                btn_double = gr.Button("2x Double BPM")

            # The BPM box is empty until an analysis has run; leave an empty
            # (or zero) value untouched instead of raising on None.
            def halve_bpm(x):
                return int(x / 2) if x else x

            def double_bpm(x):
                return int(x * 2) if x else x

            btn_half.click(halve_bpm, bpm_box, bpm_box)
            btn_double.click(double_bpm, bpm_box, bpm_box)

            with gr.Row():
                p1 = gr.Audio(label="Drums", interactive=False, height=60)
                p2 = gr.Audio(label="Bass", interactive=False, height=60)
                p3 = gr.Audio(label="Vocals", interactive=False, height=60)

    gr.Markdown("---")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 3. Pack Generator")
            ex_stems = gr.CheckboxGroup(label="Export Full Stems")
            lp_stems = gr.CheckboxGroup(label="Generate Loops From")

            with gr.Accordion("Pack Settings", open=True):
                with gr.Row():
                    loops_per = gr.Slider(1, 20, 8, 1, label="Loops per Stem")
                    bars = gr.CheckboxGroup(["4", "8"], value=["4", "8"], label="Lengths")
                with gr.Row():
                    do_midi = gr.Checkbox(label="Extract MIDI", value=True)
                    do_oneshots = gr.Checkbox(label="Drum One-Shots", value=True)
                    do_vocal_chops = gr.Checkbox(label="Vocal Chops", value=True)
                loud_target = gr.Slider(-20, -6, -12, 1, label="Loudness Target (LUFS)")

            with gr.Accordion("Video Promo", open=False):
                art = gr.Image(type="filepath", label="Cover Art", height=200)
                make_video = gr.Checkbox(label="Render 9:16 Video", value=False)

            btn2 = gr.Button("⚡ Export Pack", variant="primary")

        with gr.Column():
            gr.Markdown("### 4. Download")
            z_out = gr.File(label="Sample Pack Zip")
            v_out = gr.Video(label="Promo Video")
            log_out = gr.Textbox(label="Process Log", lines=10)

    # Wiring
    btn1.click(
        phase1_analyze,
        [file, url, mode, mbpm, rerun],
        [p1, p2, p3, status, bpm_box, key_box, job_state, ex_stems, lp_stems]
    )

    btn2.click(
        phase2_export,
        [job_state, bpm_box, key_box, art, ex_stems, lp_stems, do_midi, do_oneshots, do_vocal_chops, loops_per, bars, loud_target, make_video, log_out],
        [z_out, v_out, log_out]
    )
846
 
847
  if __name__ == "__main__":