"""Ambience beds for LoFinity tapes.

MusicGen ignores texture words ("vinyl crackle", "ocean waves"), so the
background layer is mixed in here instead: a bed is rendered at song length
and summed a few dB under the music. Lofi ambience loops through the whole
track anyway, so nothing needs to be generated per song.

vinyl_crackle and tape_hiss are synthesized procedurally (cheap, and never
sound repeated); the other seven are loops in assets/ambience/{slug}.wav,
rendered once by scripts/make_ambience.py and tiled with crossfades. A missing
asset falls back to vinyl crackle so every tape still has texture.
"""

import wave
from pathlib import Path

import numpy as np

ASSETS = Path(__file__).parent / "assets" / "ambience"

# Bed RMS relative to the music RMS, in dB. Starting points — tune by ear:
# spiky textures (crackle, fire) read louder than their RMS suggests.
GAIN_DB = {
    "vinyl_crackle": -14.0,
    "tape_hiss": -18.0,
    "soft_rain": -14.0,
    "ocean_waves": -12.0,
    "fireplace_crackle": -14.0,
    "birdsong": -16.0,
    "night_crickets": -16.0,
    "cafe_murmur": -16.0,
    "wind_in_trees": -14.0,
}

DEFAULT = "vinyl_crackle"

# Checked in order; first hit wins ("fireplace crackle" must match fire
# before crackle can claim it for vinyl). Matching is by substring.
_KEYWORDS = (
    ("fire", "fireplace_crackle"),
    ("rain", "soft_rain"),
    ("wave", "ocean_waves"),
    ("ocean", "ocean_waves"),
    ("sea", "ocean_waves"),
    ("bird", "birdsong"),
    ("cricket", "night_crickets"),
    ("cafe", "cafe_murmur"),
    ("coffee", "cafe_murmur"),
    ("murmur", "cafe_murmur"),
    ("chatter", "cafe_murmur"),
    ("wind", "wind_in_trees"),
    ("tree", "wind_in_trees"),
    ("leaves", "wind_in_trees"),
    ("vinyl", "vinyl_crackle"),
    ("crackle", "vinyl_crackle"),
    ("static", "vinyl_crackle"),
    ("record", "vinyl_crackle"),
    ("hiss", "tape_hiss"),
    ("tape", "tape_hiss"),
    ("noise", "tape_hiss"),
)


def normalize_slug(value) -> str:
    """Map whatever the LLM produced onto a known slug.

    The value is stringified, stripped and lower-cased. If replacing spaces
    and hyphens with underscores yields a key of GAIN_DB, that key is
    returned; otherwise the first _KEYWORDS entry whose word occurs anywhere
    in the text decides ("Ocean waves!" -> ocean_waves). Anything
    unrecognizable, including None or an empty string, becomes DEFAULT.
    """
    text = str(value or "").strip().lower()
    slug = text.replace(" ", "_").replace("-", "_")
    if slug in GAIN_DB:
        return slug
    for word, match in _KEYWORDS:
        if word in text:
            return match
    return DEFAULT


# --- procedural beds ----------------------------------------------------------


def _lowpassed_noise(n: int, rate: int, cutoff: float, rng) -> np.ndarray:
    """Return n samples of cheap dull noise.

    Gaussian noise is drawn at roughly 2 * cutoff samples per second (never
    below 200) and linearly upsampled to `rate`; the interpolation is the
    lowpass.
    """
    low_rate = max(int(cutoff * 2), 200)
    # +2 keeps the last interpolation position inside the coarse grid.
    m = max(int(n * low_rate / rate) + 2, 2)
    coarse = rng.standard_normal(m)
    return np.interp(np.arange(n) * (low_rate / rate), np.arange(m), coarse)


def _vinyl_crackle(n: int, rate: int, rng) -> np.ndarray:
    """Return n samples of dusty surface noise plus sparse pops.

    The pops are tiny (1-4 ms decaying exponentials, about nine per second),
    with random sign and a squared-uniform amplitude so most of them stay
    quiet.
    """
    if n <= 0:
        # rng.integers(0, 0, ...) below would raise on an empty range.
        return np.zeros(0)
    out = _lowpassed_noise(n, rate, 2500, rng) * 0.06
    for pos in rng.integers(0, n, max(int(n / rate * 9), 1)):
        length = int(rate * rng.uniform(0.001, 0.004))
        amp = rng.uniform(0.15, 1.0) ** 2 * np.sign(rng.standard_normal())
        pop = amp * np.exp(-np.arange(length) / (length / 5))
        # Clip pops that would run past the end of the bed.
        end = min(pos + length, n)
        out[pos:end] += pop[: end - pos]
    return out


def _tape_hiss(n: int, rate: int, rng) -> np.ndarray:
    """Return n samples of bright, slowly breathing hiss."""
    white = rng.standard_normal(n)
    # first difference tilts the spectrum toward the highs, where hiss lives
    tilted = np.zeros(n)
    tilted[1:] = np.diff(white)
    hiss = 0.35 * white + 0.65 * tilted
    # slow wobble so it breathes like a real transport
    lfo = 0.3  # Hz
    phase = rng.uniform(0, 2 * np.pi)
    wobble = np.sin(2 * np.pi * lfo * np.arange(n) / rate + phase)
    return hiss * (1.0 + 0.08 * wobble)


_PROCEDURAL = {"vinyl_crackle": _vinyl_crackle, "tape_hiss": _tape_hiss}


# --- sampled beds ---------------------------------------------------------------


def _read_wav(path: Path) -> tuple[np.ndarray, int]:
    """Load a 16-bit PCM wav as mono float64 samples plus its frame rate.

    Multi-channel files are averaged down to one channel.

    Raises:
        ValueError: if the file is not 16-bit, or holds no audio frames.
    """
    with wave.open(str(path), "rb") as w:
        rate, channels, width = w.getframerate(), w.getnchannels(), w.getsampwidth()
        raw = w.readframes(w.getnframes())
    if width != 2:
        raise ValueError(f"{path.name}: expected 16-bit wav, got {width * 8}-bit")
    # WAV PCM is little-endian. Converting to float64 also yields a writable
    # array, which _tile needs for its in-place float ramps. The absolute
    # scale is not critical: render() peak-normalizes the bed afterwards.
    data = np.frombuffer(raw, dtype="<i2").astype(np.float64) / 32768.0
    if channels > 1:
        data = data.reshape(-1, channels).mean(axis=1)
    if data.size == 0:
        # An empty loop cannot be tiled; fail here with a readable message.
        raise ValueError(f"{path.name}: wav contains no audio frames")
    return data, rate


def _resample(data: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
    """Linearly interpolate `data` from src_rate to dst_rate.

    Returns `data` itself (not a copy) when the rates already match or when
    there is nothing to resample.
    """
    if src_rate == dst_rate or len(data) == 0:
        return data
    m = int(len(data) * dst_rate / src_rate)
    return np.interp(np.arange(m) * (src_rate / dst_rate), np.arange(len(data)), data)


def _tile(loop: np.ndarray, n: int, rate: int) -> np.ndarray:
    """Repeat the loop out to n samples, crossfading each seam so it doesn't
    click. The loop does not need to be seamless.

    The fade uses equal-power (sqrt) ramps, not linear: the tail and head
    being blended are uncorrelated audio, so linear ramps would sum to
    ~3-6 dB below the surrounding level at the crossfade midpoint (an audible
    dip every loop). With sqrt ramps gain_out**2 + gain_in**2 == 1, holding
    power steady.

    Raises:
        ValueError: if the loop is empty but n > 0.
    """
    if n <= 0:
        return np.zeros(0)
    if len(loop) >= n:
        return loop[:n].copy()
    if len(loop) == 0:
        raise ValueError("cannot tile an empty loop")
    # Up to half a second of overlap, but never more than a quarter of the loop.
    fade = min(int(rate * 0.5), len(loop) // 4)
    if fade == 0:
        # Loop too short to crossfade: butt-join the copies.
        return np.tile(loop, n // len(loop) + 1)[:n]
    ramp = np.sqrt(np.linspace(0.0, 1.0, fade))
    # Headroom of one loop so the final segment can be written whole.
    out = np.zeros(n + len(loop))
    pos = 0
    while pos < n:
        seg = loop.copy()
        if pos:
            # The very first segment starts at full level; later ones fade in.
            seg[:fade] *= ramp
        seg[-fade:] *= ramp[::-1]
        out[pos : pos + len(seg)] += seg
        # Advance by less than a full loop so the next head overlaps this tail.
        pos += len(loop) - fade
    return out[:n]


# --- public API -----------------------------------------------------------------


def render(slug: str, n: int, rate: int) -> np.ndarray:
    """A peak-normalized bed of n samples at `rate`; the caller sets the level.

    Procedural slugs are synthesized with a fresh, unseeded generator, so two
    calls never return the same bed. Any other slug is read from
    ASSETS/{slug}.wav, resampled to `rate` and tiled to length. A non-positive
    n yields an empty bed.

    Raises:
        FileNotFoundError: if a sampled slug has no asset file.
        ValueError: if the asset is not a usable 16-bit wav.
    """
    n = max(int(n), 0)
    if slug in _PROCEDURAL:
        bed = _PROCEDURAL[slug](n, rate, np.random.default_rng())
    else:
        loop, loop_rate = _read_wav(ASSETS / f"{slug}.wav")
        bed = _tile(_resample(loop, loop_rate, rate), n, rate)
    # An empty bed has no peak; an all-zero bed must not divide by zero.
    peak = float(np.abs(bed).max()) if bed.size else 0.0
    return bed / (peak or 1.0)


def mix(music, rate: int, slug: str) -> np.ndarray:
    """Sum the ambience bed under the music at its slug's relative RMS level.

    `slug` is passed through normalize_slug first. When the chosen bed is
    sampled and its asset file is missing, a notice is printed and vinyl
    crackle is used instead. The bed is scaled to GAIN_DB[slug] dB relative
    to the music's RMS and faded in and out over up to 0.75 s at the edges.

    NOTE(review): `music` is assumed to be 1-D mono samples; multi-channel
    input is not handled here — confirm against callers.

    Returns:
        A float64 array: music plus bed. Empty or (near-)silent input is
        returned unchanged, converted to float64.
    """
    slug = normalize_slug(slug)
    if slug not in _PROCEDURAL and not (ASSETS / f"{slug}.wav").exists():
        print(
            f"[lofinity] no ambience asset for {slug!r} "
            "(run scripts/make_ambience.py), using vinyl crackle"
        )
        slug = DEFAULT

    music = np.asarray(music, dtype=np.float64)
    if music.size == 0:
        # The RMS of nothing is NaN, which would slip past the silence check.
        return music
    music_rms = float(np.sqrt(np.mean(music**2)))
    if music_rms < 1e-6:  # silence in, silence out
        return music

    bed = render(slug, len(music), rate)
    bed_rms = float(np.sqrt(np.mean(bed**2))) or 1.0
    bed *= music_rms * 10 ** (GAIN_DB[slug] / 20) / bed_rms

    # Linear fade at both ends so the bed never starts or stops abruptly.
    edge = min(int(rate * 0.75), len(bed) // 4)
    if edge:
        ramp = np.linspace(0.0, 1.0, edge)
        bed[:edge] *= ramp
        bed[-edge:] *= ramp[::-1]
    return music + bed