Spaces:
Running on Zero
Running on Zero
Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.
722a5d8 | """Ambience beds for LoFinity tapes. | |
| MusicGen ignores texture words ("vinyl crackle", "ocean waves"), so the | |
| background layer is mixed in here instead: a bed is rendered at song length | |
| and summed a few dB under the music. Lofi ambience loops through the whole | |
| track anyway, so nothing needs to be generated per song. | |
| vinyl_crackle and tape_hiss are synthesized procedurally (cheap, and never | |
| sound repeated); the other seven are loops in assets/ambience/<slug>.wav, | |
| rendered once by scripts/make_ambience.py and tiled with crossfades. A | |
| missing asset falls back to vinyl crackle so every tape still has texture. | |
| """ | |
| import wave | |
| from pathlib import Path | |
| import numpy as np | |
# Directory of the sampled loops: assets/ambience/ next to this module file.
# Sampled beds are looked up here as "<slug>.wav".
ASSETS = Path(__file__).parent / "assets" / "ambience"
# Bed RMS relative to the music RMS, in dB. Starting points — tune by ear:
# spiky textures (crackle, fire) read louder than their RMS suggests.
# The keys double as the set of known slugs: normalize_slug() accepts a value
# verbatim when it is a key here, and mix() indexes this table to scale the bed.
GAIN_DB = {
    "vinyl_crackle": -14.0,
    "tape_hiss": -18.0,
    "soft_rain": -14.0,
    "ocean_waves": -12.0,
    "fireplace_crackle": -14.0,
    "birdsong": -16.0,
    "night_crickets": -16.0,
    "cafe_murmur": -16.0,
    "wind_in_trees": -14.0,
}
# Slug used when the requested ambience is unrecognizable or its asset is missing.
DEFAULT = "vinyl_crackle"
# Checked in order; first hit wins ("fireplace crackle" must match fire
# before crackle can claim it for vinyl).
# Each entry is (substring, slug). normalize_slug() tests the substring with
# `in` against the lowercased text, so a keyword also matches inside longer
# words (e.g. "rain" inside "train").
_KEYWORDS = (
    ("fire", "fireplace_crackle"),
    ("rain", "soft_rain"),
    ("wave", "ocean_waves"),
    ("ocean", "ocean_waves"),
    ("sea", "ocean_waves"),
    ("bird", "birdsong"),
    ("cricket", "night_crickets"),
    ("cafe", "cafe_murmur"),
    ("coffee", "cafe_murmur"),
    ("murmur", "cafe_murmur"),
    ("chatter", "cafe_murmur"),
    ("wind", "wind_in_trees"),
    ("tree", "wind_in_trees"),
    ("leaves", "wind_in_trees"),
    ("vinyl", "vinyl_crackle"),
    ("crackle", "vinyl_crackle"),
    ("static", "vinyl_crackle"),
    ("record", "vinyl_crackle"),
    ("hiss", "tape_hiss"),
    ("tape", "tape_hiss"),
    ("noise", "tape_hiss"),
)
def normalize_slug(value) -> str:
    """Coerce free-form ambience text onto one of the known slugs.

    The value is stringified (falsy values become ""), stripped and
    lowercased. If swapping spaces and hyphens for underscores yields a key of
    GAIN_DB, that key is returned as-is. Otherwise the keyword table is
    scanned in order and the slug of the first keyword found as a substring of
    the cleaned text wins ("Ocean waves!" -> ocean_waves). Anything that
    matches nothing becomes the default crackle.
    """
    cleaned = str(value or "").strip().lower()
    candidate = cleaned.replace(" ", "_").replace("-", "_")
    if candidate in GAIN_DB:
        return candidate
    # First keyword hit wins; the table order encodes the priorities.
    return next(
        (target for needle, target in _KEYWORDS if needle in cleaned),
        DEFAULT,
    )
| # --- procedural beds ---------------------------------------------------------- | |
def _lowpassed_noise(n: int, rate: int, cutoff: float, rng) -> np.ndarray:
    """Return n samples of dull noise without running a real filter.

    Gaussian noise is drawn at a coarse rate of roughly twice the cutoff
    (never below 200 Hz) and stretched to the target rate by linear
    interpolation; the interpolation itself acts as the lowpass.

    `rng` must provide `standard_normal(size)`, as a numpy Generator does.
    """
    coarse_rate = max(int(cutoff * 2), 200)
    # Two spare knots so the last query position never runs past the table.
    knot_count = max(int(n * coarse_rate / rate) + 2, 2)
    knots = rng.standard_normal(knot_count)
    # Position of every output sample measured in coarse-sample units.
    query = np.arange(n) * (coarse_rate / rate)
    return np.interp(query, np.arange(knot_count), knots)
def _vinyl_crackle(n: int, rate: int, rng) -> np.ndarray:
    """Synthesize n samples of record surface noise with occasional pops.

    A quiet bed of lowpassed noise (2.5 kHz, scaled by 0.06) carries about
    nine pops per second (at least one). Each pop starts at a random sample,
    lasts 1-4 ms, has a random polarity and a squared-uniform amplitude (so
    most pops are small), and decays exponentially.
    """
    bed = _lowpassed_noise(n, rate, 2500, rng) * 0.06
    pop_count = max(int(n / rate * 9), 1)
    for start in rng.integers(0, n, pop_count):
        duration = int(rate * rng.uniform(0.001, 0.004))
        # Squaring the uniform draw skews amplitudes toward the quiet end.
        magnitude = rng.uniform(0.15, 1.0) ** 2
        polarity = np.sign(rng.standard_normal())
        decay = np.exp(-np.arange(duration) / (duration / 5))
        # Clip pops that would run past the end of the buffer.
        stop = min(start + duration, n)
        bed[start:stop] += (magnitude * polarity * decay)[: stop - start]
    return bed
def _tape_hiss(n: int, rate: int, rng) -> np.ndarray:
    """Synthesize n samples of tape hiss: bright noise with a slow wobble.

    White noise is blended with its own first difference (35 % / 65 %), which
    tilts the spectrum toward the highs where hiss lives. A 0.3 Hz sine with a
    random starting phase then modulates the level by +/-8 % so the hiss
    breathes like a real transport.
    """
    lfo_hz = 0.3
    depth = 0.08

    white = rng.standard_normal(n)
    # First difference with a leading zero so the length stays n.
    tilted = np.zeros(n)
    tilted[1:] = white[1:] - white[:-1]
    hiss = 0.35 * white + 0.65 * tilted

    phase = rng.uniform(0, 2 * np.pi)
    wobble = np.sin(2 * np.pi * lfo_hz * np.arange(n) / rate + phase)
    return hiss * (1.0 + depth * wobble)
# Slugs that are synthesized rather than loaded from disk, mapped to their
# generator. Each generator is called as f(n, rate, rng); render() treats any
# slug not listed here as a wav loop under ASSETS.
_PROCEDURAL = {"vinyl_crackle": _vinyl_crackle, "tape_hiss": _tape_hiss}
| # --- sampled beds --------------------------------------------------------------- | |
def _read_wav(path: Path) -> tuple[np.ndarray, int]:
    """Load a 16-bit PCM wav as mono float64 samples plus its sample rate.

    Samples are scaled by 1/32768, so they lie in [-1.0, 1.0). Files with more
    than one channel are downmixed by averaging the channels of each frame.

    Raises:
        ValueError: if the file's sample width is not 16 bits.
    """
    with wave.open(str(path), "rb") as wav:
        info = wav.getparams()
        raw = wav.readframes(info.nframes)
    if info.sampwidth != 2:
        raise ValueError(
            f"{path.name}: expected 16-bit wav, got {info.sampwidth * 8}-bit"
        )
    # Little-endian signed 16-bit PCM, interleaved by channel.
    samples = np.frombuffer(raw, dtype="<i2").astype(np.float64) / 32768.0
    if info.nchannels <= 1:
        return samples, info.framerate
    mono = samples.reshape(-1, info.nchannels).mean(axis=1)
    return mono, info.framerate
def _resample(data: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
    """Convert `data` from src_rate to dst_rate by linear interpolation.

    When the rates already agree the input array itself is returned (no
    copy). Otherwise the output holds int(len(data) * dst_rate / src_rate)
    samples; read positions past the last input sample take its value. No
    anti-alias filter is applied before downsampling.
    """
    if src_rate != dst_rate:
        out_len = int(len(data) * dst_rate / src_rate)
        # Where each output sample falls on the input's sample grid.
        read_at = np.arange(out_len) * (src_rate / dst_rate)
        return np.interp(read_at, np.arange(len(data)), data)
    return data
def _tile(loop: np.ndarray, n: int, rate: int) -> np.ndarray:
    """Repeat `loop` out to n samples, crossfading each seam so it doesn't
    click. The loop does not need to be seamless.

    Behaviour by case:
      * loop already at least n samples long: a truncated copy is returned;
      * empty loop: n samples of silence (there is nothing to repeat);
      * loop too short for a fade (under 4 samples, or rate * 0.5 < 1):
        plain repetition without crossfades;
      * otherwise: copies are overlapped by `fade` samples, where fade is the
        smaller of half a second and a quarter of the loop.

    The fade uses equal-power (sqrt) ramps, not linear: the tail and head
    being blended are uncorrelated audio, so linear ramps would dip about
    3 dB below the surrounding level at the crossfade midpoint (audible every
    loop). With sqrt ramps gain_out**2 + gain_in**2 == 1, holding power steady.
    """
    size = len(loop)
    if size >= n:
        return loop[:n].copy()
    if size == 0:
        # Guard the divisions below: an empty loop can only tile to silence.
        return np.zeros(n)

    fade = min(int(rate * 0.5), size // 4)
    if fade == 0:
        return np.tile(loop, n // size + 1)[:n]

    ramp = np.sqrt(np.linspace(0.0, 1.0, fade))
    # Build both segment shapes once instead of re-fading a copy per pass.
    # fade <= size // 4, so the head and tail regions never overlap.
    first = loop.copy()
    first[-fade:] *= ramp[::-1]  # every copy fades out into its successor
    later = first.copy()
    later[:fade] *= ramp  # every copy but the first also fades in

    # One extra loop of headroom so the final copy can be written whole.
    out = np.zeros(n + size)
    for pos in range(0, n, size - fade):
        out[pos : pos + size] += later if pos else first
    return out[:n]
| # --- public API ----------------------------------------------------------------- | |
def render(slug: str, n: int, rate: int) -> np.ndarray:
    """Return a peak-normalized bed of n samples at `rate`; the caller sets
    the level.

    Procedural slugs are synthesized with a fresh, unseeded random generator,
    so every call differs. Any other slug is read from ASSETS/<slug>.wav,
    resampled to `rate` and tiled to length; errors from opening or decoding
    that file propagate to the caller. An all-zero bed is returned unscaled
    rather than divided by zero.

    For n <= 0 an empty array is returned.
    """
    if n <= 0:
        # Nothing to render. Without this guard the peak search below (and
        # the pop placement in the crackle generator) fail on zero length.
        return np.zeros(0)
    if slug in _PROCEDURAL:
        bed = _PROCEDURAL[slug](n, rate, np.random.default_rng())
    else:
        loop, loop_rate = _read_wav(ASSETS / f"{slug}.wav")
        bed = _tile(_resample(loop, loop_rate, rate), n, rate)
    peak = float(np.abs(bed).max())
    return bed / (peak or 1.0)
def mix(music, rate: int, slug: str) -> np.ndarray:
    """Sum the ambience bed under the music at its slug's relative RMS level.

    Args:
        music: audio samples; converted to a float64 array.
            NOTE(review): the bed is rendered as a 1-D array of len(music)
            samples, so this presumably expects mono audio — confirm with the
            caller.
        rate: sample rate of `music` in Hz.
        slug: requested ambience; passed through normalize_slug() first.

    Returns:
        music plus the scaled bed. Empty or (near-)silent input is returned
        without any bed.

    A sampled slug whose wav is missing from ASSETS is reported on stdout and
    replaced by the default crackle. The result is not limited or clipped.
    """
    music = np.asarray(music, dtype=np.float64)
    if music.size == 0:
        # The RMS of nothing is NaN, which would slip past the silence check
        # below and fail while rendering a zero-length bed.
        return music

    slug = normalize_slug(slug)
    if slug not in _PROCEDURAL and not (ASSETS / f"{slug}.wav").exists():
        print(
            f"[lofinity] no ambience asset for {slug!r} "
            "(run scripts/make_ambience.py), using vinyl crackle"
        )
        slug = DEFAULT

    music_rms = float(np.sqrt(np.mean(music**2)))
    if music_rms < 1e-6:  # silence in, silence out
        return music

    bed = render(slug, len(music), rate)
    bed_rms = float(np.sqrt(np.mean(bed**2))) or 1.0
    # Scale the bed so its RMS sits GAIN_DB[slug] dB relative to the music's.
    bed *= music_rms * 10 ** (GAIN_DB[slug] / 20) / bed_rms

    # Linear fade-in/out (up to 0.75 s, at most a quarter of the track) so the
    # bed does not start or stop abruptly.
    edge = min(int(rate * 0.75), len(bed) // 4)
    if edge:
        ramp = np.linspace(0.0, 1.0, edge)
        bed[:edge] *= ramp
        bed[-edge:] *= ramp[::-1]
    return music + bed