LoFinity / ambience.py
eloigil6's picture
Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.
722a5d8
Raw
History Blame Contribute Delete
7.16 kB
"""Ambience beds for LoFinity tapes.
MusicGen ignores texture words ("vinyl crackle", "ocean waves"), so the
background layer is mixed in here instead: a bed is rendered at song length
and summed under the music at a per-texture level (see GAIN_DB, roughly
12-18 dB below the music RMS). Lofi ambience loops through the whole
track anyway, so nothing needs to be generated per song.
vinyl_crackle and tape_hiss are synthesized procedurally (cheap, and never
sound repeated); the other seven are loops in assets/ambience/<slug>.wav,
rendered once by scripts/make_ambience.py and tiled with crossfades. A
missing asset falls back to vinyl crackle so every tape still has texture.
"""
import wave
from pathlib import Path
import numpy as np
# Directory that holds the sampled loops (<slug>.wav), resolved relative to
# this module's own location.
ASSETS = Path(__file__).parent / "assets" / "ambience"

# Bed RMS relative to the music RMS, in dB. Starting points — tune by ear:
# spiky textures (crackle, fire) read louder than their RMS suggests.
# The keys double as the set of known slugs: normalize_slug() accepts a
# value verbatim only if it is a key of this table.
GAIN_DB = {
    "vinyl_crackle": -14.0,
    "tape_hiss": -18.0,
    "soft_rain": -14.0,
    "ocean_waves": -12.0,
    "fireplace_crackle": -14.0,
    "birdsong": -16.0,
    "night_crickets": -16.0,
    "cafe_murmur": -16.0,
    "wind_in_trees": -14.0,
}

# Slug used when the request is unrecognizable or its asset file is missing.
DEFAULT = "vinyl_crackle"
# (substring, slug) pairs used by normalize_slug() when the input is not
# already an exact slug. Each substring is tested against the lower-cased
# input text.
# Checked in order; first hit wins ("fireplace crackle" must match fire
# before crackle can claim it for vinyl).
_KEYWORDS = (
    ("fire", "fireplace_crackle"),
    ("rain", "soft_rain"),
    ("wave", "ocean_waves"),
    ("ocean", "ocean_waves"),
    ("sea", "ocean_waves"),
    ("bird", "birdsong"),
    ("cricket", "night_crickets"),
    ("cafe", "cafe_murmur"),
    ("coffee", "cafe_murmur"),
    ("murmur", "cafe_murmur"),
    ("chatter", "cafe_murmur"),
    ("wind", "wind_in_trees"),
    ("tree", "wind_in_trees"),
    ("leaves", "wind_in_trees"),
    ("vinyl", "vinyl_crackle"),
    ("crackle", "vinyl_crackle"),
    ("static", "vinyl_crackle"),
    ("record", "vinyl_crackle"),
    ("hiss", "tape_hiss"),
    ("tape", "tape_hiss"),
    ("noise", "tape_hiss"),
)
def normalize_slug(value) -> str:
    """Coerce free-form text onto one of the known ambience slugs.

    The input is stringified (falsy values become ""), trimmed and
    lower-cased. If swapping spaces and hyphens for underscores yields a key
    of GAIN_DB, that key is returned. Otherwise the first _KEYWORDS entry
    whose substring occurs in the cleaned text decides the slug
    ("Ocean waves!" -> ocean_waves). With no hit at all, DEFAULT is returned.
    """
    cleaned = str(value or "").strip().lower()

    candidate = cleaned.replace(" ", "_")
    candidate = candidate.replace("-", "_")
    if candidate in GAIN_DB:
        return candidate

    # Table order matters: the generator stops at the first matching keyword.
    hits = (slug for needle, slug in _KEYWORDS if needle in cleaned)
    return next(hits, DEFAULT)
# --- procedural beds ----------------------------------------------------------
def _lowpassed_noise(n: int, rate: int, cutoff: float, rng) -> np.ndarray:
    """Return n samples of dull noise made by upsampling coarse white noise.

    Gaussian values are drawn at roughly twice `cutoff` (never below 200
    draws per second) and stretched to `rate` by linear interpolation; the
    interpolation itself acts as the lowpass. `rng` must provide
    `standard_normal`.
    """
    coarse_rate = max(int(cutoff * 2), 200)
    # Two spare knots so the last output sample always has a right neighbour.
    knot_count = max(int(n * coarse_rate / rate) + 2, 2)
    knots = rng.standard_normal(knot_count)

    knot_positions = np.arange(knot_count)
    sample_positions = np.arange(n) * (coarse_rate / rate)
    return np.interp(sample_positions, knot_positions, knots)
def _vinyl_crackle(n: int, rate: int, rng) -> np.ndarray:
    """Return n samples of dusty surface noise with sparse, quiet pops.

    The bed is lowpassed noise (2.5 kHz cutoff) scaled to 0.06, plus about
    nine pops per second (at least one). Each pop is a 1-4 ms exponential
    decay with random polarity; its amplitude is a squared uniform draw, so
    most pops are small. Pops that would run past the end are truncated.

    A request for zero (or fewer) samples returns an empty array instead of
    asking the generator for positions in an empty range, which would raise.
    """
    if n <= 0:
        return np.zeros(0)

    out = _lowpassed_noise(n, rate, 2500, rng) * 0.06
    pop_count = max(int(n / rate * 9), 1)
    for pos in rng.integers(0, n, pop_count):
        length = int(rate * rng.uniform(0.001, 0.004))
        # Squaring skews amplitudes toward quiet; the sign picks polarity.
        amp = rng.uniform(0.15, 1.0) ** 2 * np.sign(rng.standard_normal())
        # Decay constant is a fifth of the pop, so it has died out by its end.
        pop = amp * np.exp(-np.arange(length) / (length / 5))
        end = min(pos + length, n)
        out[pos:end] += pop[: end - pos]
    return out
def _tape_hiss(n: int, rate: int, rng) -> np.ndarray:
    """Return n samples of bright, gently wobbling noise.

    White noise is blended 35/65 with its own first difference, then
    amplitude-modulated by +/-8 % at 0.3 Hz with a random starting phase.
    `rng` must provide `standard_normal` and `uniform`.
    """
    noise = rng.standard_normal(n)

    # Sample-to-sample difference emphasises the highs, where hiss lives;
    # the first sample has no predecessor and stays at zero.
    highs = np.zeros(n)
    highs[1:] = noise[1:] - noise[:-1]
    blend = 0.35 * noise + 0.65 * highs

    # Slow level wobble so the hiss breathes like a real transport.
    wobble_hz = 0.3
    start_phase = rng.uniform(0, 2 * np.pi)
    angle = 2 * np.pi * wobble_hz * np.arange(n) / rate + start_phase
    envelope = 1.0 + 0.08 * np.sin(angle)
    return blend * envelope
# Slugs that are synthesized on the fly rather than read from ASSETS; each
# generator is called as fn(n, rate, rng).
_PROCEDURAL = {
    "vinyl_crackle": _vinyl_crackle,
    "tape_hiss": _tape_hiss,
}
# --- sampled beds ---------------------------------------------------------------
def _read_wav(path: Path) -> tuple[np.ndarray, int]:
    """Load a 16-bit PCM wav as mono float64 samples plus its sample rate.

    Samples are scaled by 1/32768. Multi-channel files are folded to mono by
    averaging the channels of each frame.

    Raises:
        ValueError: if the file's sample width is not 2 bytes.
    """
    with wave.open(str(path), "rb") as reader:
        rate = reader.getframerate()
        channels = reader.getnchannels()
        width = reader.getsampwidth()
        frames = reader.readframes(reader.getnframes())

    if width != 2:
        raise ValueError(f"{path.name}: expected 16-bit wav, got {width * 8}-bit")

    # Little-endian signed 16-bit, interleaved by channel.
    samples = np.frombuffer(frames, dtype="<i2").astype(np.float64) / 32768.0
    if channels <= 1:
        return samples, rate
    return samples.reshape(-1, channels).mean(axis=1), rate
def _resample(data: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
    """Convert `data` from src_rate to dst_rate by linear interpolation.

    When the rates already match, the input array itself is returned (no
    copy). No anti-alias filtering is applied.
    """
    if src_rate != dst_rate:
        out_len = int(len(data) * dst_rate / src_rate)
        # Position of every output sample, measured in input samples.
        src_positions = np.arange(out_len) * (src_rate / dst_rate)
        data = np.interp(src_positions, np.arange(len(data)), data)
    return data
def _tile(loop: np.ndarray, n: int, rate: int) -> np.ndarray:
    """Repeat `loop` out to n samples, crossfading each seam so it doesn't click.

    The loop does not need to be seamless. Consecutive copies overlap by
    `fade` samples: half a second at `rate`, capped at a quarter of the loop.
    A loop already at least n samples long is simply truncated (as a copy).
    A loop too short to fade (fewer than 4 samples) is butt-joined. An empty
    loop yields n samples of silence rather than dividing by its zero length.

    The fade uses equal-power (sqrt) ramps, not linear: the tail and head
    being blended are uncorrelated audio, so linear ramps would sum to ~3 dB
    below the surrounding level at the crossfade midpoint (an audible dip
    every loop). With sqrt ramps gain_out**2 + gain_in**2 == 1, holding power
    steady.

    The start of the first copy and the end of the output are not faded here.
    """
    size = len(loop)
    if size >= n:
        return loop[:n].copy()
    if size == 0:
        return np.zeros(n)

    fade = min(int(rate * 0.5), size // 4)
    if fade == 0:
        return np.tile(loop, n // size + 1)[:n]

    ramp = np.sqrt(np.linspace(0.0, 1.0, fade))

    # Both segment shapes are loop-invariant, so build them once. Casting to
    # float64 also lets integer loops take the fractional ramp gains.
    first = loop.astype(np.float64)
    first[-fade:] *= ramp[::-1]  # opening copy: fade-out only
    later = first.copy()
    later[:fade] *= ramp  # every following copy also fades in

    # Extra headroom of one loop lets the last copy overrun before trimming.
    out = np.zeros(n + size)
    out[:size] = first
    hop = size - fade
    for pos in range(hop, n, hop):
        out[pos : pos + size] += later
    return out[:n]
# --- public API -----------------------------------------------------------------
def render(slug: str, n: int, rate: int) -> np.ndarray:
    """Return a peak-normalized bed of n samples at `rate`; the caller sets the level.

    Slugs listed in _PROCEDURAL are synthesized with a fresh, unseeded
    random generator, so every call differs. Any other slug is read from
    ASSETS/<slug>.wav, resampled to `rate` and tiled out to n samples.

    The slug is used as given (it is not normalized here), and errors from
    reading the asset propagate to the caller, e.g. the ValueError that
    _read_wav raises for a wav that is not 16-bit.

    A request for zero (or fewer) samples returns an empty array; taking the
    peak of a zero-size bed would otherwise raise.
    """
    if n <= 0:
        return np.zeros(0)

    if slug in _PROCEDURAL:
        bed = _PROCEDURAL[slug](n, rate, np.random.default_rng())
    else:
        loop, loop_rate = _read_wav(ASSETS / f"{slug}.wav")
        bed = _tile(_resample(loop, loop_rate, rate), n, rate)

    # An all-zero bed has peak 0; divide by 1 instead so it stays silent.
    peak = float(np.abs(bed).max())
    return bed / (peak or 1.0)
def mix(music, rate: int, slug: str) -> np.ndarray:
    """Sum the ambience bed under the music at its slug's relative RMS level.

    The slug is first passed through normalize_slug(). If it is neither
    procedural nor backed by a file in ASSETS, a notice is printed and
    DEFAULT is used instead. The bed is rendered at the music's length and
    scaled so its RMS sits GAIN_DB[slug] dB relative to the music's RMS. It
    is then faded in and out over up to 0.75 s (capped at a quarter of the
    track) before being added.

    `music` is converted to a float64 array. Empty or effectively silent
    input (RMS below 1e-6) is returned as that array with no bed added; an
    empty track would otherwise produce a NaN RMS and fail while rendering.

    NOTE(review): the bed is 1-D with length len(music), so this assumes
    `music` is a 1-D mono signal. Confirm against the caller.
    """
    slug = normalize_slug(slug)
    if slug not in _PROCEDURAL and not (ASSETS / f"{slug}.wav").exists():
        print(
            f"[lofinity] no ambience asset for {slug!r} "
            "(run scripts/make_ambience.py), using vinyl crackle"
        )
        slug = DEFAULT

    music = np.asarray(music, dtype=np.float64)
    if music.size == 0:  # nothing to mix under
        return music
    music_rms = float(np.sqrt(np.mean(music**2)))
    if music_rms < 1e-6:  # silence in, silence out
        return music

    bed = render(slug, len(music), rate)
    bed_rms = float(np.sqrt(np.mean(bed**2))) or 1.0
    # Match the bed's RMS to the music's, then apply the per-slug offset.
    bed *= music_rms * 10 ** (GAIN_DB[slug] / 20) / bed_rms

    # Linear fade at both ends so the bed doesn't start or stop abruptly.
    edge = min(int(rate * 0.75), len(bed) // 4)
    if edge:
        ramp = np.linspace(0.0, 1.0, edge)
        bed[:edge] *= ramp
        bed[-edge:] *= ramp[::-1]
    return music + bed