File size: 7,160 Bytes
722a5d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""Ambience beds for LoFinity tapes.

MusicGen ignores texture words ("vinyl crackle", "ocean waves"), so the
background layer is mixed in here instead: a bed is rendered at song length
and summed a few dB under the music. Lofi ambience loops through the whole
track anyway, so nothing needs to be generated per song.

vinyl_crackle and tape_hiss are synthesized procedurally (cheap, and never
sound repeated); the other seven are loops in assets/ambience/<slug>.wav,
rendered once by scripts/make_ambience.py and tiled with crossfades. A
missing asset falls back to vinyl crackle so every tape still has texture.
"""

import wave
from pathlib import Path

import numpy as np

# Directory of the pre-rendered ambience loops; render() and mix() look for
# one "<slug>.wav" per sampled bed in here.
ASSETS = Path(__file__).parent / "assets" / "ambience"

# Bed RMS relative to the music RMS, in dB. Starting points — tune by ear:
# spiky textures (crackle, fire) read louder than their RMS suggests.
# The keys double as the set of valid slugs: normalize_slug() accepts a name
# verbatim only if it is a key here, and mix() looks the gain up by slug.
GAIN_DB = {
    "vinyl_crackle": -14.0,
    "tape_hiss": -18.0,
    "soft_rain": -14.0,
    "ocean_waves": -12.0,
    "fireplace_crackle": -14.0,
    "birdsong": -16.0,
    "night_crickets": -16.0,
    "cafe_murmur": -16.0,
    "wind_in_trees": -14.0,
}
# Slug used when the request is unrecognizable (normalize_slug) or when the
# requested bed has no asset on disk (mix).
DEFAULT = "vinyl_crackle"

# Fallback table for normalize_slug(): (keyword, slug) pairs tried when the
# cleaned-up text is not already a known slug.
# Checked in order; first hit wins ("fireplace crackle" must match fire
# before crackle can claim it for vinyl).
# Matching is a plain substring test on the lowercased text, so a keyword
# also hits inside longer words ("wave" in "waves", but equally "rain" in
# "train" or "tree" in "street") — keep that in mind when adding entries.
_KEYWORDS = (
    ("fire", "fireplace_crackle"),
    ("rain", "soft_rain"),
    ("wave", "ocean_waves"),
    ("ocean", "ocean_waves"),
    ("sea", "ocean_waves"),
    ("bird", "birdsong"),
    ("cricket", "night_crickets"),
    ("cafe", "cafe_murmur"),
    ("coffee", "cafe_murmur"),
    ("murmur", "cafe_murmur"),
    ("chatter", "cafe_murmur"),
    ("wind", "wind_in_trees"),
    ("tree", "wind_in_trees"),
    ("leaves", "wind_in_trees"),
    ("vinyl", "vinyl_crackle"),
    ("crackle", "vinyl_crackle"),
    ("static", "vinyl_crackle"),
    ("record", "vinyl_crackle"),
    ("hiss", "tape_hiss"),
    ("tape", "tape_hiss"),
    ("noise", "tape_hiss"),
)


def normalize_slug(value) -> str:
    """Resolve free-form ambience text from the LLM to one of the known slugs.

    The input is stringified (None and other falsy values become ""),
    stripped and lowercased. If swapping spaces and hyphens for underscores
    yields a GAIN_DB key, that key is returned. Otherwise the first
    _KEYWORDS entry whose keyword occurs anywhere in the text decides
    ("Ocean waves!" -> ocean_waves). With no match at all the result is
    DEFAULT.
    """
    cleaned = str(value or "").strip().lower()
    candidate = cleaned.replace(" ", "_").replace("-", "_")
    if candidate in GAIN_DB:
        return candidate
    # First keyword found as a substring wins; table order is significant.
    keyword_hits = (target for needle, target in _KEYWORDS if needle in cleaned)
    return next(keyword_hits, DEFAULT)


# --- procedural beds ----------------------------------------------------------


def _lowpassed_noise(n: int, rate: int, cutoff: float, rng) -> np.ndarray:
    """Return n samples of dull (high-frequency-poor) noise.

    White noise is drawn from `rng` at a reduced rate of about 2*cutoff
    (never below 200 Hz) and stretched to `rate` by linear interpolation;
    the interpolation itself does the lowpassing, so no filter is needed.
    """
    coarse_rate = max(int(cutoff * 2), 200)
    # Two spare points so the last output sample still has a right-hand
    # neighbour to interpolate towards.
    coarse_len = max(int(n * coarse_rate / rate) + 2, 2)
    knots = rng.standard_normal(coarse_len)
    # Position of every output sample measured in coarse-sample units.
    positions = np.arange(n) * (coarse_rate / rate)
    return np.interp(positions, np.arange(coarse_len), knots)


def _vinyl_crackle(n: int, rate: int, rng) -> np.ndarray:
    """Synthesize n samples of record surface noise: quiet dust plus pops.

    The floor is lowpassed noise (2.5 kHz cutoff) scaled to 0.06. On top of
    it go roughly nine pops per second (at least one), each a 1-4 ms
    exponentially decaying spike of random polarity. Pop amplitude is a
    uniform draw squared, which skews the pops toward the quiet end.
    """
    bed = 0.06 * _lowpassed_noise(n, rate, 2500, rng)
    pop_count = max(int(n / rate * 9), 1)
    for start in rng.integers(0, n, pop_count):
        duration = int(rate * rng.uniform(0.001, 0.004))
        # Squaring keeps loud pops rare; the sign draw picks the polarity.
        level = rng.uniform(0.15, 1.0) ** 2 * np.sign(rng.standard_normal())
        # Decays to exp(-5) of its starting level over the pop's length.
        spike = level * np.exp(-np.arange(duration) / (duration / 5))
        # Pops that start near the end are clipped to the buffer.
        stop = min(start + duration, n)
        bed[start:stop] += spike[: stop - start]
    return bed


def _tape_hiss(n: int, rate: int, rng) -> np.ndarray:
    """Synthesize n samples of bright, slowly breathing tape hiss.

    White noise from `rng` is blended with its own first difference to
    tilt the spectrum toward the highs, then amplitude-modulated by +/-8 %
    at 0.3 Hz with a random starting phase.
    """
    noise = rng.standard_normal(n)
    # Sample-to-sample differences emphasize the highs, where hiss lives;
    # the first slot has no predecessor and stays at zero.
    edges = np.zeros(n)
    edges[1:] = noise[1:] - noise[:-1]
    bright = 0.35 * noise + 0.65 * edges
    # Slow wobble so the hiss breathes like a real transport.
    wobble_hz = 0.3
    start_phase = rng.uniform(0, 2 * np.pi)
    ticks = np.arange(n)
    swell = 1.0 + 0.08 * np.sin(2 * np.pi * wobble_hz * ticks / rate + start_phase)
    return bright * swell


# Slugs that are synthesized on demand instead of loaded from ASSETS.
# render() calls the mapped generator as fn(n, rate, rng).
_PROCEDURAL = {"vinyl_crackle": _vinyl_crackle, "tape_hiss": _tape_hiss}


# --- sampled beds ---------------------------------------------------------------


def _read_wav(path: Path) -> tuple[np.ndarray, int]:
    """Load a 16-bit PCM wav file as mono float64 samples.

    Returns ``(samples, sample_rate)`` with samples scaled by 1/32768, i.e.
    into [-1, 1). Files with more than one channel are folded to mono by
    averaging the channels of each frame.

    Raises:
        ValueError: the file's sample width is not 16 bits.
    """
    with wave.open(str(path), "rb") as reader:
        sample_rate = reader.getframerate()
        channel_count = reader.getnchannels()
        sample_width = reader.getsampwidth()
        frames = reader.readframes(reader.getnframes())
    if sample_width != 2:
        raise ValueError(f"{path.name}: expected 16-bit wav, got {sample_width * 8}-bit")
    # Little-endian int16 -> float, so full-scale negative maps to -1.0.
    samples = np.frombuffer(frames, dtype="<i2").astype(np.float64) / 32768.0
    if channel_count <= 1:
        return samples, sample_rate
    # Interleaved frames: one row per frame, one column per channel.
    return samples.reshape(-1, channel_count).mean(axis=1), sample_rate


def _resample(data: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
    """Convert `data` from src_rate to dst_rate by linear interpolation.

    When the rates already match, the input array itself is returned (no
    copy). Otherwise the output holds int(len(data) * dst_rate / src_rate)
    samples; positions past the last input sample take that sample's value.
    """
    if src_rate != dst_rate:
        out_len = int(len(data) * dst_rate / src_rate)
        # Where each output sample falls on the input's sample grid.
        src_positions = np.arange(out_len) * (src_rate / dst_rate)
        return np.interp(src_positions, np.arange(len(data)), data)
    return data


def _tile(loop: np.ndarray, n: int, rate: int) -> np.ndarray:
    """Stretch `loop` to n samples by repeating it with crossfaded seams.

    A loop that already covers n samples is simply cut to length (as a
    copy). Otherwise consecutive repetitions overlap by up to half a second
    (never more than a quarter of the loop) and are blended across that
    overlap, so the loop need not be seamless to avoid clicks. A loop too
    short for any overlap is repeated back to back.

    The blend is equal-power (sqrt ramps) rather than linear: the outgoing
    tail and incoming head are uncorrelated audio, so linear ramps would
    sum to ~3-6 dB below the surrounding level mid-fade, an audible dip
    on every repetition. With sqrt ramps gain_out**2 + gain_in**2 == 1,
    which keeps the power steady.
    """
    period = len(loop)
    if period >= n:
        return loop[:n].copy()
    overlap = min(int(rate * 0.5), period // 4)
    if not overlap:
        return np.tile(loop, n // period + 1)[:n]
    fade_in = np.sqrt(np.linspace(0.0, 1.0, overlap))
    fade_out = fade_in[::-1]
    # The opening repetition starts at full level and only fades out.
    opening = loop.copy()
    opening[-overlap:] *= fade_out
    # Every later repetition additionally fades in over the previous tail.
    repeat = opening.copy()
    repeat[:overlap] *= fade_in
    # One spare loop of headroom so the final repetition can overrun n.
    canvas = np.zeros(n + period)
    canvas[:period] += opening
    hop = period - overlap
    for start in range(hop, n, hop):
        canvas[start : start + period] += repeat
    return canvas[:n]


# --- public API -----------------------------------------------------------------


def render(slug: str, n: int, rate: int) -> np.ndarray:
    """A peak-normalized bed of n samples at `rate`; the caller sets the level.

    Slugs listed in _PROCEDURAL are synthesized with a fresh, unseeded
    random generator, so two renders never match. Any other slug is read
    from ASSETS/<slug>.wav, resampled to `rate` and tiled out to n samples.
    The slug is not validated here; pass it through normalize_slug() first.

    Args:
        slug: Name of the bed to render.
        n: Number of samples to produce. Zero yields an empty array.
        rate: Output sample rate in Hz.

    Returns:
        A float array of length n whose largest absolute sample is 1.0
        (an all-zero bed is returned unscaled).

    Raises:
        ValueError: n is negative, or the asset is not a 16-bit wav.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    if n == 0:
        # Neither the generators nor the peak search below cope with zero
        # samples (empty random range, max() of an empty array).
        return np.zeros(0)
    if slug in _PROCEDURAL:
        bed = _PROCEDURAL[slug](n, rate, np.random.default_rng())
    else:
        loop, loop_rate = _read_wav(ASSETS / f"{slug}.wav")
        bed = _tile(_resample(loop, loop_rate, rate), n, rate)
    # `or 1.0` leaves an all-zero bed untouched instead of dividing by zero.
    return bed / (float(np.abs(bed).max()) or 1.0)


def mix(music, rate: int, slug: str) -> np.ndarray:
    """Sum the ambience bed under the music at its slug's relative RMS level.

    Args:
        music: Audio samples; converted to a float64 array. The bed is
            rendered as a 1-D array of ``len(music)`` samples, so this
            presumably expects mono 1-D audio — confirm against the caller.
        rate: Sample rate of ``music`` in Hz.
        slug: Requested ambience. It goes through normalize_slug(), so
            free-form text is fine; a sampled bed whose wav is missing
            falls back to DEFAULT with a printed notice.

    Returns:
        ``music + bed`` as a new float64 array. Empty input and near-silent
        input (RMS below 1e-6) come back unmixed, as the float64 array.
    """
    music = np.asarray(music, dtype=np.float64)
    if music.size == 0:
        # The RMS of an empty array is NaN, which would slip past the
        # silence check below and crash the renderer; there is nothing to
        # lay a bed under anyway.
        return music
    slug = normalize_slug(slug)
    if slug not in _PROCEDURAL and not (ASSETS / f"{slug}.wav").exists():
        print(
            f"[lofinity] no ambience asset for {slug!r} "
            "(run scripts/make_ambience.py), using vinyl crackle"
        )
        slug = DEFAULT
    music_rms = float(np.sqrt(np.mean(music**2)))
    if music_rms < 1e-6:  # silence in, silence out
        return music
    bed = render(slug, len(music), rate)
    # `or 1.0` avoids a division by zero if the bed itself came back silent.
    bed_rms = float(np.sqrt(np.mean(bed**2))) or 1.0
    # Scale the bed so its RMS sits GAIN_DB[slug] dB relative to the music.
    bed *= music_rms * 10 ** (GAIN_DB[slug] / 20) / bed_rms
    # Ease the bed in and out (up to 0.75 s, at most a quarter of the track
    # each) so it does not start or stop abruptly.
    edge = min(int(rate * 0.75), len(bed) // 4)
    if edge:
        ramp = np.linspace(0.0, 1.0, edge)
        bed[:edge] *= ramp
        bed[-edge:] *= ramp[::-1]
    return music + bed