Spaces:
Running on Zero
Running on Zero
File size: 17,403 Bytes
722a5d8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | """Download the sampled ambience beds from Wikimedia Commons.
A no-GPU alternative to make_ambience.py: instead of generating the seven
sampled beds, this pulls real field recordings from Wikimedia Commons
(public-domain / CC-licensed), trims each to a steady ~30 s loop, and writes
mono 16-bit wavs into assets/ambience/ — the format ambience.py expects.
It auto-selects: for each slug it searches Commons, drops obvious junk
(alarms, music, traffic…) by keyword, then downloads candidates in turn and
measures them, keeping the first that is long enough and not near-silent.
Provenance + licence for every pick is written to assets/ambience/CREDITS.md
so attribution can be honored when the Space ships.
Usage:
uv pip install soundfile # bundles libsndfile (ogg/mp3/flac/wav)
python scripts/fetch_ambience.py # fill in what's missing
python scripts/fetch_ambience.py ocean_waves --force
"""
import argparse
import io
import json
import re
import sys
import time
import unicodedata
import urllib.parse
import urllib.request
import wave
from pathlib import Path
import numpy as np
# Two directories above this script's resolved path.
ROOT = Path(__file__).resolve().parent.parent
# Destination for the fetched wavs, credits.json and CREDITS.md.
OUT_DIR = ROOT / "assets" / "ambience"
# Wikimedia Commons API endpoint queried by commons_api.
API = "https://commons.wikimedia.org/w/api.php"
# User-Agent header sent with every API request and file download.
UA = "LoFinity/0.1 (lofi hackathon ambience fetcher; https://huggingface.co/spaces)"
TARGET_S = 30.0 # loop length we keep == default song length, so a bed
# this long tiles to a 30 s song with zero seams
# Durations below are in seconds; fetch_one compares them against both the
# duration reported by Commons and the decoded length.
MIN_SRC_DUR = 8.0 # too short to be useful ambience
MAX_SRC_DUR = 400.0 # skip anything longer (podcasts, mixes)
MAX_BYTES = 30_000_000 # don't pull giant wavs
# Sample rate in Hz; resample() only ever lowers a rate to this, never raises it.
MAX_RATE = 32000 # cap stored rate (== musicgen rate); keeps files small
# How to find each bed: a list of probes whose results are unioned. Commons
# search ANDs every word in a probe, so each probe stays 1-2 words; more
# probes = more candidates to fall back through. ("category", name) lists a
# curated category; ("search", terms) is a File-namespace full-text search.
# find_titles requests at most 30 members per category probe and 15 hits per
# search probe, and prefixes every search probe with "filetype:audio".
SOURCES = {
    "soft_rain": [("category", "Sounds of rain"), ("search", "rain ambience")],
    "ocean_waves": [("search", "ocean waves"), ("search", "sea waves"),
                    ("search", "surf beach")],
    "fireplace_crackle": [("search", "campfire"), ("search", "fireplace"),
                          ("search", "fire crackling")],
    "birdsong": [("search", "birdsong"), ("search", "dawn chorus"),
                 ("search", "birds chirping")],
    "night_crickets": [("search", "crickets"), ("search", "cricket chirping"),
                       ("search", "cicada")],
    "wind_in_trees": [("search", "wind trees"), ("search", "wind forest"),
                      ("search", "wind leaves")],
    "cafe_murmur": [("search", "restaurant ambience"), ("search", "cafe ambience"),
                    ("search", "crowd murmur")],
}
# Hand-vetted Commons files tried before falling back to search — auto-selection
# can't judge "continuous dawn chorus" vs "one repetitive cuckoo", so the good
# picks found during development are pinned here. Still run through every gate
# below, so a renamed/deleted file just falls through to search.
# fetch_one looks each pick's metadata up by this string, so keep every title
# written exactly as Commons reports it (including the "File:" prefix, which
# fetch_one strips with title[5:] for display and credits).
PREFERRED = {
    "soft_rain": "File:Lluvia en techo de lamina.wav",
    "ocean_waves": "File:Sea waves.wav",
    "fireplace_crackle": "File:WWS Fireoftheforge.ogg",
    "birdsong": "File:Birds singing in Fribourg 01.ogg",
    "night_crickets": "File:Black-Prince-Cicada- Psaltoda-plaga.wav",
    "wind_in_trees": "File:Wind in forest (Gravity Sound).wav",
    "cafe_murmur": "File:Shopping mall less crowded.ogg",
}
# Title contains any of these (lowercased) -> not ambience, skip it. This is
# what keeps "fire" from returning fire *alarms*, "sea" from podcasts, and
# "waves" from sine-wave test tones.
# NOTE: find_titles tests these with plain substring containment on the
# accent-stripped, lowercased title, so an entry also fires inside longer
# words — "war" rejects a title containing "warbler", "tone" one containing
# "stone". Keep that in mind before adding short entries.
BLOCKLIST = (
    "alarm", "podcast", "episode", "interview", "speech", "talk", "lecture",
    "music", "song -", "band", "orchestra", "anthem", "hymn", "vocal", "choir",
    "dance", "ritual", "march", "siren", "horn", "traffic", "tram", "engine",
    "motor", "gun", "explosion", "war", "radio", "national", "voice", "demo",
    "sine", "tone", "hz", "sweep", "beep", "dtmf", "calibration", "signal",
    "woodwind", "clarinet", "flute", "accordion", "instrument", "guitar",
)
# Chosen file's title must contain one of these (accent-stripped) — a sound
# actually related to the slug. Multilingual because Commons is international.
# find_titles matches these as substrings of the normalized title, which is
# why truncated stems such as "crackl", "cicad", "rustl" and "ambien" are
# enough to cover several word forms.
RELEVANCE = {
    "soft_rain": ("rain", "lluvia", "regen", "pluie", "pioggia", "chuva",
                  "downpour", "drizzle", "storm"),
    "ocean_waves": ("ocean", "wave", "sea", "surf", "beach", "mar", "ola",
                    "vague", "welle", "tide", "shore", "playa", "costa"),
    "fireplace_crackle": ("fire", "campfire", "fireplace", "crackl", "crepit",
                          "feu", "fuego", "hoguera", "fogata", "ember", "hearth"),
    "birdsong": ("bird", "song", "chorus", "dawn", "chirp", "cuckoo", "wren",
                 "sparrow", "robin", "blackbird", "finch", "warbler", "thrush",
                 "nightingale", "lark", "vogel", "oiseau", "pajaro", "canto"),
    "night_crickets": ("cricket", "cicada", "cicad", "cigarra", "grasshopper",
                       "grillo", "grille", "katydid", "locust", "insect", "chirp"),
    "wind_in_trees": ("wind", "breeze", "gust", "rustl", "viento", "vent",
                      "howl", "gale", "brisa", "blowing"),
    "cafe_murmur": ("cafe", "restaurant", "crowd", "murmur", "coffee", "bar",
                    "pub", "chatter", "ambien", "mall", "station", "people",
                    "plaza", "market", "tunnel", "hall", "lobby", "gente"),
}
def _norm(s):
"""Lowercase + strip accents so 'pájaro'/'Pajaro' both match 'pajaro'."""
s = unicodedata.normalize("NFKD", str(s))
return "".join(c for c in s if not unicodedata.combining(c)).lower()
def commons_api(params, tries=5):
    """Call the Wikimedia Commons API and return the decoded JSON response.

    ``params`` are the query parameters; ``format=json`` and
    ``formatversion=2`` are always added, overriding any caller-supplied
    value for those keys. ``tries`` is the maximum number of attempts.

    An HTTP 429 response is retried after a linearly growing pause
    (2 s, 4 s, ...) while attempts remain. Any other ``HTTPError``, and a 429
    on the last attempt, propagates to the caller. ``{}`` is returned only
    when ``tries`` is less than 1, i.e. no request was made.
    """
    # Imported explicitly: the file only imports urllib.parse and
    # urllib.request, so naming urllib.error.HTTPError would depend on
    # urllib.request importing that submodule as a side effect.
    from urllib.error import HTTPError

    query = {**params, "format": "json", "formatversion": "2"}
    url = API + "?" + urllib.parse.urlencode(query)
    for attempt in range(tries):
        try:
            req = urllib.request.Request(url, headers={"User-Agent": UA})
            with urllib.request.urlopen(req, timeout=30) as r:
                return json.load(r)
        except HTTPError as e:
            if e.code == 429 and attempt < tries - 1:
                # rate limited: back off a little longer each time
                time.sleep(2 * (attempt + 1))
                continue
            raise
    return {}
def find_titles(slug):
    """Collect candidate Commons titles for *slug*, best-first.

    Every probe in ``SOURCES[slug]`` is run (one API call each, with a 1 s
    pause after it) and the hits are concatenated in probe order. The result
    keeps the first occurrence of each title, and only titles whose
    normalized form contains no ``BLOCKLIST`` entry and at least one
    ``RELEVANCE[slug]`` keyword.
    """
    candidates = []
    for kind, value in SOURCES[slug]:
        if kind == "category":
            reply = commons_api({"action": "query", "list": "categorymembers",
                                 "cmtitle": f"Category:{value}", "cmtype": "file",
                                 "cmlimit": "30"})
            rows = reply.get("query", {}).get("categorymembers", [])
        else:
            reply = commons_api({"action": "query", "list": "search", "srnamespace": "6",
                                 "srsearch": f"filetype:audio {value}", "srlimit": "15"})
            rows = reply.get("query", {}).get("search", [])
        candidates.extend([row["title"] for row in rows])
        time.sleep(1)  # pause between probes

    keywords = RELEVANCE[slug]
    already, accepted = set(), []
    for title in candidates:
        if title in already:
            continue
        plain = _norm(title)
        is_junk = any(bad in plain for bad in BLOCKLIST)
        is_relevant = any(kw in plain for kw in keywords)
        if is_junk or not is_relevant:
            continue
        already.add(title)
        accepted.append(title)
    return accepted
def file_info(titles):
    """Look up download metadata for *titles*, 20 per API call.

    Returns a dict mapping title -> ``dict(url, dur, mediatype, license,
    artist, page)``. Fields the API response does not carry come back as
    ``""`` (``0.0`` for ``dur``, ``"?"`` for ``license``, ``"Unknown"`` for
    ``artist``), so callers can test ``url`` to see whether a file resolved.

    Entries are keyed by the title the API reports. When the API says it
    normalized a requested title (the ``normalized`` list of the response),
    the same entry is also stored under the title as requested, so a caller
    doing ``info.get(requested_title)`` still finds it.
    """
    out = {}
    for i in range(0, len(titles), 20):
        batch = titles[i:i + 20]
        info = commons_api({"action": "query", "titles": "|".join(batch),
                            "prop": "imageinfo",
                            "iiprop": "url|size|mediatype|extmetadata"})
        query = info.get("query", {})
        for page in query.get("pages", []):
            # pages without imageinfo fall back to an empty record
            ii = (page.get("imageinfo") or [{}])[0]
            ext = ii.get("extmetadata", {})
            license_name = ext.get("LicenseShortName", {}).get("value", "")
            artist_html = ext.get("Artist", {}).get("value", "")
            out[page.get("title", "?")] = {
                "url": ii.get("url", ""),
                "dur": float(ii.get("duration") or 0.0),
                "mediatype": ii.get("mediatype", ""),
                "license": license_name or "?",
                "artist": _strip_html(artist_html) or "Unknown",
                "page": ii.get("descriptionurl", ""),
            }
        # Alias normalized titles back to what the caller asked for; never
        # overwrite an entry the API returned under that exact title.
        for entry in query.get("normalized", []):
            asked, canonical = entry.get("from"), entry.get("to")
            if asked and canonical in out:
                out.setdefault(asked, out[canonical])
        time.sleep(1)  # pause between batches
    return out
def _strip_html(s):
return re.sub(r"<[^>]+>", "", s).strip()
def spectral_flatness(mono, rate):
    """Spectral flatness of the first 4 s of *mono*.

    Flatness is the geometric mean of the power spectrum divided by its
    arithmetic mean: close to 0 for a pure tone, larger for broadband
    texture. It catches test tones that get past the title filter (a
    'Sine Wave' file is named like a sea 'wave').

    Before the FFT the excerpt has its mean removed and is high-passed with a
    first difference. Crowd and surf recordings carry heavy low-frequency
    rumble that otherwise dominates the spectrum and looks falsely 'tonal'
    (calibration: real cafe recordings measured 2e-5 raw against 1e-12 for a
    true sine — too close; after the high-pass they separate to 2e-3 vs
    1e-12).

    Excerpts shorter than 256 samples are not analysed and score 1.0.
    """
    excerpt = mono[: rate * 4].astype(np.float64)
    if len(excerpt) < 256:
        return 1.0
    centered = excerpt - excerpt.mean()
    highpassed = np.diff(centered)
    taper = np.hanning(len(highpassed))
    spectrum = np.fft.rfft(highpassed * taper)
    # the small floor keeps log() finite on empty bins
    power = np.abs(spectrum) ** 2 + 1e-12
    geometric_mean = np.exp(np.mean(np.log(power)))
    arithmetic_mean = np.mean(power)
    return float(geometric_mean / arithmetic_mean)
def download(url):
    """Fetch *url* and return its body as bytes, refusing oversize files.

    Raises ``ValueError`` when the body is larger than ``MAX_BYTES``: either
    up front, from the ``Content-Length`` header, or after reading when the
    header is missing or understated. Network errors from ``urlopen``
    propagate unchanged.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=60) as r:
        length = int(r.headers.get("Content-Length") or 0)
        if length > MAX_BYTES:
            raise ValueError(f"too big ({length / 1e6:.0f} MB)")
        # Read one byte past the cap so an oversize body is detectable even
        # without a trustworthy Content-Length.
        blob = r.read(MAX_BYTES + 1)
    if len(blob) > MAX_BYTES:
        # Previously this case returned the clipped bytes as if complete.
        raise ValueError(f"too big (> {MAX_BYTES / 1e6:.0f} MB)")
    return blob
def decode_mono(blob):
    """Decode an in-memory audio file to ``(mono float64 samples, rate)``.

    soundfile is tried first and its channels are averaged to mono. If it
    raises ``LibsndfileError`` (Opus/other codecs libsndfile can't open) the
    blob is handed to ``_decode_av`` instead.
    """
    import soundfile as sf

    buffer = io.BytesIO(blob)
    try:
        frames, rate = sf.read(buffer, dtype="float64", always_2d=True)
    except sf.LibsndfileError:
        return _decode_av(blob)
    return frames.mean(axis=1), rate
def _decode_av(blob):
    """Decode *blob* with PyAV (bundles ffmpeg) to ``(mono float64, rate)``.

    Fallback for files libsndfile can't open — most Commons crowd/cafe
    recordings are Ogg/Opus. The first audio stream is converted to mono
    float at its own sample rate. Raises ``ValueError`` when no frames decode.
    """
    import av

    pieces = []
    with av.open(io.BytesIO(blob)) as container:
        audio = container.streams.audio[0]
        rate = audio.codec_context.sample_rate
        to_mono = av.AudioResampler(format="flt", layout="mono", rate=rate)
        for frame in container.decode(audio):
            converted = to_mono.resample(frame)
            pieces.extend([part.to_ndarray().reshape(-1) for part in converted])
    if not pieces:
        raise ValueError("no audio frames decoded")
    samples = np.concatenate(pieces).astype(np.float64)
    return samples, rate
def steady_window(mono, rate):
    """Cut the best ``TARGET_S``-second loop out of *mono*.

    Clips no longer than the target are returned whole (the mixer tiles
    them). Otherwise candidate windows are scored on 100 ms RMS frames, lower
    being better, on three terms — needed because the mixer crossfades the
    loop's tail back into its head:

    - interior steadiness: coefficient of variation of the frame RMS, so the
      loop doesn't swell or drop
    - head/tail mismatch: energy difference between the two ~0.5 s edges, so
      the crossfade blends like-for-like
    - boundary lull: how far the quieter edge sits below the clip's median
      level, so the loop point doesn't briefly drop out

    The last two matter for sparse textures (birdsong, fireplace): a window
    chosen on variance alone can still start/end in a gap and dip ~10 dB on
    every loop. Windows whose mean level is under half the median are skipped
    outright. If no window qualifies, the centred window is returned.
    """
    n = int(TARGET_S * rate)
    if len(mono) <= n:
        return mono

    hop = max(int(rate * 0.1), 1)  # 100 ms frames: fine enough to see the seam
    frame_starts = range(0, len(mono) - hop, hop)
    levels = np.array([np.sqrt(np.mean(mono[at:at + hop] ** 2)) for at in frame_starts])
    typical = float(np.median(levels)) or 1.0

    span = max(n // hop, 1)                 # frames per candidate window
    edge = max(int(rate * 0.5) // hop, 1)   # frames spanning one crossfade (~0.5 s)
    stride = max(span // 8, 1)              # candidates are 1/8 window apart

    chosen, lowest = None, 1e9
    for first in range(0, len(levels) - span, stride):
        window = levels[first:first + span]
        level = float(window.mean())
        if level >= 0.5 * typical:  # otherwise: window mostly in a lull
            head = float(window[:edge].mean())
            tail = float(window[-edge:].mean())
            cv = float(window.std()) / (level or 1.0)
            mismatch = abs(head - tail) / typical
            # 0 once the quieter boundary reaches the median level
            lull = max(0.0, 1.0 - min(head, tail) / typical)
            score = cv + 2.0 * mismatch + 2.0 * lull
            if score < lowest:
                lowest, chosen = score, first * hop

    if chosen is None:
        chosen = (len(mono) - n) // 2
    return mono[chosen:chosen + n]
def resample(mono, src, dst):
    """Lower *mono* from rate *src* to *dst* by linear interpolation.

    Returns ``(samples, rate)``. Audio already at or below *dst* is returned
    untouched together with its own rate — this never upsamples.
    """
    if src > dst:
        out_len = int(len(mono) * dst / src)
        positions = np.arange(out_len) * (src / dst)  # output times in source samples
        source_index = np.arange(len(mono))
        return np.interp(positions, source_index, mono), dst
    return mono, src
def write_wav(mono, rate, path):
    """Write *mono* to *path* as a mono 16-bit little-endian wav at *rate*.

    The signal is scaled so its largest absolute sample sits at 0.9 of full
    scale; an all-zero signal is written unscaled (peak treated as 1.0).
    """
    loudest = float(np.abs(mono).max() or 1.0)
    gain = 0.9 / loudest
    samples = (mono * gain * 32767).astype("<i2")
    with wave.open(str(path), "wb") as out:
        out.setframerate(rate)
        out.setsampwidth(2)
        out.setnchannels(1)
        out.writeframes(samples.tobytes())
def fetch_one(slug):
    """Fetch, vet and store one ambience bed.

    Candidates are the pinned ``PREFERRED`` title (if any) followed by the
    search results, de-duplicated in that order. The first eight that resolve
    to a URL are tried in turn; a candidate is skipped when its reported
    duration exceeds ``MAX_SRC_DUR``, when download/decoding fails, when the
    decoded length is outside ``MIN_SRC_DUR``..``MAX_SRC_DUR``, when its RMS
    is below 5e-3, or when its spectral flatness is below 1e-3. The first
    survivor is trimmed, rate-capped and written to ``OUT_DIR/<slug>.wav``.

    Returns a credit dict (slug, title, license, artist, page) on success, or
    None if nothing usable was found.
    """
    searched = find_titles(slug)
    pinned = PREFERRED.get(slug)
    # the pinned pick is tried first; search results (relevance order) back it up
    ordered = ([pinned] if pinned else []) + searched
    lookup = list(dict.fromkeys(ordered))  # drop repeats, keep first occurrence
    if not lookup:
        print(f" no candidates found for {slug}")
        return None

    info = file_info(lookup)
    resolvable = [t for t in lookup if info.get(t, {}).get("url")]
    for title in resolvable[:8]:
        meta = info[title]
        label = title[5:]  # title without its 5-character "File:" prefix
        reported = meta["dur"]
        if reported and reported > MAX_SRC_DUR:
            continue
        try:
            mono, rate = decode_mono(download(meta["url"]))
        except Exception as e:  # noqa: BLE001 — try the next candidate
            print(f" skip {label[:40]!r}: {e}")
            continue

        dur = len(mono) / rate
        rms = float(np.sqrt(np.mean(mono ** 2)))
        flat = spectral_flatness(mono, rate)
        length_ok = MIN_SRC_DUR <= dur <= MAX_SRC_DUR
        if not length_ok or rms < 5e-3:
            print(f" skip {label[:40]!r}: dur={dur:.0f}s rms={rms:.3f}")
            continue
        if flat < 1e-3:  # essentially a pure tone, not ambience (sines ~1e-12)
            print(f" skip {label[:40]!r}: too tonal (flatness {flat:.0e})")
            continue

        seg, out_rate = resample(steady_window(mono, rate), rate, MAX_RATE)
        write_wav(seg, out_rate, OUT_DIR / f"{slug}.wav")
        kept_s = len(seg) / out_rate
        seams = "no seam" if kept_s >= 30 else "1 seam @30s"
        print(f" {slug} <- {label[:42]!r} "
              f"({dur:.0f}s src -> {kept_s:.0f}s, {seams}, {meta['license']})")
        return {"slug": slug, "title": label, "license": meta["license"],
                "artist": meta["artist"], "page": meta["page"]}

    print(f" no usable file for {slug} (all candidates failed checks)")
    return None
def save_credits(new_credits):
    """Merge this run's picks into credits.json and re-render CREDITS.md.

    credits.json (keyed by slug) is the source of truth; merging means
    fetching one slug doesn't drop the others' attribution. ``new_credits``
    is an iterable of credit dicts with the keys ``slug``, ``title``,
    ``artist``, ``license`` and ``page``.

    Both files are read and written as UTF-8 regardless of the platform's
    locale encoding, because artist names and titles from Commons are often
    non-ASCII. An unreadable credits.json, or one that does not hold a JSON
    object, is treated as empty rather than aborting the run.
    """
    store = OUT_DIR / "credits.json"
    merged = {}
    if store.exists():
        try:
            # UnicodeDecodeError is a ValueError too, so a badly encoded file
            # is handled like malformed JSON.
            loaded = json.loads(store.read_text(encoding="utf-8"))
        except ValueError:
            loaded = None
        if isinstance(loaded, dict):
            merged = loaded
    for c in new_credits:
        merged[c["slug"]] = c
    store.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")

    lines = ["# Ambience sample credits", "",
             "Auto-fetched from Wikimedia Commons by `scripts/fetch_ambience.py`.",
             "vinyl_crackle and tape_hiss are synthesized in `ambience.py` and not listed.", ""]
    for slug in sorted(merged):
        c = merged[slug]
        lines += [
            f"## {slug}",
            f"- **{c['title']}**",
            f"- Author: {c['artist']}",
            f"- Licence: {c['license']}",
            f"- Source: {c['page']}",
            "",
        ]
    (OUT_DIR / "CREDITS.md").write_text("\n".join(lines), encoding="utf-8")
def main():
    """Command-line entry point; returns the process exit status.

    Slugs named on the command line are fetched as given. With none named,
    every slug whose wav is missing from ``OUT_DIR`` is fetched, or every
    slug when ``--force`` is set. Returns 0 when at least one bed was fetched
    or nothing needed fetching, 1 when every attempt failed.
    """
    parser = argparse.ArgumentParser(description=__doc__.split("\n")[0])
    parser.add_argument(
        "slugs", nargs="*", choices=[*SOURCES, []], metavar="slug",
        help=f"beds to fetch (default: missing ones). One of: {', '.join(SOURCES)}",
    )
    parser.add_argument("--force", action="store_true", help="re-fetch even if the wav exists")
    args = parser.parse_args()

    if args.slugs:
        todo = args.slugs
    else:
        todo = [s for s in SOURCES if args.force or not (OUT_DIR / f"{s}.wav").exists()]
    if not todo:
        print("all sampled beds already present — use --force to refetch")
        return 0

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    fetched = []
    for slug in todo:
        print(f"\n[{slug}]")
        credit = fetch_one(slug)
        if credit:
            fetched.append(credit)
        time.sleep(1)  # pause between slugs
    if fetched:
        save_credits(fetched)  # merges into credits.json, won't drop other slugs

    got = len(fetched)
    print(f"\nfetched {got}/{len(todo)} beds -> {OUT_DIR.relative_to(ROOT)}")
    return 1 if not got else 0


if __name__ == "__main__":
    sys.exit(main())
|