LoFinity / scripts /make_ambience.py
eloigil6's picture
Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.
722a5d8
Raw
History Blame Contribute Delete
4.44 kB
"""One-off renderer for the sampled ambience beds.
vinyl_crackle and tape_hiss are synthesized live in ambience.py; the seven
beds below only need to exist once on disk. This script fills
assets/ambience/ with text-to-audio renders from AudioLDM2.
(AudioGen would also work, but it lives in the unmaintained audiocraft
package which doesn't install on Python 3.13; AudioLDM2 ships in diffusers
and runs next to the project's torch/transformers as-is.)
Usage:
pip install diffusers
python scripts/make_ambience.py # render whatever is missing
python scripts/make_ambience.py ocean_waves --force # redo one
Each clip is ~12 s; the runtime mixer tiles it with crossfades, so it does
not need to loop perfectly. Re-run any slug whose render sounds off —
text-to-audio is a slot machine, two pulls usually land one keeper.
"""
import argparse
import os
import sys
import wave
from pathlib import Path
# Repository root: two levels up from this file (the script sits in scripts/).
ROOT = Path(__file__).resolve().parents[1]
# Directory the rendered ambience WAV files are written to.
OUT_DIR = ROOT.joinpath("assets", "ambience")

# Text-to-audio prompt for each sampled bed, keyed by slug. The slug doubles
# as the stem of the output file name (<slug>.wav).
PROMPTS = {
    "soft_rain": "gentle steady rain falling on leaves, calm rain ambience, no thunder",
    "ocean_waves": "calm ocean waves gently rolling onto a sandy beach, soft surf",
    "fireplace_crackle": "cozy fireplace, fire crackling and popping softly",
    "birdsong": "soft morning birdsong, small birds chirping in a quiet garden",
    "night_crickets": "crickets chirping steadily on a calm summer night",
    "cafe_murmur": (
        "quiet coffee shop ambience, soft murmur of distant conversation, "
        "occasional clink of cups"
    ),
    "wind_in_trees": "soft wind rustling through tree leaves, gentle breeze",
}

# Negative prompt shared by every render: things the beds should not contain.
NEGATIVE = ", ".join(
    (
        "music",
        "melody",
        "singing",
        "speech",
        "voice",
        "loud",
        "harsh",
        "low quality",
        "distortion",
    )
)
def write_wav(samples, rate: int, path: Path) -> None:
    """Peak-normalise *samples* and save them as a mono 16-bit PCM WAV file.

    Args:
        samples: Audio samples as a NumPy array or any numeric array-like
            (for example a plain list). Written as a single mono channel.
        rate: Sample rate in Hz, stored in the WAV header.
        path: Destination file; ``wave.open(..., "wb")`` overwrites it.

    The loudest sample is scaled to 90 % of 16-bit full scale. All-zero input
    is written unchanged (silence), and empty input yields a valid WAV file
    with zero frames instead of raising.
    """
    # Local import, like the other heavy dependencies used by this script.
    import numpy as np

    # Accept plain sequences too: a list would fail at `samples * float`.
    data = np.asarray(samples)

    # ndarray.max() raises ValueError on a zero-size array, so only take the
    # peak when there is something to measure. A peak of 0 (silence or empty)
    # falls back to 1.0 to avoid dividing by zero.
    peak = float(np.abs(data).max()) if data.size else 0.0
    gain = 0.9 / (peak or 1.0)

    # Little-endian signed 16-bit, matching setsampwidth(2) below.
    pcm = (data * gain * 32767).astype("<i2")

    with wave.open(str(path), "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(rate)
        w.writeframes(pcm.tobytes())
def main() -> int:
    """Render the requested (or missing) ambience beds with AudioLDM2.

    Parses the command line, decides which slugs to render, loads the
    ``cvssp/audioldm2`` pipeline and writes one WAV per slug into ``OUT_DIR``.

    Slugs named explicitly on the command line are always rendered, even if
    their WAV already exists. With no slugs, only beds whose WAV is missing
    are rendered unless ``--force`` is given.

    If a render fails on a non-CPU device, the pipeline is moved to the CPU
    and that render is retried once; the CPU then stays selected for the
    remaining slugs. A failure on the CPU is re-raised.

    Returns:
        The process exit status, which is always 0; errors propagate as
        exceptions.
    """
    # __doc__ is None when docstrings are stripped (python -OO), so guard the
    # split instead of crashing before argument parsing even starts.
    summary = (__doc__ or "").split("\n")[0] or None
    parser = argparse.ArgumentParser(description=summary)
    # The extra [] in choices lets the empty default of nargs="*" pass the
    # choices check on Python versions that validate it.
    parser.add_argument(
        "slugs",
        nargs="*",
        choices=[*PROMPTS, []],
        metavar="slug",
        help=f"which beds to render (default: all missing). One of: {', '.join(PROMPTS)}",
    )
    parser.add_argument("--force", action="store_true", help="re-render even if the wav exists")
    parser.add_argument("--duration", type=float, default=12.0, help="clip length in seconds")
    parser.add_argument("--steps", type=int, default=200, help="diffusion steps (more = cleaner, slower)")
    parser.add_argument(
        "--candidates",
        type=int,
        default=2,
        help="waveforms per prompt; the pipeline keeps the best text match",
    )
    args = parser.parse_args()

    # Drop repeated slugs (keeping first-seen order) so a typo like
    # `soft_rain soft_rain` does not pay for the same render twice.
    requested = list(dict.fromkeys(args.slugs))
    todo = requested or [
        s for s in PROMPTS if args.force or not (OUT_DIR / f"{s}.wav").exists()
    ]
    if not todo:
        print("all ambience beds already rendered — use --force to redo")
        return 0

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Heavy imports are deferred until there is actually something to render.
    import torch
    from diffusers import AudioLDM2Pipeline

    # LOFINITY_DEVICE overrides the automatic choice (mps when available).
    device = os.getenv("LOFINITY_DEVICE") or ("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"first run downloads ~3 GB (cvssp/audioldm2); rendering on {device}")
    pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2")
    pipe.to(device)

    def render(prompt: str):
        """Run the pipeline for *prompt* and return the first waveform."""
        return pipe(
            prompt=prompt,
            negative_prompt=NEGATIVE,
            num_inference_steps=args.steps,
            audio_length_in_s=args.duration,
            num_waveforms_per_prompt=args.candidates,
        ).audios[0]  # audios come back ranked by text alignment

    for slug in todo:
        path = OUT_DIR / f"{slug}.wav"
        # Only relevant for the implicit "all missing" list: skip a file that
        # exists now. Explicitly requested slugs are always rendered.
        if path.exists() and not args.force and not requested:
            continue
        print(f"rendering {slug}: {PROMPTS[slug]!r}")
        try:
            audio = render(PROMPTS[slug])
        except Exception as e:  # noqa: BLE001 — mps kernels are still patchy
            if device == "cpu":
                raise
            print(f" {device} failed ({e!r}), retrying on cpu")
            pipe.to("cpu")
            device = "cpu"
            audio = render(PROMPTS[slug])
        # 16000 Hz is the sample rate this script assumes for AudioLDM2 output.
        write_wav(audio, 16000, path)
        print(f" -> {path.relative_to(ROOT)}")
    return 0
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())