Spaces:
Running on Zero
Running on Zero
Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.
"""One-off renderer for the sampled ambience beds.

vinyl_crackle and tape_hiss are synthesized live in ambience.py; the seven
beds below only need to exist once on disk. This script fills
assets/ambience/ with text-to-audio renders from AudioLDM2.

(AudioGen would also work, but it lives in the unmaintained audiocraft
package which doesn't install on Python 3.13; AudioLDM2 ships in diffusers
and runs next to the project's torch/transformers as-is.)

Usage:
    pip install diffusers
    python scripts/make_ambience.py                      # render whatever is missing
    python scripts/make_ambience.py ocean_waves --force  # redo one

Each clip is ~12 s; the runtime mixer tiles it with crossfades, so it does
not need to loop perfectly. Re-run any slug whose render sounds off —
text-to-audio is a slot machine, two pulls usually land one keeper.
"""
| import argparse | |
| import os | |
| import sys | |
| import wave | |
| from pathlib import Path | |
# Repository root: two levels up from this script's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Folder that receives one "<slug>.wav" per rendered bed.
OUT_DIR = ROOT.joinpath("assets", "ambience")

# slug -> text-to-audio prompt. The slug doubles as the output file stem and
# as the positional command-line argument that selects a bed.
PROMPTS = {
    "soft_rain": (
        "gentle steady rain falling on leaves, calm rain ambience, no thunder"
    ),
    "ocean_waves": (
        "calm ocean waves gently rolling onto a sandy beach, soft surf"
    ),
    "fireplace_crackle": "cozy fireplace, fire crackling and popping softly",
    "birdsong": "soft morning birdsong, small birds chirping in a quiet garden",
    "night_crickets": "crickets chirping steadily on a calm summer night",
    "cafe_murmur": (
        "quiet coffee shop ambience, soft murmur of distant conversation, "
        "occasional clink of cups"
    ),
    "wind_in_trees": "soft wind rustling through tree leaves, gentle breeze",
}

# Negative prompt shared by every render (things the beds should not contain).
NEGATIVE = ", ".join(
    (
        "music",
        "melody",
        "singing",
        "speech",
        "voice",
        "loud",
        "harsh",
        "low quality",
        "distortion",
    )
)
def write_wav(samples, rate: int, path: Path) -> None:
    """Peak-normalise *samples* and save them as a mono 16-bit PCM WAV file.

    The loudest sample is scaled to 90 % of int16 full scale; an all-zero
    signal is written unchanged (as silence).

    Args:
        samples: Audio samples as a NumPy array, or anything ``np.asarray``
            accepts (for example a plain list of floats). The data is written
            as a single channel in memory order.
        rate: Sample rate in Hz stored in the WAV header.
        path: Destination file; an existing file is overwritten.

    Raises:
        ValueError: If *samples* is empty or contains NaN/inf. Both checks run
            before the file is opened, so nothing is written in that case.
    """
    # Kept function-local, like the other heavy imports in this script.
    import numpy as np

    # Coerce first so plain sequences work: ``list * float`` is a TypeError.
    audio = np.asarray(samples)
    if audio.size == 0:
        raise ValueError("write_wav: no samples to write")

    peak = float(np.abs(audio).max())
    if not np.isfinite(peak):
        # A NaN peak is truthy, so an ``or 1.0`` fallback would not catch it and
        # the NaN/inf samples would be cast to arbitrary int16 values.
        raise ValueError("write_wav: samples contain NaN or inf")
    if peak == 0.0:
        peak = 1.0  # pure silence: avoid dividing by zero

    pcm = (audio * (0.9 / peak) * 32767).astype("<i2")  # little-endian int16
    with wave.open(str(path), "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)  # bytes per sample
        w.setframerate(rate)
        w.writeframes(pcm.tobytes())
def _render_bed(pipe, slug: str, args: argparse.Namespace):
    """Run the pipeline once for *slug* and return its first waveform."""
    result = pipe(
        prompt=PROMPTS[slug],
        negative_prompt=NEGATIVE,
        num_inference_steps=args.steps,
        audio_length_in_s=args.duration,
        num_waveforms_per_prompt=args.candidates,
    )
    return result.audios[0]  # audios come back ranked by text alignment


def main() -> int:
    """Render the requested (or all missing) ambience beds into ``OUT_DIR``.

    Slugs named on the command line are always rendered, with or without
    ``--force``. With no slugs, every bed whose WAV is missing is rendered,
    or every bed when ``--force`` is given. The device comes from the
    ``LOFINITY_DEVICE`` environment variable when set, otherwise "mps" if
    available, else "cpu". If a render fails on a non-CPU device, the
    pipeline is moved to the CPU for the rest of the run and that render is
    retried once.

    Returns:
        Process exit status: 0 on success. Invalid command-line values exit
        through ``parser.error`` (status 2) before any model is loaded.
    """
    # ``__doc__`` is None under ``python -OO``; fall back to an empty summary.
    summary = (__doc__ or "").split("\n")[0]
    parser = argparse.ArgumentParser(description=summary)
    # The ``[]`` entry in ``choices`` is a workaround: some Python versions
    # validate the empty "no slugs given" list against ``choices``.
    parser.add_argument("slugs", nargs="*", choices=[*PROMPTS, []], metavar="slug",
                        help=f"which beds to render (default: all missing). One of: {', '.join(PROMPTS)}")
    parser.add_argument("--force", action="store_true", help="re-render even if the wav exists")
    parser.add_argument("--duration", type=float, default=12.0, help="clip length in seconds")
    parser.add_argument("--steps", type=int, default=200, help="diffusion steps (more = cleaner, slower)")
    parser.add_argument("--candidates", type=int, default=2,
                        help="waveforms per prompt; the pipeline keeps the best text match")
    args = parser.parse_args()

    # Reject nonsensical values up front, before the multi-GB model load.
    if args.duration <= 0:
        parser.error("--duration must be greater than 0")
    if args.steps < 1:
        parser.error("--steps must be at least 1")
    if args.candidates < 1:
        parser.error("--candidates must be at least 1")

    if args.slugs:
        # Drop repeats (order preserved) so a slug named twice renders once.
        todo = list(dict.fromkeys(args.slugs))
    else:
        todo = [s for s in PROMPTS if args.force or not (OUT_DIR / f"{s}.wav").exists()]
    if not todo:
        print("all ambience beds already rendered — use --force to redo")
        return 0

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Heavy imports are deferred until there is actually something to render.
    import torch
    from diffusers import AudioLDM2Pipeline

    device = os.getenv("LOFINITY_DEVICE") or ("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"first run downloads ~3 GB (cvssp/audioldm2); rendering on {device}")
    pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2")
    pipe.to(device)

    sample_rate = 16000  # rate written to the WAV header for every bed

    # ``todo`` is already filtered above, so no per-item existence check here.
    for slug in todo:
        path = OUT_DIR / f"{slug}.wav"
        print(f"rendering {slug}: {PROMPTS[slug]!r}")
        try:
            audio = _render_bed(pipe, slug, args)
        except Exception as e:  # noqa: BLE001 — mps kernels are still patchy
            if device == "cpu":
                raise
            print(f" {device} failed ({e!r}), retrying on cpu")
            pipe.to("cpu")
            device = "cpu"  # stay on the CPU for the remaining beds
            audio = _render_bed(pipe, slug, args)
        write_wav(audio, sample_rate, path)
        print(f" -> {path.relative_to(ROOT)}")
    return 0
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())