Spaces:
Running on Zero
Running on Zero
Scale model + tape length to the hardware (GPU vs CPU)
Browse files
On a ZeroGPU Space: musicgen-medium, tapes up to 90s (chunked) — unchanged. Without a GPU: fall back to musicgen-small and a single 30s shot (no chunking), since medium + chunking on CPU would take minutes. Both the model default and ALLOWED_SECONDS now branch on IS_ZEROGPU (env still overrides the model). New /api/config exposes the allowed lengths; the length slider fetches it and collapses to a single 0:30 when that's all the backend offers — defensive, so any fetch failure keeps the 30/60/90 default and the GPU path is untouched.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
- app.py +24 -6
- frontend/ui.js +19 -6
app.py
CHANGED
|
@@ -9,10 +9,16 @@ loops a background bed (waves, crackle, rain…) underneath. MusicGen ignores
|
|
| 9 |
texture words in prompts, hence the separate bed. The enrichment LLM is
|
| 10 |
MiniCPM (on cuda) on a ZeroGPU Space, or a local Ollama daemon in dev.
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
Env knobs:
|
| 13 |
LOFINITY_ENGINE musicgen (default) | stub
|
| 14 |
LOFINITY_DURATION clip length in seconds (default 30, the single-shot max)
|
| 15 |
LOFINITY_DEVICE cuda | mps | cpu (default: cuda on ZeroGPU, else mps if available)
|
|
|
|
|
|
|
|
|
|
| 16 |
LOFINITY_ENRICHER MiniCPM model id for ZeroGPU enrichment (default MiniCPM5-1B)
|
| 17 |
OLLAMA_URL default http://localhost:11434 (local enrichment)
|
| 18 |
OLLAMA_MODEL default llama3.2:3b (local enrichment)
|
|
@@ -70,10 +76,14 @@ print(
|
|
| 70 |
)
|
| 71 |
|
| 72 |
ENGINE = os.getenv("LOFINITY_ENGINE", "musicgen")
|
| 73 |
-
#
|
| 74 |
-
#
|
| 75 |
-
# fall back to
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# 30s is musicgen-small's single-shot max (1500 tokens). Longer tapes are
|
| 78 |
# stitched from 30s chunks: each one re-seeds the model with the last OVERLAP_S
|
| 79 |
# of the track so it keeps playing from there. musicgen-small's context is 2048
|
|
@@ -85,8 +95,9 @@ OVERLAP_S = float(os.getenv("LOFINITY_OVERLAP_S", "2")) # seconds of tail fed b
|
|
| 85 |
# total output (seed + new) at MAX_GEN_S to stay inside that window. Env-tunable.
|
| 86 |
MAX_GEN_S = float(os.getenv("LOFINITY_MAX_GEN_S", "28"))
|
| 87 |
SEAM_S = 0.4 # equal-power crossfade at each stitch, to hide the join
|
| 88 |
-
# the
|
| 89 |
-
|
|
|
|
| 90 |
DEFAULT_SECONDS = int(os.getenv("LOFINITY_DURATION", "30"))
|
| 91 |
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
|
| 92 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2:3b")
|
|
@@ -539,6 +550,13 @@ def progress() -> dict:
|
|
| 539 |
return dict(_PROGRESS)
|
| 540 |
|
| 541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
@app.get("/")
|
| 543 |
async def homepage():
|
| 544 |
return FileResponse(FRONTEND / "index.html")
|
|
|
|
| 9 |
texture words in prompts, hence the separate bed. The enrichment LLM is
|
| 10 |
MiniCPM (on cuda) on a ZeroGPU Space, or a local Ollama daemon in dev.
|
| 11 |
|
| 12 |
+
On a ZeroGPU Space it runs musicgen-medium and allows tapes up to 90s (chunked);
|
| 13 |
+
without a GPU it falls back to musicgen-small and a single 30s shot (no chunking).
|
| 14 |
+
|
| 15 |
Env knobs:
|
| 16 |
LOFINITY_ENGINE musicgen (default) | stub
|
| 17 |
LOFINITY_DURATION clip length in seconds (default 30, the single-shot max)
|
| 18 |
LOFINITY_DEVICE cuda | mps | cpu (default: cuda on ZeroGPU, else mps if available)
|
| 19 |
+
LOFINITY_MUSICGEN model id (default: musicgen-medium on ZeroGPU, else musicgen-small)
|
| 20 |
+
LOFINITY_OVERLAP_S continuation seed length, seconds (default 2)
|
| 21 |
+
LOFINITY_MAX_GEN_S cap on a continuation's total output, seconds (default 28)
|
| 22 |
LOFINITY_ENRICHER MiniCPM model id for ZeroGPU enrichment (default MiniCPM5-1B)
|
| 23 |
OLLAMA_URL default http://localhost:11434 (local enrichment)
|
| 24 |
OLLAMA_MODEL default llama3.2:3b (local enrichment)
|
|
|
|
| 76 |
)
|
| 77 |
|
| 78 |
ENGINE = os.getenv("LOFINITY_ENGINE", "musicgen")
|
| 79 |
+
# Model + tape length scale with the hardware: a ZeroGPU Space gets the bigger,
|
| 80 |
+
# cleaner-continuing musicgen-medium and full chunked tapes (up to 90s); without a
|
| 81 |
+
# GPU we fall back to the smaller, faster musicgen-small and a single 30s shot
|
| 82 |
+
# (medium + chunking on CPU would take minutes). The env var still overrides.
|
| 83 |
+
MUSICGEN_MODEL = os.getenv(
|
| 84 |
+
"LOFINITY_MUSICGEN",
|
| 85 |
+
"facebook/musicgen-medium" if IS_ZEROGPU else "facebook/musicgen-small",
|
| 86 |
+
)
|
| 87 |
# 30s is musicgen-small's single-shot max (1500 tokens). Longer tapes are
|
| 88 |
# stitched from 30s chunks: each one re-seeds the model with the last OVERLAP_S
|
| 89 |
# of the track so it keeps playing from there. musicgen-small's context is 2048
|
|
|
|
| 95 |
# total output (seed + new) at MAX_GEN_S to stay inside that window. Env-tunable.
|
| 96 |
MAX_GEN_S = float(os.getenv("LOFINITY_MAX_GEN_S", "28"))
|
| 97 |
SEAM_S = 0.4 # equal-power crossfade at each stitch, to hide the join
|
| 98 |
+
# the tape lengths the API allows (it snaps any request to the nearest). Only a
|
| 99 |
+
# GPU gets the longer, chunked tapes; a CPU-only fallback is capped to one 30s shot.
|
| 100 |
+
ALLOWED_SECONDS = (30, 60, 90) if IS_ZEROGPU else (30,)
|
| 101 |
DEFAULT_SECONDS = int(os.getenv("LOFINITY_DURATION", "30"))
|
| 102 |
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
|
| 103 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2:3b")
|
|
|
|
| 550 |
return dict(_PROGRESS)
|
| 551 |
|
| 552 |
|
| 553 |
+
@app.get("/api/config")
|
| 554 |
+
def config() -> dict:
|
| 555 |
+
"""Frontend config: the tape lengths this backend allows. Hardware-dependent —
|
| 556 |
+
a CPU-only fallback offers only 30s — so the slider reads it and adapts."""
|
| 557 |
+
return {"allowed_seconds": list(ALLOWED_SECONDS)}
|
| 558 |
+
|
| 559 |
+
|
| 560 |
@app.get("/")
|
| 561 |
async def homepage():
|
| 562 |
return FileResponse(FRONTEND / "index.html")
|
frontend/ui.js
CHANGED
|
@@ -25,13 +25,26 @@ export function initUI({
|
|
| 25 |
const coinBtn = $("coin-button");
|
| 26 |
|
| 27 |
// slider stops → (seconds sent to the backend, label on the screen). 1 min and
|
| 28 |
-
// 1.5 min are stitched from 30s chunks
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
];
|
| 34 |
const selectedLength = () => LENGTHS[Number(lengthSlider.value)] ?? LENGTHS[0];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
const controlsRow = $("controls-row");
|
| 36 |
const generating = $("generating");
|
| 37 |
const brewFill = $("brew-bar-fill");
|
|
|
|
| 25 |
const coinBtn = $("coin-button");
|
| 26 |
|
| 27 |
// slider stops → (seconds sent to the backend, label on the screen). 1 min and
|
| 28 |
+
// 1.5 min are stitched from 30s chunks. The set is hardware-dependent — a
|
| 29 |
+
// CPU-only backend allows only 30s — so we fetch the real list from /api/config
|
| 30 |
+
// and collapse the slider when there's a single option.
|
| 31 |
+
const fmtLen = (s) => `${Math.floor(s / 60)}:${String(s % 60).padStart(2, "0")}`;
|
| 32 |
+
let LENGTHS = [30, 60, 90].map((s) => ({ seconds: s, label: fmtLen(s) }));
|
|
|
|
| 33 |
const selectedLength = () => LENGTHS[Number(lengthSlider.value)] ?? LENGTHS[0];
|
| 34 |
+
// adapt the slider to what this backend actually allows; defensive — any failure
|
| 35 |
+
// keeps the 30/60/90 default, so the GPU path is never affected
|
| 36 |
+
fetch("/api/config")
|
| 37 |
+
.then((r) => (r.ok ? r.json() : null))
|
| 38 |
+
.then((cfg) => {
|
| 39 |
+
const allowed = cfg && Array.isArray(cfg.allowed_seconds) ? cfg.allowed_seconds : null;
|
| 40 |
+
if (!allowed || !allowed.length) return;
|
| 41 |
+
LENGTHS = allowed.map((s) => ({ seconds: s, label: fmtLen(s) }));
|
| 42 |
+
lengthSlider.max = String(Math.max(0, LENGTHS.length - 1));
|
| 43 |
+
if (Number(lengthSlider.value) > LENGTHS.length - 1) lengthSlider.value = "0";
|
| 44 |
+
lengthValue.textContent = selectedLength().label;
|
| 45 |
+
if (LENGTHS.length <= 1) lengthRow.style.display = "none"; // single option → no slider
|
| 46 |
+
})
|
| 47 |
+
.catch(() => {});
|
| 48 |
const controlsRow = $("controls-row");
|
| 49 |
const generating = $("generating");
|
| 50 |
const brewFill = $("brew-bar-fill");
|