eloigil6 Claude Opus 4.8 committed on
Commit
613bdc6
·
1 Parent(s): b97956f

Scale model + tape length to the hardware (GPU vs CPU)

Browse files

On a ZeroGPU Space: musicgen-medium, tapes up to 90s (chunked) - unchanged. Without a GPU: fall back to musicgen-small and a single 30s shot (no chunking), since medium + chunking on CPU would take minutes. Both the model default and ALLOWED_SECONDS now branch on IS_ZEROGPU (env still overrides the model). New /api/config exposes the allowed lengths; the length slider fetches it and collapses to a single 0:30 when that's all the backend offers - defensive, so any fetch failure keeps the 30/60/90 default and the GPU path is untouched.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +24 -6
  2. frontend/ui.js +19 -6
app.py CHANGED
@@ -9,10 +9,16 @@ loops a background bed (waves, crackle, rain…) underneath. MusicGen ignores
9
  texture words in prompts, hence the separate bed. The enrichment LLM is
10
  MiniCPM (on cuda) on a ZeroGPU Space, or a local Ollama daemon in dev.
11
 
 
 
 
12
  Env knobs:
13
  LOFINITY_ENGINE musicgen (default) | stub
14
  LOFINITY_DURATION clip length in seconds (default 30, the single-shot max)
15
  LOFINITY_DEVICE cuda | mps | cpu (default: cuda on ZeroGPU, else mps if available)
 
 
 
16
  LOFINITY_ENRICHER MiniCPM model id for ZeroGPU enrichment (default MiniCPM5-1B)
17
  OLLAMA_URL default http://localhost:11434 (local enrichment)
18
  OLLAMA_MODEL default llama3.2:3b (local enrichment)
@@ -70,10 +76,14 @@ print(
70
  )
71
 
72
  ENGINE = os.getenv("LOFINITY_ENGINE", "musicgen")
73
- # musicgen-medium continues from an audio seed more cleanly than -small, whose
74
- # continuations degrade into noise after a chunk or two. Override via the env to
75
- # fall back to facebook/musicgen-small (faster, smaller) if needed.
76
- MUSICGEN_MODEL = os.getenv("LOFINITY_MUSICGEN", "facebook/musicgen-medium")
 
 
 
 
77
  # 30s is musicgen-small's single-shot max (1500 tokens). Longer tapes are
78
  # stitched from 30s chunks: each one re-seeds the model with the last OVERLAP_S
79
  # of the track so it keeps playing from there. musicgen-small's context is 2048
@@ -85,8 +95,9 @@ OVERLAP_S = float(os.getenv("LOFINITY_OVERLAP_S", "2")) # seconds of tail fed b
85
  # total output (seed + new) at MAX_GEN_S to stay inside that window. Env-tunable.
86
  MAX_GEN_S = float(os.getenv("LOFINITY_MAX_GEN_S", "28"))
87
  SEAM_S = 0.4 # equal-power crossfade at each stitch, to hide the join
88
- # the lengths the tape-length slider offers; the API snaps any value to one
89
- ALLOWED_SECONDS = (30, 60, 90)
 
90
  DEFAULT_SECONDS = int(os.getenv("LOFINITY_DURATION", "30"))
91
  OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
92
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2:3b")
@@ -539,6 +550,13 @@ def progress() -> dict:
539
  return dict(_PROGRESS)
540
 
541
 
 
 
 
 
 
 
 
542
  @app.get("/")
543
  async def homepage():
544
  return FileResponse(FRONTEND / "index.html")
 
9
  texture words in prompts, hence the separate bed. The enrichment LLM is
10
  MiniCPM (on cuda) on a ZeroGPU Space, or a local Ollama daemon in dev.
11
 
12
+ On a ZeroGPU Space it runs musicgen-medium and allows tapes up to 90s (chunked);
13
+ without a GPU it falls back to musicgen-small and a single 30s shot (no chunking).
14
+
15
  Env knobs:
16
  LOFINITY_ENGINE musicgen (default) | stub
17
  LOFINITY_DURATION clip length in seconds (default 30, the single-shot max)
18
  LOFINITY_DEVICE cuda | mps | cpu (default: cuda on ZeroGPU, else mps if available)
19
+ LOFINITY_MUSICGEN model id (default: musicgen-medium on ZeroGPU, else musicgen-small)
20
+ LOFINITY_OVERLAP_S continuation seed length, seconds (default 2)
21
+ LOFINITY_MAX_GEN_S cap on a continuation's total output, seconds (default 28)
22
  LOFINITY_ENRICHER MiniCPM model id for ZeroGPU enrichment (default MiniCPM5-1B)
23
  OLLAMA_URL default http://localhost:11434 (local enrichment)
24
  OLLAMA_MODEL default llama3.2:3b (local enrichment)
 
76
  )
77
 
78
  ENGINE = os.getenv("LOFINITY_ENGINE", "musicgen")
79
+ # Model + tape length scale with the hardware: a ZeroGPU Space gets the bigger,
80
+ # cleaner-continuing musicgen-medium and full chunked tapes (up to 90s); without a
81
+ # GPU we fall back to the smaller, faster musicgen-small and a single 30s shot
82
+ # (medium + chunking on CPU would take minutes). The env var still overrides.
83
+ MUSICGEN_MODEL = os.getenv(
84
+ "LOFINITY_MUSICGEN",
85
+ "facebook/musicgen-medium" if IS_ZEROGPU else "facebook/musicgen-small",
86
+ )
87
  # 30s is musicgen-small's single-shot max (1500 tokens). Longer tapes are
88
  # stitched from 30s chunks: each one re-seeds the model with the last OVERLAP_S
89
  # of the track so it keeps playing from there. musicgen-small's context is 2048
 
95
  # total output (seed + new) at MAX_GEN_S to stay inside that window. Env-tunable.
96
  MAX_GEN_S = float(os.getenv("LOFINITY_MAX_GEN_S", "28"))
97
  SEAM_S = 0.4 # equal-power crossfade at each stitch, to hide the join
98
+ # the tape lengths the API allows (it snaps any request to the nearest). Only a
99
+ # GPU gets the longer, chunked tapes; a CPU-only fallback is capped to one 30s shot.
100
+ ALLOWED_SECONDS = (30, 60, 90) if IS_ZEROGPU else (30,)
101
  DEFAULT_SECONDS = int(os.getenv("LOFINITY_DURATION", "30"))
102
  OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
103
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2:3b")
 
550
  return dict(_PROGRESS)
551
 
552
 
553
+ @app.get("/api/config")
554
+ def config() -> dict:
555
+ """Frontend config: the tape lengths this backend allows. Hardware-dependent —
556
+ a CPU-only fallback offers only 30s — so the slider reads it and adapts."""
557
+ return {"allowed_seconds": list(ALLOWED_SECONDS)}
558
+
559
+
560
  @app.get("/")
561
  async def homepage():
562
  return FileResponse(FRONTEND / "index.html")
frontend/ui.js CHANGED
@@ -25,13 +25,26 @@ export function initUI({
25
  const coinBtn = $("coin-button");
26
 
27
  // slider stops → (seconds sent to the backend, label on the screen). 1 min and
28
- // 1.5 min are stitched from 30s chunks (the backend continues from the last 6s).
29
- const LENGTHS = [
30
- { seconds: 30, label: "0:30" },
31
- { seconds: 60, label: "1:00" },
32
- { seconds: 90, label: "1:30" },
33
- ];
34
  const selectedLength = () => LENGTHS[Number(lengthSlider.value)] ?? LENGTHS[0];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  const controlsRow = $("controls-row");
36
  const generating = $("generating");
37
  const brewFill = $("brew-bar-fill");
 
25
  const coinBtn = $("coin-button");
26
 
27
  // slider stops → (seconds sent to the backend, label on the screen). 1 min and
28
+ // 1.5 min are stitched from 30s chunks. The set is hardware-dependent — a
29
+ // CPU-only backend allows only 30s — so we fetch the real list from /api/config
30
+ // and collapse the slider when there's a single option.
31
+ const fmtLen = (s) => `${Math.floor(s / 60)}:${String(s % 60).padStart(2, "0")}`;
32
+ let LENGTHS = [30, 60, 90].map((s) => ({ seconds: s, label: fmtLen(s) }));
 
33
  const selectedLength = () => LENGTHS[Number(lengthSlider.value)] ?? LENGTHS[0];
34
+ // adapt the slider to what this backend actually allows; defensive — any failure
35
+ // keeps the 30/60/90 default, so the GPU path is never affected
36
+ fetch("/api/config")
37
+ .then((r) => (r.ok ? r.json() : null))
38
+ .then((cfg) => {
39
+ const allowed = cfg && Array.isArray(cfg.allowed_seconds) ? cfg.allowed_seconds : null;
40
+ if (!allowed || !allowed.length) return;
41
+ LENGTHS = allowed.map((s) => ({ seconds: s, label: fmtLen(s) }));
42
+ lengthSlider.max = String(Math.max(0, LENGTHS.length - 1));
43
+ if (Number(lengthSlider.value) > LENGTHS.length - 1) lengthSlider.value = "0";
44
+ lengthValue.textContent = selectedLength().label;
45
+ if (LENGTHS.length <= 1) lengthRow.style.display = "none"; // single option → no slider
46
+ })
47
+ .catch(() => {});
48
  const controlsRow = $("controls-row");
49
  const generating = $("generating");
50
  const brewFill = $("brew-bar-fill");