polats Claude Opus 4.8 (1M context) committed on
Commit
2151ea1
·
1 Parent(s): 476605d

Portraits step 2: /portrait engine routing + imagen facade + Settings picker

Browse files

- app.py /portrait now honors an `engine` hint ('local' | 'cloud' | auto), so the
frontend can pick Z-Image-on-GPU vs cloud FLUX explicitly. Local loader uses
4-bit NF4 + a per-process VRAM cap (peak ~8 GB) so it can't crash the display.
- web/imagen.js — image facade mirroring tts.js (engines, persisted activeId,
onImageEngineChange, generatePortrait → PNG Blob).
- web/imagenServer.js — engines backed by /portrait: zimage-local (your GPU,
localhost-only) + flux / flux-dev (cloud NIM, HF fallback).
- web/imagenBar.js + settingsPanel.js — a "Portrait" provider picker section,
injected after Voice, persisted and synced like the voice picker.

Verified locally: picker renders/persists; engine:'local' generates a 1024² PNG;
engine:'cloud' cleanly 503s without keys (cloud verified in prod).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (5) hide show
  1. app.py +142 -0
  2. web/imagen.js +42 -0
  3. web/imagenBar.js +42 -0
  4. web/imagenServer.js +50 -0
  5. web/settingsPanel.js +4 -0
app.py CHANGED
@@ -396,6 +396,148 @@ async def qwen_tts(request: Request):
396
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
397
 
398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  @fastapi_app.get("/persona/status")
400
  def persona_status():
401
  return llm.status()
 
396
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
397
 
398
 
399
# ── Persona portraits (image generation) ─────────────────────────────────────
# Same split as the voice path. With TINY_IMAGE_MODE=local the OPEN WEIGHTS run on your
# GPU (Z-Image-Turbo, 6B, ~12 GB bf16 — coexists with the TTS model on a 24 GB card);
# in every other mode /portrait proxies a cloud provider so its key stays server-side.
# Either way the route answers with a PNG.
IMAGE_MODE = os.getenv("TINY_IMAGE_MODE", "").strip().lower()
HF_TOKEN = os.getenv("HF_TOKEN", "")
NIM_KEY = os.getenv("NVIDIA_NIM_API_KEY", "")
_NIM_BASE = "https://ai.api.nvidia.com/v1/genai"

# Provider id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
_NIM_PROVIDERS = {
    "flux-schnell": dict(model="black-forest-labs/flux.1-schnell", steps=4, cfg=0.0),
    "flux-dev": dict(model="black-forest-labs/flux.1-dev", steps=28, cfg=3.5),
}

# Anything smaller than this is treated as a blank/safety-blocked frame and retried.
_MIN_IMAGE_BYTES = 15_000

# Lazily-created local pipeline and the lock that serialises access to it.
_img_pipe = None
_img_lock = threading.Lock()
416
+
417
+
418
def _load_image_pipe():
    """Build the local Z-Image diffusers pipeline.

    The model id comes from TINY_IMAGE_MODEL (default "Tongyi-MAI/Z-Image-Turbo").
    Without CUDA the pipeline is loaded in float32 on the CPU. With CUDA it is loaded
    in bfloat16, this process's VRAM share is capped (TINY_IMAGE_VRAM_FRAC, default
    0.6), the transformer is 4-bit NF4 quantized unless TINY_IMAGE_QUANT is
    "0"/"false"/"no", and model CPU offload is enabled.

    Returns:
        The ready-to-call ZImagePipeline.
    """
    import logging

    import torch
    from diffusers import ZImagePipeline

    mid = os.environ.get("TINY_IMAGE_MODEL", "Tongyi-MAI/Z-Image-Turbo")
    if not torch.cuda.is_available():
        return ZImagePipeline.from_pretrained(mid, torch_dtype=torch.float32).to("cpu")
    dt = torch.bfloat16
    # GUARDRAIL: the 3090 also drives the desktop, so cap THIS process's VRAM — a spike can
    # then never grab the whole card and crash the display (it OOM-errors instead). Default
    # ~60% of 24 GB ≈ 14 GB; measured peak is ~8 GB, so there's comfortable headroom.
    frac = os.environ.get("TINY_IMAGE_VRAM_FRAC", "0.6")
    try:
        torch.cuda.set_per_process_memory_fraction(float(frac), 0)
    except Exception as e:  # noqa: BLE001
        # Still best-effort (loading continues), but say so: a silently missing cap
        # removes the very protection this block exists for.
        logging.getLogger(__name__).warning(
            "could not apply VRAM cap TINY_IMAGE_VRAM_FRAC=%r: %s", frac, e
        )
    if os.environ.get("TINY_IMAGE_QUANT", "1").lower() not in ("0", "false", "no"):
        # 4-bit NF4 quantize the 6B transformer (~12 GB bf16 → ~3 GB). With cpu-offload the
        # measured peak is ~8 GB (vs ~20 GB unquantized, which crashed the display), and the
        # VRAM frees between generations. Portrait quality is effectively unchanged.
        from diffusers import BitsAndBytesConfig, ZImageTransformer2DModel

        quant = BitsAndBytesConfig(
            load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=dt
        )
        transformer = ZImageTransformer2DModel.from_pretrained(
            mid, subfolder="transformer", quantization_config=quant, torch_dtype=dt
        )
        pipe = ZImagePipeline.from_pretrained(mid, transformer=transformer, torch_dtype=dt)
    else:
        pipe = ZImagePipeline.from_pretrained(mid, torch_dtype=dt)
    # Non-transformer components stream CPU↔GPU per forward; VRAM returns to ~0 when idle.
    pipe.enable_model_cpu_offload()
    return pipe
445
+
446
+
447
def _local_portrait(prompt, seed=None, width=1024, height=1024, steps=9):
    """Render one portrait with the in-process pipeline and return it as PNG bytes.

    The pipeline is created on first use and reused afterwards. When ``seed`` is
    given, a torch generator seeded with ``int(seed)`` is passed to the pipeline;
    otherwise no generator is supplied.
    """
    global _img_pipe
    import io
    import torch

    with _img_lock:  # one GPU model can't decode in parallel
        if _img_pipe is None:
            _img_pipe = _load_image_pipe()

        generator = None
        if seed is not None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            generator = torch.Generator(device).manual_seed(int(seed))

        result = _img_pipe(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator,
        )
        image = result.images[0]

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return buffer.getvalue()
462
+
463
+
464
def _nim_portrait(prompt, provider="flux-schnell", width=1024, height=1024):
    """Generate a portrait through the NVIDIA NIM FLUX endpoint.

    Args:
        prompt: Text prompt sent to the model.
        provider: Key into ``_NIM_PROVIDERS``; unknown ids fall back to "flux-schnell".
        width: Requested image width in pixels.
        height: Requested image height in pixels.

    Returns:
        ``(image_bytes, None)`` on success or ``(None, error_message)`` on failure.
        The function never raises for a bad upstream reply: transport errors are
        reported immediately, while an empty, undecodable or too-small image is
        retried (three attempts in total, each with a fresh random seed).
    """
    import random

    preset = _NIM_PROVIDERS.get(provider, _NIM_PROVIDERS["flux-schnell"])
    url = f"{_NIM_BASE}/{preset['model']}"
    headers = {
        "Authorization": f"Bearer {NIM_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    for _ in range(3):  # retry under the blank/safety-blocked threshold (woid's guard)
        payload = _json.dumps({
            "prompt": prompt, "cfg_scale": preset["cfg"], "width": width, "height": height,
            "seed": random.randint(0, 2_147_483_647), "steps": preset["steps"],
        }).encode()
        req = urllib.request.Request(url, data=payload, method="POST", headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=120) as r:
                j = _json.loads(r.read().decode())
        except urllib.error.HTTPError as e:
            # errors="replace": an undecodable error body must not raise inside the handler.
            return None, f"nim image {e.code}: {e.read().decode(errors='replace')[:200]}"
        except Exception as e:  # noqa: BLE001
            return None, f"nim image error: {e}"

        # Accept either {"image": ...} or {"artifacts": [{"base64": ...}]}, and tolerate
        # any other JSON shape by treating it as "no image this round".
        b64 = None
        if isinstance(j, dict):
            b64 = j.get("image")
            if not b64:
                artifacts = j.get("artifacts")
                first = artifacts[0] if isinstance(artifacts, list) and artifacts else None
                if isinstance(first, dict):
                    b64 = first.get("base64")
        if not b64:
            continue
        try:
            data = base64.b64decode(b64)
        except (TypeError, ValueError):  # binascii.Error is a ValueError
            continue
        if len(data) >= _MIN_IMAGE_BYTES:
            return data, None
    return None, "image kept coming back blank — safety-blocked prompt?"
490
+
491
+
492
def _hf_portrait(prompt, model="black-forest-labs/FLUX.1-schnell"):
    """Generate a portrait through Hugging Face Inference (text-to-image).

    Args:
        prompt: Text prompt, sent as the ``inputs`` field.
        model: Hub model id appended to the inference URL.

    Returns:
        ``(image_bytes, None)`` on success or ``(None, error_message)`` on failure.
        A response smaller than ``_MIN_IMAGE_BYTES`` is reported as an error.
    """
    # HF Inference (text-to-image) returns raw image bytes; reuses our existing HF_TOKEN.
    # NOTE(review): Hugging Face has been moving serverless inference behind
    # router.huggingface.co — confirm this host is still served before relying on it.
    url = f"https://api-inference.huggingface.co/models/{model}"
    payload = _json.dumps({"inputs": prompt}).encode()
    req = urllib.request.Request(url, data=payload, method="POST", headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
        "Accept": "image/png",
    })
    try:
        with urllib.request.urlopen(req, timeout=120) as r:
            data = r.read()
    except urllib.error.HTTPError as e:
        # errors="replace": an undecodable error body must not raise inside the handler.
        return None, f"hf image {e.code}: {e.read().decode(errors='replace')[:200]}"
    except Exception as e:  # noqa: BLE001
        return None, f"hf image error: {e}"
    if len(data) < _MIN_IMAGE_BYTES:
        return None, "hf returned a tiny/blank image"
    return data, None
509
+
510
+
511
@fastapi_app.post("/portrait")
async def portrait(request: Request):
    """Generate a portrait PNG from a JSON request.

    Body fields:
        prompt   (str, required) — appearance description.
        seed     (int, optional) — used by the local engine only; the cloud helpers
                 called here are not given a seed.
        provider (str, optional) — cloud sub-provider hint, e.g. "flux-dev".
        engine   (str, optional) — "local", "cloud", or empty for auto (local when
                 TINY_IMAGE_MODE=local, otherwise cloud).

    Responses:
        200 image/png on success; 400 for a malformed body, missing prompt or bad
        seed; 503 when the requested engine is not configured; 500 when the local
        model fails; 502 when the cloud provider fails.
    """
    try:
        body = await request.json()
    except ValueError:  # json.JSONDecodeError / UnicodeDecodeError
        return Response("invalid JSON body", status_code=400)
    if not isinstance(body, dict):
        return Response("JSON object required", status_code=400)

    def _text(key):
        # Non-string values are treated as absent instead of crashing on .strip().
        value = body.get(key)
        return value.strip() if isinstance(value, str) else ""

    prompt = _text("prompt")
    seed = body.get("seed")
    provider = _text("provider")  # cloud sub-provider hint (e.g. flux-dev)
    engine = _text("engine").lower()  # 'local' | 'cloud' | '' = auto
    if not prompt:
        return Response("prompt required", status_code=400)

    want_local = engine == "local" or (not engine and IMAGE_MODE == "local")
    if want_local:  # in-process open weights on your GPU (dev)
        if IMAGE_MODE != "local":
            return Response("local image mode not enabled (run with TINY_IMAGE_MODE=local)", status_code=503)
        if seed is not None:
            # Reject a bad seed up front; otherwise it surfaces below as a 500 with a
            # misleading "pip install" hint.
            try:
                seed = int(seed)
            except (TypeError, ValueError, OverflowError):
                return Response("seed must be an integer", status_code=400)
        try:
            png = await asyncio.to_thread(_local_portrait, prompt, seed)
        except Exception as e:  # noqa: BLE001 — surface a clear setup hint
            return Response(f"local image error (pip install 'git+https://github.com/huggingface/diffusers' accelerate?): {e}", status_code=500)
        return Response(png, media_type="image/png", headers={"Cache-Control": "no-store"})

    # Cloud: prefer NVIDIA NIM (woid's FLUX path), else HF Inference (our HF_TOKEN).
    if NIM_KEY:
        png, err = await asyncio.to_thread(_nim_portrait, prompt, provider or "flux-schnell")
    elif HF_TOKEN:
        png, err = await asyncio.to_thread(_hf_portrait, prompt)
    else:
        return Response("no image provider (set NVIDIA_NIM_API_KEY / HF_TOKEN, or TINY_IMAGE_MODE=local)", status_code=503)
    if err:
        return Response(err, status_code=502)
    return Response(png, media_type="image/png", headers={"Cache-Control": "no-store"})
539
+
540
+
541
  @fastapi_app.get("/persona/status")
542
  def persona_status():
543
  return llm.status()
web/imagen.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Image facade — mirrors tts.js. Picks the active portrait engine (local Z-Image on your
2
+ // GPU, or cloud FLUX; in-browser SD-Turbo / Janus get added here later) and exposes one
3
+ // generatePortrait(). The persona panel + the Settings image bar import only from here.
4
+ import { engineLocal as zimagelocal, engineCloud as flux, engineCloudDev as fluxdev, isLocalhost } from '/web/imagenServer.js'
5
+
6
const ENGINES = [zimagelocal, flux, fluxdev]

// The chosen engine id is kept in localStorage under this key so it survives refreshes.
const KEY = 'tinyarmy.imageEngine'

// Starting engine: a stored id wins when it names a known engine that is currently
// available; otherwise local Z-Image on localhost and cloud FLUX everywhere else.
function initialEngineId() {
  let saved = ''
  try {
    saved = localStorage.getItem(KEY) || ''
  } catch { /* ignore */ }
  const match = ENGINES.find((x) => x.id === saved)
  if (match && match.available()) return saved
  return isLocalhost() ? 'zimage-local' : 'flux'
}

let activeId = initialEngineId()

// Resolve the active engine object, falling back to the first available engine and
// finally to the first engine in the list.
const eng = () => {
  const active = ENGINES.find((e) => e.id === activeId)
  if (active) return active
  return ENGINES.find((e) => e.available()) || ENGINES[0]
}

// Plain-data snapshot of every engine for pickers (availability is evaluated now).
export const listImageEngines = () =>
  ENGINES.map(({ id, label, available, note }) => ({
    id,
    label,
    available: available(),
    note: note || '',
  }))

export const getImageEngineId = () => activeId
22
+
23
const _listeners = new Set()

// Subscribe to engine changes. Returns a function that removes the subscription.
export function onImageEngineChange(fn) {
  _listeners.add(fn)
  return () => _listeners.delete(fn)
}

// Switch the active engine, persist the choice and notify subscribers.
// No-op when `id` is unknown, already active, or names an engine that is not
// available here — the same "only if available" rule applied to the saved choice
// at load time, so an unusable engine can never become (or be persisted as) active.
export function setImageEngine(id) {
  const next = ENGINES.find((e) => e.id === id)
  if (!next || id === activeId || !next.available()) return
  activeId = id
  try { localStorage.setItem(KEY, id) } catch { /* ignore */ }
  // A failing listener must not stop the others from being told.
  for (const fn of _listeners) { try { fn(id) } catch { /* ignore */ } }
}

// Thin pass-throughs to the active engine.
export const imageNeedsDownload = () => !!eng().needsDownload
export const imageBackendLabel = () => eng().backendLabel()
export const imageNetworked = () => !!eng().networked

export async function ensureImage(onProgress) { return eng().ensure(onProgress) }

// Generate a portrait → PNG Blob. `prompt` is the appearance description; `seed` keeps it
// reproducible where the engine supports it.
export async function generatePortrait(prompt, { seed } = {}) {
  return eng().generate(prompt, { seed })
}
web/imagenBar.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Portrait-model picker for the Settings page. Chooses the engine that draws hero
2
+ // portraits (local Z-Image on your GPU, or cloud FLUX). Mirrors ttsBar.js: it only sets
3
+ // the engine on the shared imagen.js facade; the persona panel reads that choice.
4
+ import {
5
+ listImageEngines, getImageEngineId, setImageEngine,
6
+ imageBackendLabel, imageNeedsDownload, onImageEngineChange,
7
+ } from '/web/imagen.js'
8
+
9
// Tiny DOM builder: creates <tag>, applies `props`, appends `kids`.
//   • `class` sets className
//   • `onX` with a function value registers an "X" event listener
//   • any other non-null value becomes an attribute
// `kids` may be a single node/string or an array; null/undefined entries are skipped.
function el(tag, props = {}, kids = []) {
  const node = document.createElement(tag)
  Object.entries(props).forEach(([key, value]) => {
    if (key === 'class') {
      node.className = value
      return
    }
    if (key.startsWith('on') && typeof value === 'function') {
      node.addEventListener(key.slice(2), value)
      return
    }
    if (value != null) node.setAttribute(key, value)
  })
  const children = Array.isArray(kids) ? kids : [kids]
  children.forEach((child) => {
    if (child != null) node.append(child)
  })
  return node
}
19
+
20
// Mount the portrait-engine picker into `host`.
//   onChange — optional callback invoked after the user picks a different engine.
// Returns { refresh, destroy }: `refresh` re-renders the info line; `destroy`
// unsubscribes from engine changes and removes the bar, so mounting again does not
// leave stale listeners behind on the shared facade.
export function mountImagenBar(host, { onChange } = {}) {
  const engSel = el('select', { class: 'model-select engine-select' })
  const info = el('div', { class: 'model-info' })
  const bar = el('div', { class: 'model-bar imagen-bar' }, [
    el('label', { class: 'persona-label' }, '🖼 Portrait model'),
    engSel, info,
  ])
  host.append(bar)

  // Unavailable engines stay listed but disabled, with their note as the reason.
  engSel.replaceChildren(...listImageEngines().map((e) =>
    el('option', { value: e.id, ...(e.available ? {} : { disabled: 'disabled' }) },
      `${e.label}${e.available ? '' : ' · ' + (e.note || 'n/a')}`)))
  engSel.value = getImageEngineId()

  function renderInfo() {
    info.textContent = `${imageBackendLabel()}${imageNeedsDownload() ? ' · downloads on first use' : ''}`
  }

  engSel.addEventListener('change', () => { setImageEngine(engSel.value); renderInfo(); onChange && onChange() })
  // Keep this picker in sync when the engine is changed elsewhere.
  const unsubscribe = onImageEngineChange((id) => { engSel.value = id; renderInfo() })

  renderInfo()
  return {
    refresh: renderInfo,
    destroy() {
      unsubscribe()
      bar.remove()
    },
  }
}
web/imagenServer.js ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Image engines backed by our /portrait route (server-side, so keys stay off the client):
2
+ // • zimage-local — Z-Image-Turbo on YOUR GPU (TINY_IMAGE_MODE=local). Localhost only.
3
+ // • flux / flux-dev — FLUX via NVIDIA NIM (or HF Inference) in the cloud.
4
+ // Mirrors ttsQwen3.js (engine + engineLocal). generate() returns a PNG Blob.
5
+
6
// True when the page is served from a loopback-style host (localhost, 127.0.0.1,
// ::1 with or without brackets, or 0.0.0.0). Returns false when `location` cannot be read.
export const isLocalhost = () => {
  let host
  try {
    host = location.hostname
  } catch {
    return false
  }
  return /^(localhost|127\.0\.0\.1|\[?::1\]?|0\.0\.0\.0)$/i.test(host)
}
9
+
10
// POST `body` as JSON to /portrait and resolve to the response Blob (image/png).
// A non-2xx reply throws an Error carrying the status and the first 160 characters
// of the response text.
async function postPortrait(body) {
  const init = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  }
  const resp = await fetch('/portrait', init)
  if (resp.ok) return resp.blob() // image/png
  const detail = await resp.text()
  throw new Error(`portrait ${resp.status}: ${detail.slice(0, 160)}`)
}
17
+
18
// Fields every /portrait-backed engine shares: nothing to download, goes over the
// network, and `ensure` has no work to do.
const common = { needsDownload: false, networked: true, ensure: async () => {} }

// LOCAL: same-origin /portrait on a locally-run app.py (TINY_IMAGE_MODE=local → Z-Image
// on your GPU). Only offered on localhost; shown disabled with a note in prod.
export const engineLocal = Object.assign({}, common, {
  id: 'zimage-local',
  label: 'Z-Image-Turbo · local (your GPU)',
  available: () => isLocalhost(),
  note: 'run the project locally',
  generate: (prompt, { seed } = {}) => postPortrait({ prompt, seed, engine: 'local' }),
  backendLabel: () => '🖥 local model',
})

// CLOUD: FLUX via the backend proxy (NVIDIA NIM, else HF Inference). `provider` picks the
// NIM sub-model. schnell = fast (4 steps), dev = higher quality (28 steps).
const cloudEngine = (id, label, provider, backend) => ({
  ...common,
  id,
  label,
  available: () => true,
  generate: (prompt, { seed } = {}) => postPortrait({ prompt, seed, engine: 'cloud', provider }),
  backendLabel: () => backend,
})

export const engineCloud = cloudEngine(
  'flux', 'FLUX schnell · cloud (fast)', 'flux-schnell', '☁ FLUX (NIM)')

export const engineCloudDev = cloudEngine(
  'flux-dev', 'FLUX dev · cloud (higher quality)', 'flux-dev', '☁ FLUX dev (NIM)')
web/settingsPanel.js CHANGED
@@ -8,6 +8,7 @@
8
  // just won't appear (graceful no-op) and the app still runs on defaults.
9
  import { mountModelBar } from '/web/modelBar.js'
10
  import { mountTtsBar } from '/web/ttsBar.js'
 
11
  import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
12
  import { mountQualityBar } from '/web/qualityBar.js'
13
 
@@ -51,6 +52,9 @@ export function mountSettingsPanel() {
51
  'The provider that voices your heroes. Qwen3-TTS designs a voice from each hero’s ' +
52
  'description; Kokoro/Kitten run on your device with a named voice you pick per hero. ' +
53
  'The voice belongs to the hero, so there’s no global voice to choose here.', mountTtsBar)
 
 
 
54
  injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
55
  'The system prompt that writes each hero (name, about, quote and voice design). ' +
56
  'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
 
8
  // just won't appear (graceful no-op) and the app still runs on defaults.
9
  import { mountModelBar } from '/web/modelBar.js'
10
  import { mountTtsBar } from '/web/ttsBar.js'
11
+ import { mountImagenBar } from '/web/imagenBar.js'
12
  import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
13
  import { mountQualityBar } from '/web/qualityBar.js'
14
 
 
52
  'The provider that voices your heroes. Qwen3-TTS designs a voice from each hero’s ' +
53
  'description; Kokoro/Kitten run on your device with a named voice you pick per hero. ' +
54
  'The voice belongs to the hero, so there’s no global voice to choose here.', mountTtsBar)
55
+ injectSection(sample, 'tac-image-settings', 'Portrait',
56
+ 'The model that paints hero portraits. Z-Image-Turbo runs on your GPU when you host ' +
57
+ 'the project locally; FLUX runs in the cloud otherwise.', mountImagenBar)
58
  injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
59
  'The system prompt that writes each hero (name, about, quote and voice design). ' +
60
  'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)