Spaces:

build-small-hackathon
/

tiny-army

Running

App Files Files Community

polats committed 4 days ago

Commit

f77d660

1 Parent(s): db6b273

Add Klein ZeroGPU portrait option

Browse files

Files changed (5) hide show

.gitignore +1 -0
app.py +96 -8
requirements.txt +1 -0
web/imagen.js +2 -2
web/imagenServer.js +9 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 __pycache__/
 *.pyc
 .venv/

 __pycache__/
 *.pyc
 .venv/
+logs/

app.py CHANGED Viewed

@@ -20,6 +20,20 @@ import json as _json
 import os
 import threading
 import gradio as gr
 import uvicorn
 from fastapi import FastAPI, Request
@@ -35,6 +49,7 @@ import prompts
 HERE = os.path.dirname(os.path.abspath(__file__))
 WEB = os.path.join(HERE, "web")
 # The Sprite tab's character picker + controls are built entirely by the shared
 # playground (web/playground.js) from /sprites/characters.json — no Python-side
@@ -176,7 +191,7 @@ def diary(unit, traits):
         yield header + f"Today I held the line. _(model unavailable: {e})_"
-with gr.Blocks(title="Tiny Army") as demo:
     gr.HTML(SIDEBAR_HTML)
     with gr.Tabs():
         with gr.Tab("Battle") as battle_tab:
@@ -201,7 +216,7 @@ with gr.Blocks(title="Tiny Army") as demo:
         # (footer "Settings" → ?view=settings) by web/settingsPanel.js — not a tab.
 # Mount Gradio on FastAPI so we can also serve the JS module + the sprite assets.
-fastapi_app = FastAPI()
 # Behind HF's custom-domain proxy Gradio emits its theme.css <link> as http://
@@ -413,6 +428,12 @@ _MIN_IMAGE_BYTES = 15_000  # smaller than this = a blank/safety-blocked frame
 _img_pipe = None
 _img_lock = threading.Lock()
 def _load_image_pipe():
@@ -461,6 +482,50 @@ def _local_portrait(prompt, seed=None, width=1024, height=1024, steps=9):
     return out.getvalue()
 def _nim_portrait(prompt, provider="flux-schnell", width=1024, height=1024):
     import random
     p = _NIM_PROVIDERS.get(provider, _NIM_PROVIDERS["flux-schnell"])
@@ -524,10 +589,15 @@ async def portrait(request: Request):
     prompt = (body.get("prompt") or "").strip()
     seed = body.get("seed")
     provider = body.get("provider") or ""  # cloud sub-provider hint (e.g. flux-dev)
-    engine = (body.get("engine") or "").strip().lower()  # 'local' | 'cloud' | '' = auto
     if not prompt:
         return Response("prompt required", status_code=400)
     want_local = engine == "local" or (not engine and IMAGE_MODE == "local")
     if want_local:  # in-process open weights on your GPU (dev)
         if IMAGE_MODE != "local":
             return Response("local image mode not enabled (run with TINY_IMAGE_MODE=local)", status_code=503)
@@ -536,6 +606,15 @@ async def portrait(request: Request):
         except Exception as e:  # noqa: BLE001 — surface a clear setup hint
             return Response(f"local image error (pip install 'git+https://github.com/huggingface/diffusers' accelerate?): {e}", status_code=500)
         return Response(png, media_type="image/png", headers={"Cache-Control": "no-store"})
     # Cloud: prefer NVIDIA NIM (woid's FLUX path), else HF Inference (our HF_TOKEN).
     if NIM_KEY:
         png, err = await asyncio.to_thread(_nim_portrait, prompt, provider or "flux-schnell")
@@ -633,14 +712,23 @@ async def persona_generate_stream(request: Request):
     })
-app = gr.mount_gradio_app(fastapi_app, demo, path="/", head=HEAD, theme=gr.themes.Soft())
 if __name__ == "__main__":
     # The default UI runs the model IN THE BROWSER (wllama). The Python llama.cpp path
     # stays as a lazy fallback (only loads if /persona/generate/stream is hit), so we
     # don't pre-download it here.
-    # proxy_headers + trusting forwarded IPs lets Gradio honour X-Forwarded-Proto
-    # from HF's edge, so it generates https (not http) asset URLs behind the proxy.
-    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")),
-                proxy_headers=True, forwarded_allow_ips="*")

 import os
 import threading
+# ZeroGPU requires the spaces shim to be imported before torch. If `spaces` fails to
+# import (e.g. locally without it installed), GPU becomes a no-op decorator instead.
+try:
+    import spaces  # type: ignore
+    GPU = spaces.GPU
+except Exception:  # pragma: no cover
+    def GPU(*dargs, **dkwargs):  # noqa: N802 - mirror spaces.GPU
+        def wrap(fn):  # returned for the parametrised form, e.g. @GPU(duration=60)
+            return fn
+        if len(dargs) == 1 and callable(dargs[0]) and not dkwargs:  # bare form: @GPU
+            return dargs[0]
+        return wrap
 import gradio as gr
 import uvicorn
 from fastapi import FastAPI, Request
 HERE = os.path.dirname(os.path.abspath(__file__))
 WEB = os.path.join(HERE, "web")
+USE_GRADIO_SERVER = os.environ.get("TINY_GRADIO_SERVER", "").lower() in ("1", "true", "yes")
 # The Sprite tab's character picker + controls are built entirely by the shared
 # playground (web/playground.js) from /sprites/characters.json — no Python-side
         yield header + f"Today I held the line. _(model unavailable: {e})_"
+with gr.Blocks(title="Tiny Army") as ui:
     gr.HTML(SIDEBAR_HTML)
     with gr.Tabs():
         with gr.Tab("Battle") as battle_tab:
         # (footer "Settings" → ?view=settings) by web/settingsPanel.js — not a tab.
 # Mount Gradio on FastAPI so we can also serve the JS module + the sprite assets.
+fastapi_app = gr.Server() if USE_GRADIO_SERVER else FastAPI()
 # Behind HF's custom-domain proxy Gradio emits its theme.css <link> as http://
 _img_pipe = None
 _img_lock = threading.Lock()
+_klein_pipe = None  # klein pipeline, created on first use by _klein_portrait and then reused
+_klein_lock = threading.Lock()  # held by _klein_portrait around pipeline load + generation
+_KLEIN_MODEL_ID = os.environ.get("TINY_KLEIN_MODEL", "black-forest-labs/FLUX.2-klein-4B")  # model id passed to from_pretrained
+_KLEIN_STEPS = int(os.environ.get("TINY_KLEIN_STEPS", "4"))  # num_inference_steps for each portrait
+_KLEIN_GUIDANCE = float(os.environ.get("TINY_KLEIN_GUIDANCE", "1.0"))  # guidance_scale for each portrait
+_KLEIN_SPACE = os.environ.get("TINY_KLEIN_SPACE", "").strip()  # when non-empty, the portrait handler calls this Space via gradio_client instead of generating in-process
 def _load_image_pipe():
     return out.getvalue()
+def _load_klein_pipe():
+    import torch
+    from diffusers import Flux2KleinPipeline
+    return Flux2KleinPipeline.from_pretrained(_KLEIN_MODEL_ID, torch_dtype=torch.bfloat16)
+def _remote_klein_portrait(prompt, seed=None):
+    import os as _os
+    from gradio_client import Client
+    client = Client(_KLEIN_SPACE, token=HF_TOKEN or None)
+    result = client.predict(prompt, int(seed if seed is not None else 42), api_name="/generate")
+    path = result[0] if isinstance(result, (tuple, list)) else result
+    with open(_os.fspath(path), "rb") as f:
+        return f.read()
+@GPU(duration=60)
+def _klein_portrait(prompt, seed=None, width=1024, height=1024):
+    """FLUX.2 [klein] 4B for ZeroGPU-backed portrait generation."""
+    global _klein_pipe
+    import io
+    import random
+    import torch
+    with _klein_lock:
+        if _klein_pipe is None:
+            _klein_pipe = _load_klein_pipe()
+        dev = "cuda" if torch.cuda.is_available() else "cpu"
+        _klein_pipe.to(dev)
+        s = int(seed if seed is not None else random.randint(0, 2_147_483_647))
+        img = _klein_pipe(
+            prompt=prompt, width=width, height=height,
+            num_inference_steps=_KLEIN_STEPS, guidance_scale=_KLEIN_GUIDANCE,
+            generator=torch.Generator(device=dev).manual_seed(s),
+        ).images[0]
+        if dev == "cuda":
+            _klein_pipe.to("cpu")
+            try:
+                torch.cuda.empty_cache()
+            except Exception:
+                pass
+    out = io.BytesIO(); img.save(out, format="PNG")
+    return out.getvalue()
 def _nim_portrait(prompt, provider="flux-schnell", width=1024, height=1024):
     import random
     p = _NIM_PROVIDERS.get(provider, _NIM_PROVIDERS["flux-schnell"])
     prompt = (body.get("prompt") or "").strip()
     seed = body.get("seed")
     provider = body.get("provider") or ""  # cloud sub-provider hint (e.g. flux-dev)
+    engine = (body.get("engine") or "").strip().lower()  # 'local' | 'klein' | 'cloud' | '' = auto
     if not prompt:
         return Response("prompt required", status_code=400)
     want_local = engine == "local" or (not engine and IMAGE_MODE == "local")
+    want_klein = (
+        engine in ("klein", "zerogpu")
+        or provider in ("flux-klein-4b", "klein-4b")
+        or (not engine and IMAGE_MODE in ("klein", "zerogpu", "klein-zerogpu"))
+    )
     if want_local:  # in-process open weights on your GPU (dev)
         if IMAGE_MODE != "local":
             return Response("local image mode not enabled (run with TINY_IMAGE_MODE=local)", status_code=503)
         except Exception as e:  # noqa: BLE001 — surface a clear setup hint
             return Response(f"local image error (pip install 'git+https://github.com/huggingface/diffusers' accelerate?): {e}", status_code=500)
         return Response(png, media_type="image/png", headers={"Cache-Control": "no-store"})
+    if want_klein:
+        try:
+            if _KLEIN_SPACE:
+                png = await asyncio.to_thread(_remote_klein_portrait, prompt, seed)
+            else:
+                png = await asyncio.to_thread(_klein_portrait, prompt, seed)
+        except Exception as e:  # noqa: BLE001
+            return Response(f"klein image error: {e}", status_code=500)
+        return Response(png, media_type="image/png", headers={"Cache-Control": "no-store"})
     # Cloud: prefer NVIDIA NIM (woid's FLUX path), else HF Inference (our HF_TOKEN).
     if NIM_KEY:
         png, err = await asyncio.to_thread(_nim_portrait, prompt, provider or "flux-schnell")
     })
+app = gr.mount_gradio_app(fastapi_app, ui, path="/", head=HEAD, theme=gr.themes.Soft())
+demo = app if USE_GRADIO_SERVER else ui  # NOTE(review): presumably kept so tooling that looks up a module-level `demo` still finds one — confirm
 if __name__ == "__main__":
     # The default UI runs the model IN THE BROWSER (wllama). The Python llama.cpp path
     # stays as a lazy fallback (only loads if /persona/generate/stream is hit), so we
     # don't pre-download it here.
+    if USE_GRADIO_SERVER:  # opt-in: TINY_GRADIO_SERVER set to 1/true/yes
+        app.launch(
+            server_name="0.0.0.0",
+            server_port=int(os.environ.get("PORT", "7860")),
+            head=HEAD,
+            theme=gr.themes.Soft(),
+        )
+    else:  # default: fastapi_app is a plain FastAPI instance, served by uvicorn
+        # proxy_headers + trusting forwarded IPs lets Gradio honour X-Forwarded-Proto
+        # from HF's edge, so it generates https (not http) asset URLs behind the proxy.
+        uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")),
+                    proxy_headers=True, forwarded_allow_ips="*")

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio==6.15.2
 huggingface_hub
 # llama.cpp runtime for the persona + war-diary model. The CPU wheel index ships a

+--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
 gradio==6.15.2
 huggingface_hub
 # llama.cpp runtime for the persona + war-diary model. The CPU wheel index ships a

web/imagen.js CHANGED Viewed

@@ -1,10 +1,10 @@
 // Image facade — mirrors tts.js. Picks the active portrait engine (local Z-Image on your
 // GPU, or cloud FLUX; in-browser SD-Turbo / Janus get added here later) and exposes one
 // generatePortrait(). The persona panel + the Settings image bar import only from here.
-import { engineLocal as zimagelocal, engineCloud as flux, engineCloudDev as fluxdev, isLocalhost } from '/web/imagenServer.js'
 import { engine as bonsai } from '/web/imagenBonsai.js'
-const ENGINES = [zimagelocal, bonsai, flux, fluxdev]
 // Default: local Z-Image on localhost (your GPU), cloud FLUX in prod. Persisted across
 // refreshes; a saved choice wins if it's still available.
 const KEY = 'tinyarmy.imageEngine'

 // Image facade — mirrors tts.js. Picks the active portrait engine (local Z-Image on your
 // GPU, or cloud FLUX; in-browser SD-Turbo / Janus get added here later) and exposes one
 // generatePortrait(). The persona panel + the Settings image bar import only from here.
+import { engineLocal as zimagelocal, engineKleinZeroGpu as klein, engineCloud as flux, engineCloudDev as fluxdev, isLocalhost } from '/web/imagenServer.js'
 import { engine as bonsai } from '/web/imagenBonsai.js'
+const ENGINES = [zimagelocal, klein, bonsai, flux, fluxdev]
 // Default: local Z-Image on localhost (your GPU), cloud FLUX in prod. Persisted across
 // refreshes; a saved choice wins if it's still available.
 const KEY = 'tinyarmy.imageEngine'

web/imagenServer.js CHANGED Viewed

@@ -29,6 +29,15 @@ export const engineLocal = {
   backendLabel: () => '🖥 local model',
 }
 // CLOUD: FLUX via the backend proxy (NVIDIA NIM, else HF Inference). `provider` picks the
 // NIM sub-model. schnell = fast (4 steps), dev = higher quality (28 steps).
 export const engineCloud = {

   backendLabel: () => '🖥 local model',
 }
+export const engineKleinZeroGpu = {  // FLUX.2 klein 4B portrait engine, generated by the backend
+  ...common,
+  id: 'klein-zerogpu',
+  label: 'FLUX.2 klein 4B · ZeroGPU',
+  available: () => true,  // always offered; no client-side check is made
+  generate: (prompt, { seed } = {}) => postPortrait({ prompt, seed, engine: 'klein', provider: 'flux-klein-4b' }),  // the backend treats either engine 'klein' or provider 'flux-klein-4b' as a klein request
+  backendLabel: () => 'ZeroGPU FLUX.2 klein 4B',
+}
 // CLOUD: FLUX via the backend proxy (NVIDIA NIM, else HF Inference). `provider` picks the
 // NIM sub-model. schnell = fast (4 steps), dev = higher quality (28 steps).
 export const engineCloud = {