"""
DARKROOM HandRefiner — Hugging Face ZeroGPU Space
=================================================
Standard Gradio Interface (the pattern ZeroGPU actually supports): upload an
image, optionally paint a mask, get the hands structurally fixed on a free
on-demand GPU. This is the reliable shape — the previous "custom FastAPI route"
build failed with "No @spaces.GPU function detected" because ZeroGPU only
detects GPU functions wired into a normal Gradio app.
PIPELINE: MeshGraphormer hand-mesh -> depth map -> depth ControlNet ->
Stable Diffusion inpainting (HandRefiner). Fixes only the hand region.
--------------------------------------------------------------------------
DEPLOY (needs a HF PRO account to CREATE a ZeroGPU Space — $9/mo)
--------------------------------------------------------------------------
1. huggingface.co -> New Space -> SDK: Gradio -> Hardware: ZeroGPU
2. Upload: app.py, requirements.txt, README.md
3. Wait for build, then use the Space UI (or call it from the DARKROOM tool
via the gradio_client endpoint shown on the Space's "View API" page).
HONEST LIMITS:
* Creating a ZeroGPU Space requires PRO. Using one is free within a daily quota
(resets 24h after first use); each fix is a few GPU-seconds.
* GPU duration is capped (~120s max). Both GPU functions request the full 120s (`@spaces.GPU(duration=120)`).
* Stock depth ControlNet is okay-not-perfect; swap CONTROLNET_ID to
hr16/ControlNet-HandRefiner-pruned for finetuned quality.
* MeshGraphormer can't fix unreadable hands or crossed fingers.
"""
import spaces # must precede torch for ZeroGPU
import torch
from PIL import Image, ImageFilter
import gradio as gr
# ---------------------------------------------------------------------------
# transformers compatibility shim (fixes MeshGraphormer import on new transformers)
# Newer transformers removed prune_linear_layer / Conv1D from transformers.modeling_utils,
# which is exactly what breaks the vendored MeshGraphormer (ComfyUI issue #578).
# Re-expose them so the legacy import succeeds.
# ---------------------------------------------------------------------------
def _patch_transformers():
try:
import transformers.modeling_utils as mu
need = ("prune_linear_layer", "Conv1D", "prune_layer")
if all(hasattr(mu, n) for n in need):
return
from transformers import pytorch_utils as pu
for n in need:
if not hasattr(mu, n) and hasattr(pu, n):
setattr(mu, n, getattr(pu, n))
print("[shim] transformers symbols patched", flush=True)
except Exception as e:
print("[shim] transformers patch skipped:", e, flush=True)
# Apply the shim right away, at import time. It has to run before
# _make_mesh_detector() imports controlnet_aux further down in this module.
_patch_transformers()

# --- Hugging Face Hub model ids -------------------------------------------
SD_INPAINT_ID = "runwayml/stable-diffusion-inpainting"  # base model of the inpainting pipeline built in _load()
CONTROLNET_ID = "lllyasviel/control_v11f1p_sd15_depth"  # ControlNet attached to the inpainting pipeline
TILE_CN_ID = "lllyasviel/control_v11f1e_sd15_tile" # detail-regeneration ControlNet
SD_BASE_ID = "runwayml/stable-diffusion-v1-5" # base SD for img2img detail pass
MESHGRAPHORMER_ID = "hr16/ControlNet-HandRefiner-pruned"  # passed to MeshGraphormerDetector.from_pretrained()

# --- Working resolutions (longest side, in pixels) -------------------------
MAX_SIDE = 768  # cap applied by _fit() before the hand-fix pass
DETAIL_MAX_SIDE = 1280 # detail pass can work larger since it's tiled-friendly

# --- Prompts ---------------------------------------------------------------
# DEFAULT_PROMPT pre-fills the UI textbox and is the fallback when the prompt is empty.
DEFAULT_PROMPT = "a detailed, anatomically correct hand with five fingers, natural proportions, same art style and lighting"
NEG = "extra fingers, fused fingers, missing fingers, deformed, mutated, blurry, low quality"  # negative prompt for fix_hands()
DETAIL_NEG = "blurry, soft, out of focus, jpeg artifacts, low quality, smudged, messy lines"  # negative prompt for detail_pass()

# --- Lazily populated module state -----------------------------------------
_PIPE = None      # inpainting pipeline, assigned by _load()
_MESH = None      # hand-mesh detector, assigned by _load(); stays None if it cannot be created
_DETAIL = None    # tile-ControlNet img2img pipeline, assigned by _load_detail()
_MESH_OK = False  # True only after the detector was created successfully in _load()
_MESH_ERR = None  # str() of the exception that prevented the detector from loading, if any
def _make_mesh_detector():
    """Build the MeshGraphormer hand detector from ``MESHGRAPHORMER_ID``.

    The class is imported from the top level of ``controlnet_aux``. Per the
    original author's note, controlnet_aux==0.0.6 ships it there and newer
    releases dropped it, which is why the version pin matters.

    Any import or download failure propagates to the caller.
    """
    from controlnet_aux import MeshGraphormerDetector as detector_cls

    detector = detector_cls.from_pretrained(MESHGRAPHORMER_ID)
    return detector
def _load():
    """Create the hand-fix models once and cache them in module globals.

    Steps, in order:
      1. Try to build the MeshGraphormer detector. Failure is tolerated: the
         error text is stored in ``_MESH_ERR``, ``_MESH_OK`` stays False and
         loading continues (manual masks still work without it).
      2. Build the depth-ControlNet inpainting pipeline in float16 with the
         safety checker disabled, and switch it to the UniPC scheduler.
      3. Enable attention slicing and VAE tiling, each on a best-effort basis.

    Nothing is moved to a device here. Calling again after a successful load
    is a no-op. Errors from step 2 propagate to the caller.
    """
    global _PIPE, _MESH, _MESH_OK, _MESH_ERR
    if _PIPE is not None:
        return

    import time
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetInpaintPipeline,
        UniPCMultistepScheduler,
    )

    t0 = time.time()
    print("[load] starting model load on CPU…", flush=True)

    # The detector is optional, so it gets its own try block and cannot take
    # the rest of the load down with it.
    try:
        _MESH = _make_mesh_detector()
        _MESH_OK = True
        print(f"[load] meshgraphormer ok ({time.time()-t0:.0f}s)", flush=True)
    except Exception as e:
        _MESH = None
        _MESH_OK = False
        _MESH_ERR = str(e)
        print("[load] meshgraphormer UNAVAILABLE (manual mask still works):", e, flush=True)

    depth_cn = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=torch.float16)
    inpaint = StableDiffusionControlNetInpaintPipeline.from_pretrained(
        SD_INPAINT_ID,
        controlnet=depth_cn,
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    inpaint.scheduler = UniPCMultistepScheduler.from_config(inpaint.scheduler.config)

    # Memory savers are nice-to-have: log and move on if one is unsupported.
    # The attribute lookup stays inside the try so a missing method is also
    # just logged.
    best_effort = (
        ("enable_attention_slicing", "[load] attn-slicing skip:"),
        ("enable_vae_tiling", "[load] vae-tiling skip:"),
    )
    for method_name, skip_msg in best_effort:
        try:
            getattr(inpaint, method_name)()
        except Exception as e:
            print(skip_msg, e, flush=True)

    _PIPE = inpaint
    print(f"[load] pipeline ready on CPU ({time.time()-t0:.0f}s total)", flush=True)
# Preload at import: runs once when this module is first imported (i.e. when
# the container boots), OUTSIDE any @spaces.GPU-timed call. A failure here is
# only logged, because fix_hands() calls _load() again on every request and
# will retry the load then.
try:
    _load()
except Exception as _e:
    print("[load] preload deferred:", _e, flush=True)
def _load_detail():
    """Create the detail-recovery pipeline once and cache it in ``_DETAIL``.

    Builds a tile-ControlNet img2img pipeline on top of ``SD_BASE_ID`` in
    float16 with the safety checker disabled, switches it to the UniPC
    scheduler, then enables attention slicing and VAE tiling on a best-effort
    basis. Nothing is moved to a device here. Loaded lazily: the first caller
    pays the cost and later calls return immediately.
    """
    global _DETAIL
    if _DETAIL is not None:
        return

    import time
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetImg2ImgPipeline,
        UniPCMultistepScheduler,
    )

    t0 = time.time()
    print("[load] detail pipeline (tile CN) on CPU…", flush=True)

    tile_cn = ControlNetModel.from_pretrained(TILE_CN_ID, torch_dtype=torch.float16)
    img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        SD_BASE_ID,
        controlnet=tile_cn,
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    img2img.scheduler = UniPCMultistepScheduler.from_config(img2img.scheduler.config)

    # Optional memory savers; the lookup stays inside the try so a missing
    # method is logged rather than raised.
    best_effort = (
        ("enable_attention_slicing", "[load] attn-slicing skip:"),
        ("enable_vae_tiling", "[load] vae-tiling skip:"),
    )
    for method_name, skip_msg in best_effort:
        try:
            getattr(img2img, method_name)()
        except Exception as e:
            print(skip_msg, e, flush=True)

    _DETAIL = img2img
    print(f"[load] detail pipeline ready ({time.time()-t0:.0f}s)", flush=True)
def _fit_to(img, max_side):
    """Resize ``img`` so its longest side is at most ``max_side``.

    The image is never enlarged (the scale factor is capped at 1.0). Each
    output dimension is rounded to the nearest multiple of 8, with a floor of
    8 pixels. Resampling is Lanczos.

    Returns:
        ``(resized_image, (original_width, original_height))``.
    """
    orig_w, orig_h = img.size
    factor = min(1.0, max_side / max(orig_w, orig_h))

    def _snap(length):
        # Scale, then round to the closest multiple of 8 (never below 8).
        return max(8, int(round(length * factor / 8)) * 8)

    resized = img.resize((_snap(orig_w), _snap(orig_h)), Image.LANCZOS)
    return resized, (orig_w, orig_h)
def _fit(img):
    """Resize ``img`` for the hand-fix pass: longest side capped at ``MAX_SIDE``.

    Delegates to ``_fit_to`` instead of repeating its arithmetic, so the two
    sizing helpers cannot drift apart. The result is identical to the previous
    inlined version: no enlargement, both sides rounded to a multiple of 8
    (minimum 8), Lanczos resampling.

    Returns:
        ``(resized_image, (original_width, original_height))``.
    """
    return _fit_to(img, MAX_SIDE)
def _mask_from_layer(layer, size):
    """Convert one painted editor layer into an "L" mask of ``size``.

    Painted pixels are identified by the layer's alpha channel when it has
    one, because a dark brush on a transparent layer has zero luminance and
    would otherwise yield an empty mask. If the alpha channel is fully opaque
    it carries no information, so the layer's luminance is used instead (this
    also covers layers without alpha, e.g. a plain black/white mask).

    Returns ``None`` when ``layer`` is ``None`` or the resulting mask is empty.
    """
    if layer is None:
        return None
    if "A" in layer.getbands():
        alpha = layer.getchannel("A")
        lowest, _ = alpha.getextrema()
        mask = alpha if lowest < 255 else layer.convert("L")
    else:
        mask = layer.convert("L")
    mask = mask.resize(size, Image.LANCZOS)
    return mask if mask.getbbox() is not None else None


@spaces.GPU(duration=120)
def fix_hands(image, mask_layers, prompt, strength):
    """Regenerate the hand region of an image with the ControlNet inpainting pipeline.

    Args:
        image: a PIL image, or an image-editor value dict whose ``"background"``
            entry holds the PIL image (the UI passes the ImageMask value here).
        mask_layers: an image-editor value dict; the first entry of its
            ``"layers"`` list, if painted, is used as the inpainting mask.
            Any other type is ignored.
        prompt: text prompt; ``DEFAULT_PROMPT`` is used when empty.
        strength: denoising strength, converted with ``float()``.

    Returns:
        The processed PIL image, resized back to the input's original size.

    Raises:
        gr.Error: when no image was supplied, when neither a painted mask nor
            a detected hand mask is available, or (as "Fix failed: ...") when
            anything else goes wrong during loading or inference.
    """
    import time, traceback

    # The ImageMask component delivers a dict; the picture is under "background".
    src = image.get("background") if isinstance(image, dict) else image
    if src is None:
        raise gr.Error("Upload an image first.")
    try:
        t0 = time.time()
        _load()  # no-op if already loaded
        _PIPE.to("cuda")
        # The mesh detector is optional: only touch it when it actually loaded.
        if _MESH_OK and _MESH is not None:
            try:
                _MESH.to("cuda")
            except Exception as e:
                print("[fix] mesh detector not moved to cuda:", e, flush=True)
        print(f"[fix] models on GPU, t={time.time()-t0:.0f}s (mesh={_MESH_OK})", flush=True)

        init, (ow, oh) = _fit(src.convert("RGB"))
        W, H = init.size
        print(f"[fix] input fitted to {W}x{H}", flush=True)

        # Optional hand-drawn mask from the ImageMask component.
        sent_mask = None
        if isinstance(mask_layers, dict):
            layers = mask_layers.get("layers") or []
            if layers:
                sent_mask = _mask_from_layer(layers[0], (W, H))

        depth_img = None
        auto_mask = None
        if _MESH_OK and _MESH is not None:
            print("[fix] running MeshGraphormer…", flush=True)
            try:
                mg = _MESH(init)
                if isinstance(mg, (tuple, list)):
                    depth_img = mg[0]
                    auto_mask = mg[1] if len(mg) > 1 else None
                else:
                    depth_img = mg
                if depth_img is not None:
                    depth_img = depth_img.convert("RGB").resize((W, H), Image.LANCZOS)
            except Exception as e:
                print("[fix] mesh inference failed, falling back to mask:", e, flush=True)

        # A painted mask wins; otherwise fall back to the detector's mask,
        # but only if it actually marks something.
        mask_img = sent_mask
        if mask_img is None and auto_mask is not None:
            detected = auto_mask.convert("L").resize((W, H), Image.LANCZOS)
            if detected.getbbox() is not None:
                mask_img = detected

        if mask_img is None:
            if not _MESH_OK:
                raise gr.Error("Auto hand-detection isn't available on this Space build. "
                               "Paint a mask over the bad hand (use the brush on the image) and run again.")
            raise gr.Error("No hands detected. Paint a mask over the hand and try again.")

        # Without a depth map (no mesh), feed the source image itself as the
        # control image so the structure is still guided by the original.
        if depth_img is None:
            depth_img = init

        mask_img = mask_img.filter(ImageFilter.GaussianBlur(2))  # soften the mask edge
        print("[fix] running diffusion…", flush=True)
        out = _PIPE(
            prompt=prompt or DEFAULT_PROMPT, negative_prompt=NEG, image=init, mask_image=mask_img,
            control_image=depth_img, num_inference_steps=25, strength=float(strength),
            guidance_scale=7.5, controlnet_conditioning_scale=0.7,
        ).images[0]
        print(f"[fix] done, total {time.time()-t0:.0f}s", flush=True)
        return out.resize((ow, oh), Image.LANCZOS)
    except gr.Error:
        # Already a user-facing message: pass it through unwrapped.
        raise
    except Exception as e:
        print("[fix] ERROR:\n" + traceback.format_exc(), flush=True)
        raise gr.Error(f"Fix failed: {e}") from e
@spaces.GPU(duration=120)
def detail_pass(image, strength, scale):
    """Recover detail and clean lines with tile-ControlNet img2img.

    The input is optionally enlarged by ``scale`` (Lanczos), fitted to
    ``DETAIL_MAX_SIDE``, and run through the detail pipeline with an empty
    prompt, using the working image as both the img2img input and the control
    image. The result is resized back to the (possibly enlarged) size.

    Args:
        image: a PIL image, or a dict whose ``"background"`` entry is one.
        strength: denoising strength, converted with ``float()``.
        scale: enlargement factor; applied only when greater than 1.01.

    Returns:
        The processed PIL image.

    Raises:
        gr.Error: when no image was supplied, or (as "Detail pass failed: ...")
            when loading or inference fails.
    """
    import time, traceback

    if image is None:
        raise gr.Error("Upload an image first.")
    try:
        t0 = time.time()
        _load_detail()
        _DETAIL.to("cuda")

        picture = image["background"] if isinstance(image, dict) else image
        picture = picture.convert("RGB")

        # Optional Lanczos enlargement first; the model then fills in detail
        # at the higher resolution.
        scale = float(scale)
        if scale > 1.01:
            enlarged = (int(picture.width * scale), int(picture.height * scale))
            picture = picture.resize(enlarged, Image.LANCZOS)

        work, (ow, oh) = _fit_to(picture, DETAIL_MAX_SIDE)
        print(f"[detail] working at {work.size}, denoise={strength}", flush=True)

        # Tile ControlNet takes the image itself as its control signal.
        sampler_args = dict(
            prompt="",
            negative_prompt=DETAIL_NEG,
            image=work,
            control_image=work,
            num_inference_steps=30,
            strength=float(strength),
            guidance_scale=6.0,
            controlnet_conditioning_scale=1.0,
        )
        result = _DETAIL(**sampler_args).images[0]
        out = result if result.size == (ow, oh) else result.resize((ow, oh), Image.LANCZOS)
        print(f"[detail] done, total {time.time()-t0:.0f}s", flush=True)
        return out
    except Exception as e:
        print("[detail] ERROR:\n" + traceback.format_exc(), flush=True)
        raise gr.Error(f"Detail pass failed: {e}")
# Gradio UI: one Blocks app with two tabs, each wiring a button to one of the
# @spaces.GPU functions above. Components are laid out in the order they are
# created inside the nested context managers, so statement order matters here.
with gr.Blocks(title="DARKROOM", theme=gr.themes.Base()) as demo:
    gr.Markdown("## 🎨 DARKROOM\nAI-art repair on GPU. **Fix hands** regenerates malformed hands "
                "with correct geometry. **Add detail** uses Tile-ControlNet img2img to recover real "
                "sharpness and clean lineart while keeping your original style.")
    # --- Tab 1: hand repair ------------------------------------------------
    with gr.Tab("Fix hands"):
        with gr.Row():
            with gr.Column():
                # Single component for both the picture and the optional painted mask.
                inp = gr.ImageMask(type="pil", label="Image (optionally paint over the bad hand)")
                prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=2)
                strength = gr.Slider(0.3, 1.0, value=0.75, step=0.05, label="Fix strength (denoise)")
                btn = gr.Button("Fix hands", variant="primary")
            with gr.Column():
                out = gr.Image(type="pil", label="Result")
        # `inp` is listed twice on purpose: its value is passed as both the
        # `image` and the `mask_layers` argument of fix_hands.
        btn.click(fix_hands, inputs=[inp, inp, prompt, strength], outputs=out, api_name="fix_hands")
    # --- Tab 2: detail recovery --------------------------------------------
    with gr.Tab("Add detail"):
        with gr.Row():
            with gr.Column():
                dinp = gr.Image(type="pil", label="Image to sharpen / add detail")
                dstrength = gr.Slider(0.15, 0.6, value=0.3, step=0.05,
                                      label="Detail strength (low = safe & on-style, high = more new detail / more drift)")
                # Steps of 0.5 give the choices 1.0, 1.5 and 2.0.
                dscale = gr.Slider(1.0, 2.0, value=1.0, step=0.5, label="Enlarge first (×)")
                dbtn = gr.Button("Add detail", variant="primary")
            with gr.Column():
                dout = gr.Image(type="pil", label="Result")
        dbtn.click(detail_pass, inputs=[dinp, dstrength, dscale], outputs=dout, api_name="detail_pass")

# Start the app (with the request queue enabled) only when run as a script.
if __name__ == "__main__":
    demo.queue().launch()
|