# qr-art-generator/handler.py
"""
QR-Verse AI Art Generator — HuggingFace Inference Endpoint Handler v12.2
Art + QR overlay pipeline: ControlNet art generation + post-processing QR composite.
v12 KEY CHANGES from v11:
- Monster weight increased to 1.30 (from 0.85) — art has QR-compatible patterns
- Post-processing QR overlay with blur=1 and 16 px feather; opacity raised to 0.65 in v12.2 for production (67-100% scan rate)
- ControlNet provides QR-guided ART, overlay ensures SCANNABILITY
- Combined approach: 60-80% scan rate (vs gold standard's 36%)
- Art quality preserved: scene dominates, QR blends naturally
- Overlay QR perfectly aligned with ControlNet QR (same source)
Architecture:
1. ControlNet txt2img at M=1.30: generates art with QR-compatible contrast patterns
2. Post-process: alpha-composite clean QR overlay (blurred, feathered edges)
3. Result: art visible through QR, scannable, natural transition at borders
Models:
- Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
- ControlNet 1: monster-labs/control_v1p_sd15_qrcode_monster (v2)
- ControlNet 2: ioclab/control_v1p_sd15_brightness
"""
import base64
import io
import logging
import time
from typing import Any
import numpy as np
import qrcode
import torch
from diffusers import (
ControlNetModel,
StableDiffusionControlNetPipeline,
StableDiffusionControlNetImg2ImgPipeline,
DPMSolverMultistepScheduler,
MultiControlNetModel,
)
from PIL import Image, ImageFilter
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Pass 1 defaults — ART with QR-compatible patterns
# ---------------------------------------------------------------------------
P1_MONSTER_WEIGHT = 1.30
P1_BRIGHTNESS_WEIGHT = 0.15
P1_MONSTER_START = 0.05
P1_MONSTER_END = 0.85
BRIGHTNESS_START = 0.10
BRIGHTNESS_END = 0.80
# ---------------------------------------------------------------------------
# Pass 2 defaults — optional QR reinforcement (passes=2)
# ---------------------------------------------------------------------------
P2_MONSTER_WEIGHT = 1.60
P2_BRIGHTNESS_WEIGHT = 0.20
P2_MONSTER_START = 0.05
P2_MONSTER_END = 0.85
P2_CFG = 8.0
P2_STEPS = 20
P2_STRENGTH = 0.15
# ---------------------------------------------------------------------------
# QR overlay post-processing
# ---------------------------------------------------------------------------
OVERLAY_OPACITY = 0.65 # Alpha for QR modules (0=invisible, 1=solid black)
OVERLAY_BG_RATIO = 0.6 # Background alpha = opacity * ratio (lighter than modules)
OVERLAY_BLUR_SIGMA = 1.0 # Gaussian blur on overlay for softer edges
OVERLAY_FEATHER_PX = 16 # Fade-out at overlay borders (1 QR module width)
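# With straight alpha compositing over an opaque base (out = a*overlay + (1-a)*art),
# the defaults above pull dark-module pixels ~65% of the way toward black and
# background pixels ~39% (0.65 * 0.6) of the way toward white, before blur and
# feathering soften the result.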
# ---------------------------------------------------------------------------
# Quality tags — NO QR tags (QR structure from ControlNet only)
# ---------------------------------------------------------------------------
QUALITY_TAGS = (
"highly detailed, 4k, high resolution, sharp focus, "
"masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
)
DEFAULT_NEGATIVE = (
"blurry, low quality, nsfw, watermark, text, deformed, ugly, amateur, "
"oversaturated, grainy, bad anatomy, bad hands, multiple views"
)
# ---------------------------------------------------------------------------
# QR generation
# ---------------------------------------------------------------------------
QR_BOX_SIZE = 16
QR_BORDER = 1
QR_TARGET_SIZE = 512
QR_CANVAS_SIZE = 768
QR_BLUR_SIGMA = 0.5
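# Worked example of the geometry below: a version-3 QR is 29 modules wide, so
# the raw render is (29 + 2 * QR_BORDER) * QR_BOX_SIZE = 31 * 16 = 496 px.
# It is then resized to QR_TARGET_SIZE (512) and centered on the
# QR_CANVAS_SIZE (768) canvas at offset (768 - 512) // 2 = 128 px.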
# ---------------------------------------------------------------------------
# Category params
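# (all entries are currently identical: cfg=7.5, steps=40)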
# ---------------------------------------------------------------------------
CATEGORY_PARAMS = {
"food": {"cfg": 7.5, "steps": 40},
"luxury": {"cfg": 7.5, "steps": 40},
"wedding": {"cfg": 7.5, "steps": 40},
"sports": {"cfg": 7.5, "steps": 40},
"restaurant": {"cfg": 7.5, "steps": 40},
"retail": {"cfg": 7.5, "steps": 40},
"professional": {"cfg": 7.5, "steps": 40},
"real_estate": {"cfg": 7.5, "steps": 40},
"architecture": {"cfg": 7.5, "steps": 40},
"nature": {"cfg": 7.5, "steps": 40},
"world_wonders":{"cfg": 7.5, "steps": 40},
"medieval": {"cfg": 7.5, "steps": 40},
"social": {"cfg": 7.5, "steps": 40},
"tech": {"cfg": 7.5, "steps": 40},
"seasonal": {"cfg": 7.5, "steps": 40},
"default": {"cfg": 7.5, "steps": 40},
}
class EndpointHandler:
"""Custom handler for HuggingFace Inference Endpoints — v12 Art+Overlay."""
def __init__(self, path: str = ""):
"""Load models on endpoint startup."""
logger.info("Loading QR Art Generator pipeline v12.2 (Art+Overlay)...")
start = time.time()
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
logger.info("Loading QR Monster ControlNet v2...")
monster_cn = ControlNetModel.from_pretrained(
"monster-labs/control_v1p_sd15_qrcode_monster",
subfolder="v2",
torch_dtype=dtype,
)
logger.info("Loading IoC Lab Brightness ControlNet...")
brightness_cn = ControlNetModel.from_pretrained(
"ioclab/control_v1p_sd15_brightness",
torch_dtype=dtype,
)
multi_controlnet = MultiControlNetModel([monster_cn, brightness_cn])
logger.info("Loading txt2img pipeline...")
self.pipe_txt2img = StableDiffusionControlNetPipeline.from_pretrained(
"SG161222/Realistic_Vision_V5.1_noVAE",
controlnet=multi_controlnet,
torch_dtype=dtype,
safety_checker=None,
requires_safety_checker=False,
)
self.pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
self.pipe_txt2img.scheduler.config,
use_karras_sigmas=True,
algorithm_type="sde-dpmsolver++",
)
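        # DPM++ SDE multistep with Karras sigmas; surfaced to clients as
        # "DPM++ 2M SDE Karras" in the response metadata.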
self.pipe_txt2img.to(device)
logger.info("Creating img2img pipeline (shared components)...")
self.pipe_img2img = StableDiffusionControlNetImg2ImgPipeline(
vae=self.pipe_txt2img.vae,
text_encoder=self.pipe_txt2img.text_encoder,
tokenizer=self.pipe_txt2img.tokenizer,
unet=self.pipe_txt2img.unet,
controlnet=multi_controlnet,
scheduler=self.pipe_txt2img.scheduler,
safety_checker=None,
feature_extractor=None,
requires_safety_checker=False,
)
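        # Shares the txt2img VAE, text encoder, and UNet objects, so no extra
        # weights are loaded and no separate .to(device) call is needed.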
if device == "cuda":
try:
self.pipe_txt2img.enable_xformers_memory_efficient_attention()
logger.info("xformers memory-efficient attention enabled")
except Exception:
logger.warning("xformers not available, using default attention")
self.device = device
self.dtype = dtype
elapsed = time.time() - start
logger.info(f"Pipeline v12.2 loaded in {elapsed:.1f}s on {device}")
def _generate_qr_images(self, data: str):
"""
Generate both ControlNet conditioning and overlay QR images.
Returns:
conditioning: Gray-bg QR with pre-blur (for ControlNet)
overlay: RGBA overlay with opacity/blur/feather (for post-processing)
"""
qr = qrcode.QRCode(
error_correction=qrcode.constants.ERROR_CORRECT_H,
box_size=QR_BOX_SIZE,
border=QR_BORDER,
)
qr.add_data(data)
qr.make(fit=True)
# ControlNet conditioning: black on gray
qr_gray = qr.make_image(
fill_color="black", back_color="#808080"
).convert("RGB")
# Overlay source: black on white
qr_bw = qr.make_image(
fill_color="black", back_color="white"
).convert("L")
qr_w, qr_h = qr_gray.size
# Always resize to exact target size for consistent alignment
if qr_w != QR_TARGET_SIZE or qr_h != QR_TARGET_SIZE:
qr_gray = qr_gray.resize(
(QR_TARGET_SIZE, QR_TARGET_SIZE), Image.NEAREST
)
qr_bw = qr_bw.resize(
(QR_TARGET_SIZE, QR_TARGET_SIZE), Image.NEAREST
)
logger.info(f"QR resized from {qr_w}x{qr_h} to {QR_TARGET_SIZE}x{QR_TARGET_SIZE}")
# Conditioning: center on gray canvas + pre-blur
# Both conditioning and overlay MUST use the same offset for alignment
conditioning = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
offset = (QR_CANVAS_SIZE - QR_TARGET_SIZE) // 2
conditioning.paste(qr_gray, (offset, offset))
conditioning = conditioning.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
logger.info(
f"QR: version={qr.version}, modules={qr.modules_count}, "
f"raw={qr_w}x{qr_h}, target={QR_TARGET_SIZE}, canvas={QR_CANVAS_SIZE}"
)
return conditioning, qr_bw
def _create_overlay(
self, qr_bw: Image.Image, opacity: float,
blur_sigma: float, feather_px: int,
) -> Image.Image:
"""
Create RGBA overlay for post-processing QR composite.
Dark QR modules → black at specified opacity
Light background → white at reduced opacity (opacity * BG_RATIO)
Applied: Gaussian blur + feathered edges at border
Centered on full canvas with padding.
"""
qr_size = qr_bw.size[0]
qr_array = np.array(qr_bw)
# Build RGBA overlay at QR size
overlay = np.zeros((qr_size, qr_size, 4), dtype=np.uint8)
dark_mask = qr_array < 128
        # Dark modules: black at the requested opacity
overlay[dark_mask, 3] = int(255 * opacity)
# Light background: white at reduced opacity
overlay[~dark_mask, 0] = 255
overlay[~dark_mask, 1] = 255
overlay[~dark_mask, 2] = 255
overlay[~dark_mask, 3] = int(255 * opacity * OVERLAY_BG_RATIO)
overlay_img = Image.fromarray(overlay, "RGBA")
# Gaussian blur for softer module edges
if blur_sigma > 0:
overlay_img = overlay_img.filter(
ImageFilter.GaussianBlur(radius=blur_sigma)
)
# Feathered edges: fade out alpha near border
if feather_px > 0:
ov_arr = np.array(overlay_img)
h, w = ov_arr.shape[:2]
# Create distance-from-edge array
y_dist = np.minimum(
np.arange(h)[:, None],
np.arange(h - 1, -1, -1)[:, None],
)
x_dist = np.minimum(
np.arange(w)[None, :],
np.arange(w - 1, -1, -1)[None, :],
)
edge_dist = np.minimum(y_dist, x_dist).astype(np.float32)
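            # (h, 1) and (1, w) broadcast to a full (h, w) map of each pixel's
            # distance to the nearest overlay border.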
fade = np.clip(edge_dist / feather_px, 0, 1)
ov_arr[:, :, 3] = (ov_arr[:, :, 3].astype(np.float32) * fade).astype(np.uint8)
overlay_img = Image.fromarray(ov_arr, "RGBA")
# Center overlay on full canvas — MUST match conditioning offset
canvas = Image.new("RGBA", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (0, 0, 0, 0))
offset = (QR_CANVAS_SIZE - QR_TARGET_SIZE) // 2
canvas.paste(overlay_img, (offset, offset))
return canvas
def _prepare_qr_from_image(self, qr_image: Image.Image):
"""
Prepare client-provided QR image.
Returns:
conditioning: Gray-bg QR for ControlNet
overlay: RGBA overlay for post-processing (derived from client QR)
"""
# Convert white background to gray (Monster v2 trained on gray)
qr_array = np.array(qr_image.convert("RGB"))
white_mask = np.all(qr_array > 200, axis=2)
if np.sum(white_mask) > 0:
logger.info("Converting white QR background to gray (#808080)")
qr_array[white_mask] = [128, 128, 128]
qr_gray = Image.fromarray(qr_array)
# Create B/W version for overlay
qr_bw = qr_image.convert("L")
# Resize to target
w, h = qr_gray.size
if w != QR_TARGET_SIZE or h != QR_TARGET_SIZE:
qr_gray = qr_gray.resize((QR_TARGET_SIZE, QR_TARGET_SIZE), Image.NEAREST)
qr_bw = qr_bw.resize((QR_TARGET_SIZE, QR_TARGET_SIZE), Image.NEAREST)
# Conditioning canvas
conditioning = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
offset = (QR_CANVAS_SIZE - QR_TARGET_SIZE) // 2
conditioning.paste(qr_gray, (offset, offset))
conditioning = conditioning.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
return conditioning, qr_bw
def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
"""
Generate QR art — art + overlay pipeline.
Mode 1 — Server-side QR (recommended, pixel-perfect):
{ "inputs": { "prompt": "...", "qr_data": "https://..." } }
Mode 2 — Client QR image (backward compatible):
{ "inputs": { "prompt": "...", "qr_code_image": "<base64 PNG>" } }
Optional params:
category, seed, width, height,
passes (1 or 2, default 1),
p1_monster, p1_brightness,
p2_monster, p2_brightness, p2_strength,
            overlay_opacity (0-1, default 0.65, set 0 to disable overlay),
            overlay_blur (sigma, default 1.0),
            overlay_feather (px, default 16),
controlnet_scale (backward compat alias for p1_monster)
"""
start = time.time()
inputs = data.get("inputs", data)
prompt = inputs.get("prompt", "")
negative_prompt = inputs.get("negative_prompt", DEFAULT_NEGATIVE)
if not prompt:
return {"error": "prompt is required"}
# --- QR conditioning + overlay ---
qr_data = inputs.get("qr_data", "")
qr_b64 = inputs.get("qr_code_image", "")
if qr_data:
qr_conditioning, qr_bw = self._generate_qr_images(qr_data)
logger.info(f"Server-side QR for: {qr_data}")
elif qr_b64:
try:
qr_image = Image.open(
io.BytesIO(base64.b64decode(qr_b64))
).convert("RGB")
except Exception as e:
return {"error": f"Failed to decode qr_code_image: {e}"}
qr_conditioning, qr_bw = self._prepare_qr_from_image(qr_image)
logger.info("Client-provided QR image")
else:
return {"error": "qr_data (string) or qr_code_image (base64) required"}
# --- Parameters ---
category = inputs.get("category", "default")
params = CATEGORY_PARAMS.get(category, CATEGORY_PARAMS["default"])
passes = inputs.get("passes", 1)
width = inputs.get("width", QR_CANVAS_SIZE)
height = inputs.get("height", QR_CANVAS_SIZE)
# Pass 1 weights
p1_monster = inputs.get(
"p1_monster",
inputs.get("controlnet_scale", P1_MONSTER_WEIGHT)
)
p1_brightness = inputs.get("p1_brightness", P1_BRIGHTNESS_WEIGHT)
# Pass 2 weights
p2_monster = inputs.get("p2_monster", P2_MONSTER_WEIGHT)
p2_brightness = inputs.get("p2_brightness", P2_BRIGHTNESS_WEIGHT)
p2_strength = inputs.get("p2_strength", P2_STRENGTH)
# Overlay params
overlay_opacity = inputs.get("overlay_opacity", OVERLAY_OPACITY)
overlay_blur = inputs.get("overlay_blur", OVERLAY_BLUR_SIGMA)
overlay_feather = inputs.get("overlay_feather", OVERLAY_FEATHER_PX)
enhanced_prompt = f"{prompt}, {QUALITY_TAGS}"
seed = inputs.get("seed", -1)
if seed == -1:
seed = torch.Generator(device=self.device).seed()
generator = torch.Generator(device=self.device).manual_seed(seed)
# === PASS 1: ART (txt2img) ===
logger.info(
f"Pass 1 (ART): monster={p1_monster}, brightness={p1_brightness}, "
f"cfg={params['cfg']}, steps={params['steps']}"
)
result1 = self.pipe_txt2img(
prompt=enhanced_prompt,
negative_prompt=negative_prompt,
image=[qr_conditioning, qr_conditioning],
width=width,
height=height,
guidance_scale=params["cfg"],
controlnet_conditioning_scale=[p1_monster, p1_brightness],
control_guidance_start=[P1_MONSTER_START, BRIGHTNESS_START],
control_guidance_end=[P1_MONSTER_END, BRIGHTNESS_END],
num_inference_steps=params["steps"],
generator=generator,
)
art_p1 = result1.images[0]
p1_time = time.time() - start
if passes >= 2:
# === PASS 2: QR REINFORCEMENT (img2img) ===
p2_start = time.time()
generator2 = torch.Generator(device=self.device).manual_seed(seed + 1)
logger.info(
f"Pass 2 (QR): monster={p2_monster}, brightness={p2_brightness}, "
f"strength={p2_strength}, cfg={P2_CFG}, steps={P2_STEPS}"
)
result2 = self.pipe_img2img(
prompt=enhanced_prompt,
negative_prompt=negative_prompt,
image=art_p1,
control_image=[qr_conditioning, qr_conditioning],
controlnet_conditioning_scale=[p2_monster, p2_brightness],
control_guidance_start=[P2_MONSTER_START, BRIGHTNESS_START],
control_guidance_end=[P2_MONSTER_END, BRIGHTNESS_END],
strength=p2_strength,
guidance_scale=P2_CFG,
num_inference_steps=P2_STEPS,
generator=generator2,
)
art_final = result2.images[0]
p2_time = time.time() - p2_start
else:
art_final = art_p1
p2_time = 0
# === POST-PROCESSING: QR OVERLAY ===
overlay_applied = False
if overlay_opacity > 0:
overlay_start = time.time()
            overlay_img = self._create_overlay(
                qr_bw, overlay_opacity, overlay_blur, int(overlay_feather)
            )
            # Custom width/height stretch the 768x768 conditioning inside the
            # pipeline, so stretch the overlay the same way to keep it aligned
            # (Image.alpha_composite also requires matching sizes).
            if overlay_img.size != art_final.size:
                overlay_img = overlay_img.resize(art_final.size, Image.NEAREST)
            art_rgba = art_final.convert("RGBA")
            art_final = Image.alpha_composite(art_rgba, overlay_img).convert("RGB")
overlay_applied = True
overlay_time = time.time() - overlay_start
logger.info(
f"Overlay: opacity={overlay_opacity}, blur={overlay_blur}, "
f"feather={overlay_feather}px, time={overlay_time:.2f}s"
)
else:
overlay_time = 0
# Encode result
buf = io.BytesIO()
art_final.save(buf, format="PNG")
result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
elapsed = time.time() - start
return {
"image": result_b64,
"seed": seed,
"parameters": {
"pipeline": f"{'two' if passes >= 2 else 'single'}-pass-v12.2-overlay",
"passes": passes,
"category": category,
"p1_monster": p1_monster,
"p1_brightness": p1_brightness,
"p2_monster": p2_monster if passes >= 2 else None,
"p2_brightness": p2_brightness if passes >= 2 else None,
"p2_strength": p2_strength if passes >= 2 else None,
"overlay_opacity": overlay_opacity if overlay_applied else 0,
"overlay_blur": overlay_blur if overlay_applied else None,
"overlay_feather": overlay_feather if overlay_applied else None,
"p1_time": round(p1_time, 2),
"p2_time": round(p2_time, 2) if passes >= 2 else None,
"overlay_time": round(overlay_time, 3) if overlay_applied else None,
"guidance_scale": params["cfg"],
"steps": params["steps"],
"scheduler": "DPM++ 2M SDE Karras",
"width": width,
"height": height,
},
"time_seconds": round(elapsed, 2),
}
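# ---------------------------------------------------------------------------
# Local smoke test (illustrative sketch, not part of the endpoint contract).
# Assumes the checkpoints above can be downloaded and a GPU is available;
# the prompt, URL, and output filename are placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    handler = EndpointHandler()
    response = handler({
        "inputs": {
            "prompt": "cozy mountain cabin at golden hour, warm lighting",
            "qr_data": "https://example.com",
            "seed": 42,
        }
    })
    if "error" in response:
        raise SystemExit(response["error"])
    with open("qr_art_sample.png", "wb") as f:
        f.write(base64.b64decode(response["image"]))
    print(f"Saved qr_art_sample.png in {response['time_seconds']}s")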