Qrverse committed on
Commit
95cf55f
·
verified ·
1 Parent(s): 75920a8

Initial handler: SD 1.5 + QR Monster v2, adaptive 2/3 pass pipeline

Browse files
Files changed (2) hide show
  1. handler.py +295 -0
  2. requirements.txt +8 -0
handler.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QR-Verse AI Art Generator β€” HuggingFace Inference Endpoint Handler
3
+
4
+ Adaptive multi-pass pipeline:
5
+ Pass 1 (ART): txt2img + ControlNet at category-specific cn_weight β†’ creative art
6
+ Pass 2 (QR FORCE): img2img + ControlNet at higher scale β†’ embed QR pattern
7
+ Pass 3 (RESCUE, optional): img2img + ControlNet at max scale β†’ force scannable QR
8
+
9
+ Models:
10
+ - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
11
+ - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
12
+
13
+ Key differentiator vs Replicate:
14
+ - control_guidance_start/end support (0.05 / 0.85)
15
+ - Category-aware cn_weight (1.38 geometric vs 1.80 texture)
16
+ - Adaptive pass count based on category difficulty
17
+ """
18
+
19
+ import base64
20
+ import io
21
+ import logging
22
+ import time
23
+ from typing import Any
24
+
25
+ import torch
26
+ from diffusers import (
27
+ ControlNetModel,
28
+ StableDiffusionControlNetPipeline,
29
+ StableDiffusionControlNetImg2ImgPipeline,
30
+ UniPCMultistepScheduler,
31
+ )
32
+ from PIL import Image
33
+
34
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Category parameter presets (extracted from 71K ChromaDB generation learnings)
# ---------------------------------------------------------------------------
# Two cn_weight clusters:
#   1.80 -> high-texture categories (food, luxury, wedding, sports)
#   1.38 -> geometric/structural categories (architecture, nature, tech)
# Categories with <35% accept rate get 3 passes instead of 2.


def _preset(cn_weight: float, steps: int, passes: int, cfg: float = 7.5) -> dict:
    """Build one category preset dict (all presets share cfg=7.5)."""
    return {"cn_weight": cn_weight, "cfg": cfg, "steps": steps, "passes": passes}


CATEGORY_PARAMS = {
    # High-texture cluster (cn_weight=1.80, 50 steps, 2 passes)
    "food": _preset(1.80, 50, 2),
    "luxury": _preset(1.80, 50, 2),
    "wedding": _preset(1.80, 50, 2),
    "sports": _preset(1.80, 50, 2),
    "restaurant": _preset(1.80, 50, 2),
    "retail": _preset(1.80, 50, 2),
    "professional": _preset(1.80, 50, 2),
    "real_estate": _preset(1.80, 50, 2),
    # Geometric cluster (cn_weight=1.38, 40 steps, 2-3 passes)
    "architecture": _preset(1.38, 40, 3),
    "nature": _preset(1.38, 40, 2),
    "social": _preset(1.38, 40, 3),
    "seasonal": _preset(1.59, 40, 3),
    "tech": _preset(1.38, 40, 2),
    "world_wonders": _preset(1.38, 40, 2),
    "medieval": _preset(1.38, 40, 2),
    # Default fallback
    "default": _preset(1.50, 40, 2),
}
65
+
66
+
67
class EndpointHandler:
    """Custom handler for HuggingFace Inference Endpoints.

    Runs the adaptive multi-pass QR-art pipeline:

      Pass 1 (ART):    txt2img + ControlNet at the category cn_weight
      Pass 2 (FORCE):  img2img + ControlNet at cn_weight + 0.4
      Pass 3 (RESCUE): img2img + ControlNet at cn_weight + 0.8
    """

    # SD 1.5's VAE downsamples by a factor of 8, so generation dimensions
    # must be multiples of 8; requested sizes are snapped down to comply.
    _DIM_MULTIPLE = 8
    # The pipeline implements at most three passes (art / force / rescue).
    _MAX_PASSES = 3

    def __init__(self, path: str = ""):
        """Load ControlNet and both SD 1.5 pipelines on endpoint startup.

        Args:
            path: Model directory supplied by the endpoint runtime
                (unused here; models are fetched from the Hub by repo id).
        """
        logger.info("Loading QR Art Generator pipeline...")
        start = time.time()

        device = "cuda" if torch.cuda.is_available() else "cpu"
        # fp16 halves VRAM on GPU; CPU inference requires fp32.
        dtype = torch.float16 if device == "cuda" else torch.float32

        # QR Monster ControlNet v2 (the "v2" subfolder of the repo).
        self.controlnet = ControlNetModel.from_pretrained(
            "monster-labs/control_v1p_sd15_qrcode_monster",
            subfolder="v2",
            torch_dtype=dtype,
        )

        # SD 1.5 txt2img + ControlNet pipeline (Pass 1).
        # Safety checker deliberately disabled (safety_checker=None).
        self.pipe_txt2img = StableDiffusionControlNetPipeline.from_pretrained(
            "SG161222/Realistic_Vision_V5.1_noVAE",
            controlnet=self.controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            requires_safety_checker=False,
        )
        self.pipe_txt2img.scheduler = UniPCMultistepScheduler.from_config(
            self.pipe_txt2img.scheduler.config
        )

        # SD 1.5 img2img + ControlNet pipeline (Pass 2/3). Shares the same
        # ControlNet instance, so the model weights are loaded only once.
        self.pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
            "SG161222/Realistic_Vision_V5.1_noVAE",
            controlnet=self.controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            requires_safety_checker=False,
        )
        self.pipe_img2img.scheduler = UniPCMultistepScheduler.from_config(
            self.pipe_img2img.scheduler.config
        )

        # Move to device + optimize.
        self.pipe_txt2img.to(device)
        self.pipe_img2img.to(device)

        if device == "cuda":
            # xformers is an optional acceleration; fall back quietly to the
            # default attention implementation when it is not installed.
            try:
                self.pipe_txt2img.enable_xformers_memory_efficient_attention()
                self.pipe_img2img.enable_xformers_memory_efficient_attention()
                logger.info("xformers memory-efficient attention enabled")
            except Exception:
                logger.warning("xformers not available, using default attention")

        self.device = device
        self.dtype = dtype
        elapsed = time.time() - start
        logger.info("Pipeline loaded in %.1fs on %s", elapsed, device)

    def _refine_pass(
        self,
        pass_idx: int,
        num_passes: int,
        prompt: str,
        negative_prompt: str,
        art_image: Image.Image,
        qr_image: Image.Image,
        cn: float,
        cfg: float,
        strength: float,
        steps: int,
        control_start: float,
        control_end: float,
        generator: "torch.Generator",
    ) -> Image.Image:
        """Run one img2img + ControlNet refinement pass and return the image."""
        logger.info(
            "Pass %s/%s: img2img cn=%s cfg=%s strength=%s steps=%s",
            pass_idx, num_passes, cn, cfg, strength, steps,
        )
        result = self.pipe_img2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=art_image,
            control_image=qr_image,
            strength=strength,
            guidance_scale=cfg,
            controlnet_conditioning_scale=cn,
            control_guidance_start=control_start,
            control_guidance_end=control_end,
            num_inference_steps=steps,
            generator=generator,
        )
        return result.images[0]

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Generate QR art from input parameters.

        Input JSON:
          {
            "inputs": {
              "prompt": str,                   # Required
              "negative_prompt": str,          # Optional
              "qr_code_image": str,            # Required - base64 PNG of QR code
              "category": str,                 # Optional - maps to CATEGORY_PARAMS
              "seed": int,                     # Optional - -1 for random
              "width": int,                    # Optional - default 768 (snapped to /8)
              "height": int,                   # Optional - default 768 (snapped to /8)
              "num_passes": int,               # Optional - override auto pass count (clamped 1-3)
              "controlnet_scale": float,       # Optional - override category cn_weight
              "guidance_scale": float,         # Optional - override category cfg
              "num_inference_steps": int,      # Optional - override category steps
              "control_guidance_start": float, # Optional - default 0.05
              "control_guidance_end": float,   # Optional - default 0.85
            }
          }

        Output JSON:
          {
            "image": str,            # base64 PNG
            "passes_run": int,       # passes actually executed
            "seed": int,             # seed used (echo for reproducibility)
            "parameters": dict,      # actual parameters used
            "time_seconds": float,
          }

        Errors are reported as {"error": str} rather than raised, so the
        endpoint returns a JSON body instead of a 500.
        """
        start = time.time()

        # Endpoint payloads wrap parameters under "inputs"; accept bare dicts too.
        inputs = data.get("inputs", data)
        prompt = inputs.get("prompt", "")
        negative_prompt = inputs.get(
            "negative_prompt",
            "ugly, disfigured, low quality, blurry, nsfw, text, watermark",
        )
        qr_b64 = inputs.get("qr_code_image", "")

        if not prompt:
            return {"error": "prompt is required"}
        if not qr_b64:
            return {"error": "qr_code_image (base64 PNG) is required"}

        # Decode QR code image.
        try:
            qr_image = Image.open(io.BytesIO(base64.b64decode(qr_b64))).convert("RGB")
        except Exception as e:
            return {"error": f"Failed to decode qr_code_image: {e}"}

        # Resolve parameters: category preset first, explicit inputs override.
        category = inputs.get("category", "default")
        params = CATEGORY_PARAMS.get(category, CATEGORY_PARAMS["default"])

        cn_weight = inputs.get("controlnet_scale", params["cn_weight"])
        cfg = inputs.get("guidance_scale", params["cfg"])
        steps = inputs.get("num_inference_steps", params["steps"])
        # FIX: clamp to the implemented 1-3 range so "passes_run" in the
        # response always matches the passes actually executed (previously
        # num_passes=0 still ran pass 1, and >3 was over-reported).
        num_passes = max(
            1, min(int(inputs.get("num_passes", params["passes"])), self._MAX_PASSES)
        )
        # FIX: snap dimensions down to multiples of 8 (SD 1.5 VAE constraint);
        # defaults of 768 are unaffected.
        width = max(
            self._DIM_MULTIPLE,
            int(inputs.get("width", 768)) // self._DIM_MULTIPLE * self._DIM_MULTIPLE,
        )
        height = max(
            self._DIM_MULTIPLE,
            int(inputs.get("height", 768)) // self._DIM_MULTIPLE * self._DIM_MULTIPLE,
        )
        control_start = inputs.get("control_guidance_start", 0.05)
        control_end = inputs.get("control_guidance_end", 0.85)

        # Seed: -1 requests a fresh random seed; the seed actually used is
        # echoed back in the response so results can be reproduced.
        seed = inputs.get("seed", -1)
        if seed == -1:
            generator = torch.Generator(device=self.device)
            seed = generator.seed()  # seeds from entropy and returns the value
        else:
            generator = torch.Generator(device=self.device).manual_seed(seed)

        # Resize QR code to target dimensions.
        qr_image = qr_image.resize((width, height), Image.LANCZOS)

        # ---- Pass 1: txt2img + ControlNet (ART pass) ----
        logger.info(
            "Pass 1/%s: txt2img cn=%s cfg=%s steps=%s",
            num_passes, cn_weight, cfg, steps,
        )
        result = self.pipe_txt2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=qr_image,
            width=width,
            height=height,
            guidance_scale=cfg,
            controlnet_conditioning_scale=cn_weight,
            control_guidance_start=control_start,
            control_guidance_end=control_end,
            num_inference_steps=steps,
            generator=generator,
        )
        art_image = result.images[0]

        # ---- Pass 2: img2img + ControlNet (QR FORCE pass) ----
        if num_passes >= 2:
            art_image = self._refine_pass(
                2, num_passes, prompt, negative_prompt, art_image, qr_image,
                cn_weight + 0.4, 10.0, 0.35, 30,
                control_start, control_end, generator,
            )

        # ---- Pass 3: img2img + ControlNet (RESCUE pass) ----
        if num_passes >= 3:
            art_image = self._refine_pass(
                3, num_passes, prompt, negative_prompt, art_image, qr_image,
                cn_weight + 0.8, 13.0, 0.45, 25,
                control_start, control_end, generator,
            )

        # Encode result to base64 PNG.
        buf = io.BytesIO()
        art_image.save(buf, format="PNG")
        result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        elapsed = time.time() - start

        return {
            "image": result_b64,
            "passes_run": num_passes,
            "seed": seed,
            "parameters": {
                "category": category,
                "controlnet_scale_p1": cn_weight,
                "guidance_scale_p1": cfg,
                "steps_p1": steps,
                "control_guidance_start": control_start,
                "control_guidance_end": control_end,
                "width": width,
                "height": height,
            },
            "time_seconds": round(elapsed, 2),
        }
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ diffusers>=0.27.0
2
+ transformers>=4.38.0
3
+ accelerate>=0.27.0
4
+ torch>=2.1.0
5
+ xformers>=0.0.23
6
+ safetensors
7
+ Pillow
8
+ controlnet-aux