v7: stacked ControlNet (1.60+1.20=2.80) matching ComfyUI gold standard

handler.py  (+87 -63)  CHANGED
@@ -1,23 +1,29 @@
 """
-QR-Verse AI Art Generator – HuggingFace Inference Endpoint Handler
+QR-Verse AI Art Generator – HuggingFace Inference Endpoint Handler v7
 
-Matched to proven ComfyUI v6 gold-standard pipeline
+Matched to proven ComfyUI v6 gold-standard pipeline.
+
+CRITICAL DISCOVERY (v7):
+ComfyUI's "masked ControlNet" workflow applies the SAME ControlNet TWICE
+on the SAME QR image, stacked (chained ControlNetApplyAdvanced nodes):
+- Unit 1: weight=1.60, timing 0.00-0.90 (marker emphasis)
+- Unit 2: weight=1.20, timing 0.05-0.85 (data reinforcement)
+Effective CN weight in overlapping range = 1.60 + 1.20 = 2.80!
+
+v6 used CN=1.45 (single pass) – QR barely visible, unscannable.
+v7 uses MultiControlNetModel to replicate the stacked behavior – CN ≈ 2.80.
+
+Pipeline:
 - DPM++ 2M SDE Karras sampler (Monster Labs recommended)
-- ControlNet …
+- Stacked ControlNet: same model twice, different weights/timing
 - QR code 512px centered in 768px canvas with 128px gray padding
-- Pre-blur QR with Gaussian sigma=0.5
-- CN weight 1.25-1.50 (NOT 2.5-3.0 – that destroys art quality)
+- Pre-blur QR with Gaussian sigma=0.5
 - CFG 7.5, steps 40
 - Quality tags appended to prompt
 
 Models:
 - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
 - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
-
-Key insight: The gold QR art images were generated at CN=1.25-1.50 with
-DPM++ 2M SDE Karras + the 0.05-0.85 guidance window. Higher CN (2.0+)
-destroys art quality without improving scannability when the QR code is
-properly sized (512 in 768) and pre-blurred.
 """
 
 import base64
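The additive claim in the new docstring is easy to sanity-check outside the handler. A minimal sketch (illustrative only, not part of handler.py) of the effective conditioning weight at a denoising fraction t, using the unit windows this commit introduces:

```python
# Illustrative sketch - not in handler.py. Effective stacked-CN weight at
# denoising fraction t, per the unit windows defined in this commit.
def effective_cn_weight(t: float) -> float:
    w = 0.0
    if 0.00 <= t <= 0.90:  # Unit 1: weight 1.60, active 0.00-0.90
        w += 1.60
    if 0.05 <= t <= 0.85:  # Unit 2: weight 1.20, active 0.05-0.85
        w += 1.20
    return w

assert round(effective_cn_weight(0.50), 2) == 2.80  # overlap: both units active
assert effective_cn_weight(0.02) == 1.60            # before 0.05: Unit 1 only
assert effective_cn_weight(0.88) == 1.60            # after 0.85: Unit 1 only
assert effective_cn_weight(0.95) == 0.00            # past 0.90: neither active
```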
@@ -32,44 +38,54 @@ from diffusers import (
     ControlNetModel,
     StableDiffusionControlNetPipeline,
     DPMSolverMultistepScheduler,
+    MultiControlNetModel,
 )
 from PIL import Image, ImageFilter
 
 logger = logging.getLogger(__name__)
 
 # ---------------------------------------------------------------------------
-# …
+# Stacked ControlNet – matched to ComfyUI masked CN workflow
 # ---------------------------------------------------------------------------
-# …
-# …
-# …
+# ComfyUI applies ControlNetApplyAdvanced TWICE on the same QR image:
+#   Unit 1 (markers): strength=1.60  start=0.00  end=0.90
+#   Unit 2 (data):    strength=1.20  start=0.05  end=0.85
+# These stack additively. Effective weight at steps 0.05-0.85 = 2.80.
+
+UNIT1_WEIGHT = 1.60  # "marker" unit – high weight, early start, late end
+UNIT1_START = 0.00
+UNIT1_END = 0.90
+
+UNIT2_WEIGHT = 1.20  # "data" unit – lower weight, standard timing
+UNIT2_START = 0.05
+UNIT2_END = 0.85
 
+# ---------------------------------------------------------------------------
+# Category parameter presets
+# ---------------------------------------------------------------------------
 CATEGORY_PARAMS = {
-    …
-    "seasonal": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
-    # Default fallback
-    "default": {"cn_weight": 1.50, "cfg": 7.5, "steps": 40},
+    "food":         {"cfg": 7.5, "steps": 40},
+    "luxury":       {"cfg": 7.5, "steps": 40},
+    "wedding":      {"cfg": 7.5, "steps": 40},
+    "sports":       {"cfg": 7.5, "steps": 40},
+    "restaurant":   {"cfg": 7.5, "steps": 40},
+    "retail":       {"cfg": 7.5, "steps": 40},
+    "professional": {"cfg": 7.5, "steps": 40},
+    "real_estate":  {"cfg": 7.5, "steps": 40},
+    "architecture": {"cfg": 7.5, "steps": 40},
+    "nature":       {"cfg": 7.5, "steps": 40},
+    "world_wonders":{"cfg": 7.5, "steps": 40},
+    "medieval":     {"cfg": 7.5, "steps": 40},
+    "social":       {"cfg": 7.5, "steps": 40},
+    "tech":         {"cfg": 7.5, "steps": 40},
+    "seasonal":     {"cfg": 7.5, "steps": 40},
+    "default":      {"cfg": 7.5, "steps": 40},
 }
 
 # Quality tags – appended to every prompt (from ComfyUI gold config)
 QUALITY_TAGS = (
     "highly detailed, 4k, high resolution, sharp focus, "
-    "masterpiece, best quality, ultra detailed, 8k, professional"
+    "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
 )
 
 # QR structure tags – help model maintain scannable QR pattern
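Note on memory: because the list passed to MultiControlNetModel holds the same ControlNetModel instance twice, no weights are duplicated – the wrapper just keeps references in an nn.ModuleList, so VRAM cost stays that of a single ControlNet. A quick check (the import path for MultiControlNetModel varies across diffusers releases):

```python
import torch
from diffusers import ControlNetModel
# In several diffusers releases the wrapper lives here rather than at top level:
from diffusers.pipelines.controlnet import MultiControlNetModel

cn = ControlNetModel.from_pretrained(
    "monster-labs/control_v1p_sd15_qrcode_monster",
    subfolder="v2",
    torch_dtype=torch.float16,
)
multi = MultiControlNetModel([cn, cn])
assert multi.nets[0] is multi.nets[1]  # one weight set, referenced twice
```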
@@ -88,7 +104,7 @@ DEFAULT_NEGATIVE = (
 QR_CODE_SIZE = 512
 QR_CANVAS_SIZE = 768
 QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2  # 128px
-QR_BLUR_SIGMA = 0.5
+QR_BLUR_SIGMA = 0.5
 
 
 class EndpointHandler:
@@ -96,31 +112,33 @@ class EndpointHandler:
 
     def __init__(self, path: str = ""):
         """Load models on endpoint startup."""
-        logger.info("Loading QR Art Generator pipeline…
+        logger.info("Loading QR Art Generator pipeline v7 (stacked ControlNet)...")
         start = time.time()
 
         device = "cuda" if torch.cuda.is_available() else "cpu"
         dtype = torch.float16 if device == "cuda" else torch.float32
 
         # Load QR Monster ControlNet v2
-        …
+        controlnet = ControlNetModel.from_pretrained(
             "monster-labs/control_v1p_sd15_qrcode_monster",
             subfolder="v2",
             torch_dtype=dtype,
         )
 
-        # …
+        # Create MultiControlNetModel with the SAME model twice
+        # This replicates ComfyUI's stacked ControlNetApplyAdvanced behavior
+        multi_controlnet = MultiControlNetModel([controlnet, controlnet])
+
+        # Load SD 1.5 txt2img + MultiControlNet pipeline
         self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
             "SG161222/Realistic_Vision_V5.1_noVAE",
-            controlnet=…
+            controlnet=multi_controlnet,
             torch_dtype=dtype,
             safety_checker=None,
             requires_safety_checker=False,
         )
 
-        # CRITICAL: …
-        # This is what the gold ComfyUI pipeline uses.
-        # UniPCMultistep produces different noise patterns.
+        # CRITICAL: DPM++ 2M SDE Karras (Monster Labs recommended)
         self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
             self.pipe.scheduler.config,
             use_karras_sigmas=True,
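One caveat at this hunk: in diffusers, use_karras_sigmas=True on DPMSolverMultistepScheduler selects DPM++ 2M Karras; the "SDE" named in the comment normally also requires algorithm_type="sde-dpmsolver++". The call is truncated here, so the elided kwargs may already include it – a sketch of the presumed full configuration:

```python
from diffusers import DPMSolverMultistepScheduler

# Presumed completion of the truncated from_config call ("pipe" stands in for
# self.pipe from the hunk above). The algorithm_type kwarg is an assumption:
# it is what selects the SDE variant of DPM++ 2M in diffusers.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_karras_sigmas=True,
    algorithm_type="sde-dpmsolver++",
)
```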
@@ -140,7 +158,7 @@
         self.device = device
         self.dtype = dtype
         elapsed = time.time() - start
-        logger.info(f"Pipeline…
+        logger.info(f"Pipeline v7 loaded in {elapsed:.1f}s on {device}")
 
     def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
         """
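_prepare_qr_conditioning is unchanged by this commit, so the diff elides its body. From the module constants (512-in-768, 128px padding, sigma 0.5) it is presumably close to the sketch below; the resampling filter and the exact gray value are assumptions:

```python
# Hedged reconstruction - not the committed body. Uses the QR_* constants
# defined earlier in handler.py.
def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
    qr = qr_image.convert("RGB").resize(
        (QR_CODE_SIZE, QR_CODE_SIZE), Image.NEAREST  # keep modules crisp pre-blur
    )
    canvas = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
    canvas.paste(qr, (QR_PADDING, QR_PADDING))  # center 512px QR in 768px canvas
    # Pillow's GaussianBlur "radius" is the Gaussian's standard deviation (sigma)
    return canvas.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
```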
@@ -183,14 +201,13 @@
             "inputs": {
                 "prompt": str,              # Required
                 "negative_prompt": str,     # Optional
-                "qr_code_image": str,       # Required - base64 PNG
-                "category": str,            # Optional
+                "qr_code_image": str,       # Required - base64 PNG
+                "category": str,            # Optional
                 "seed": int,                # Optional - -1 for random
                 "width": int,               # Optional - default 768
                 "height": int,              # Optional - default 768
-                "controlnet_scale": float,  # Optional - override
-                "guidance_scale": float,    # Optional - override
-                "num_inference_steps": int, # Optional - override steps
+                "unit1_weight": float,      # Optional - override
+                "unit2_weight": float,      # Optional - override
             }
         }
         """
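Put together, a minimal v7 request matching this schema looks like the following client-side sketch (endpoint URL, token, and prompt are placeholders):

```python
import base64
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."                                                   # placeholder

with open("qr.png", "rb") as f:
    qr_b64 = base64.b64encode(f.read()).decode()

payload = {
    "inputs": {
        "prompt": "cozy italian restaurant interior",  # example prompt
        "qr_code_image": qr_b64,    # required: base64 PNG
        "category": "restaurant",
        "seed": -1,                 # -1 -> random seed
        # "unit1_weight": 1.4,      # optional per-request overrides
        # "unit2_weight": 1.0,
    }
}
resp = requests.post(
    ENDPOINT_URL,
    json=payload,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
```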
@@ -219,13 +236,16 @@
         category = inputs.get("category", "default")
         params = CATEGORY_PARAMS.get(category, CATEGORY_PARAMS["default"])
 
-        cn_weight = inputs.get("controlnet_scale", params["cn_weight"])
         cfg = inputs.get("guidance_scale", params["cfg"])
         steps = inputs.get("num_inference_steps", params["steps"])
         width = inputs.get("width", QR_CANVAS_SIZE)
         height = inputs.get("height", QR_CANVAS_SIZE)
 
-        # …
+        # Stacked CN weights (override-able for testing)
+        u1_weight = inputs.get("unit1_weight", UNIT1_WEIGHT)
+        u2_weight = inputs.get("unit2_weight", UNIT2_WEIGHT)
+
+        # Enhance prompt with quality + QR tags
         enhanced_prompt = f"{prompt}, {QUALITY_TAGS}, {QR_TAGS}"
 
         # Seed
@@ -235,22 +255,25 @@
 
         generator = torch.Generator(device=self.device).manual_seed(seed)
 
-        # ---- …
-        # …
+        # ---- Stacked ControlNet (same QR image twice) ----
+        # Replicates ComfyUI's chained ControlNetApplyAdvanced
         logger.info(
-            f"Generating: …
-            f"…
+            f"Generating: u1={u1_weight}@{UNIT1_START}-{UNIT1_END} "
+            f"u2={u2_weight}@{UNIT2_START}-{UNIT2_END} "
+            f"effective={u1_weight + u2_weight:.2f} "
+            f"cfg={cfg} steps={steps} category={category}"
         )
+
         result = self.pipe(
             prompt=enhanced_prompt,
             negative_prompt=negative_prompt,
-            image=qr_conditioning,
+            image=[qr_conditioning, qr_conditioning],
             width=width,
             height=height,
             guidance_scale=cfg,
-            controlnet_conditioning_scale=cn_weight,
-            control_guidance_start=0.05,
-            control_guidance_end=0.85,
+            controlnet_conditioning_scale=[u1_weight, u2_weight],
+            control_guidance_start=[UNIT1_START, UNIT2_START],
+            control_guidance_end=[UNIT1_END, UNIT2_END],
             num_inference_steps=steps,
             generator=generator,
         )
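Because control_guidance_start/end are fractions of the schedule, the defaults translate at steps=40 into roughly these step ranges (exact rounding is a diffusers implementation detail):

```python
steps = 40
for name, start, end in [("unit1", 0.00, 0.90), ("unit2", 0.05, 0.85)]:
    print(f"{name}: ~steps {int(start * steps)}-{int(end * steps)} of {steps}")
# unit1: ~steps 0-36 of 40
# unit2: ~steps 2-34 of 40
```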
@@ -265,16 +288,17 @@
 
         return {
             "image": result_b64,
-            "passes_run": 1,
             "seed": seed,
             "parameters": {
-                "pipeline": "…
+                "pipeline": "stacked-cn-v7",
                 "category": category,
-                "…
+                "unit1_weight": u1_weight,
+                "unit1_timing": f"{UNIT1_START}-{UNIT1_END}",
+                "unit2_weight": u2_weight,
+                "unit2_timing": f"{UNIT2_START}-{UNIT2_END}",
+                "effective_cn": round(u1_weight + u2_weight, 2),
                 "guidance_scale": cfg,
                 "steps": steps,
-                "control_guidance_start": 0.05,
-                "control_guidance_end": 0.85,
                 "scheduler": "DPM++ 2M SDE Karras",
                 "qr_size": f"{QR_CODE_SIZE}-in-{QR_CANVAS_SIZE}",
                 "qr_blur_sigma": QR_BLUR_SIGMA,
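On the client side, the image field decodes straight back to a PNG, and parameters echoes the effective configuration for debugging. A sketch, continuing the request example above:

```python
import base64
import io

from PIL import Image

body = resp.json()  # "resp" from the request sketch earlier
art = Image.open(io.BytesIO(base64.b64decode(body["image"])))
art.save("qr_art.png")
print(body["seed"], body["parameters"]["effective_cn"])  # 2.8 with defaults
```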