Qrverse committed on
Commit
bfd4d2e
·
verified ·
1 Parent(s): bc6829f

v9: Dual ControlNet (Monster + Brightness) for scannable art

Browse files
Files changed (1) hide show
  1. handler.py +71 -74
handler.py CHANGED
@@ -1,27 +1,22 @@
1
  """
2
- QR-Verse AI Art Generator — HuggingFace Inference Endpoint Handler v8
3
 
4
- Single ControlNet calibrated for diffusers parallel processing.
5
 
6
- CRITICAL DISCOVERY (v7→v8):
7
- ComfyUI's "masked ControlNet" chains two ControlNetApplyAdvanced nodes
8
- SEQUENTIALLY: Unit 2 modifies already-modified conditioning from Unit 1.
9
- This is fundamentally different from diffusers' MultiControlNetModel which
10
- processes units in PARALLEL and sums their outputs.
11
 
12
- Result: stacked CN 1.60+1.20=2.80 in diffusers is MUCH stronger than
13
- the equivalent ComfyUI sequential chaining. Weight sweep showed:
14
- - CN 1.60: Beautiful art, QR barely visible (underscannable)
15
- - CN 1.80: Good art + moderate QR structure (sweet spot)
16
- - CN 2.00: QR dominant, art becoming blocky
17
- - CN 2.80: Just colored QR codes (way too high)
18
-
19
- v8 uses SINGLE ControlNet at CN 1.80 (default) — matching the effective
20
- strength of ComfyUI's sequential stacking at 1.60+1.20.
21
 
22
  Pipeline:
23
- - DPM++ 2M SDE Karras sampler (Monster Labs recommended)
24
- - Single ControlNet at CN 1.80, timing 0.00→0.90
 
 
25
  - QR code 512px centered in 768px canvas with 128px gray padding
26
  - Pre-blur QR with Gaussian sigma=0.5
27
  - CFG 7.5, steps 40
@@ -29,7 +24,8 @@ Pipeline:
29
 
30
  Models:
31
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
32
- - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
 
33
  """
34
 
35
  import base64
@@ -44,17 +40,25 @@ from diffusers import (
44
  ControlNetModel,
45
  StableDiffusionControlNetPipeline,
46
  DPMSolverMultistepScheduler,
 
47
  )
48
  from PIL import Image, ImageFilter
49
 
50
  logger = logging.getLogger(__name__)
51
 
52
  # ---------------------------------------------------------------------------
53
- # ControlNet defaults — calibrated from weight sweep
54
  # ---------------------------------------------------------------------------
55
- DEFAULT_CN_WEIGHT = 1.80 # Sweet spot: art quality + QR structure
56
- CN_START = 0.00 # ControlNet active from first step
57
- CN_END = 0.90 # ControlNet deactivates at 90% (detail blending)
 
 
 
 
 
 
 
58
 
59
  # ---------------------------------------------------------------------------
60
  # Category parameter presets
@@ -78,22 +82,18 @@ CATEGORY_PARAMS = {
78
  "default": {"cfg": 7.5, "steps": 40},
79
  }
80
 
81
- # Quality tags — appended to every prompt (from ComfyUI gold config)
82
- # NOTE: NO QR tags! QR structure comes 100% from ControlNet.
83
- # Adding QR tags to the prompt makes the model generate literal blocky QR
84
- # instead of artistic imagery woven into QR structure.
85
  QUALITY_TAGS = (
86
  "highly detailed, 4k, high resolution, sharp focus, "
87
  "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
88
  )
89
 
90
- # Default negative prompt
91
  DEFAULT_NEGATIVE = (
92
  "blurry, low quality, nsfw, watermark, text, deformed, ugly, amateur, "
93
  "oversaturated, grainy, bad anatomy, bad hands, multiple views"
94
  )
95
 
96
- # QR code sizing — 512px QR centered in 768px canvas (128px padding)
97
  QR_CODE_SIZE = 512
98
  QR_CANVAS_SIZE = 768
99
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
@@ -105,36 +105,46 @@ class EndpointHandler:
105
 
106
  def __init__(self, path: str = ""):
107
  """Load models on endpoint startup."""
108
- logger.info("Loading QR Art Generator pipeline v8 (single CN 1.80)...")
109
  start = time.time()
110
 
111
  device = "cuda" if torch.cuda.is_available() else "cpu"
112
  dtype = torch.float16 if device == "cuda" else torch.float32
113
 
114
- # Load QR Monster ControlNet v2
115
- controlnet = ControlNetModel.from_pretrained(
 
116
  "monster-labs/control_v1p_sd15_qrcode_monster",
117
  subfolder="v2",
118
  torch_dtype=dtype,
119
  )
120
 
121
- # Load SD 1.5 txt2img + single ControlNet pipeline
 
 
 
 
 
 
 
 
 
 
122
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
123
  "SG161222/Realistic_Vision_V5.1_noVAE",
124
- controlnet=controlnet,
125
  torch_dtype=dtype,
126
  safety_checker=None,
127
  requires_safety_checker=False,
128
  )
129
 
130
- # CRITICAL: DPM++ 2M SDE Karras (Monster Labs recommended)
131
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
132
  self.pipe.scheduler.config,
133
  use_karras_sigmas=True,
134
  algorithm_type="sde-dpmsolver++",
135
  )
136
 
137
- # Move to device + optimize
138
  self.pipe.to(device)
139
 
140
  if device == "cuda":
@@ -147,19 +157,10 @@ class EndpointHandler:
147
  self.device = device
148
  self.dtype = dtype
149
  elapsed = time.time() - start
150
- logger.info(f"Pipeline v8 loaded in {elapsed:.1f}s on {device}")
151
 
152
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
153
- """
154
- Prepare QR code as ControlNet conditioning image.
155
-
156
- Gold standard technique from ComfyUI v6:
157
- 1. Ensure gray background (#808080)
158
- 2. Resize QR to 512x512
159
- 3. Center in 768x768 gray canvas (128px padding)
160
- 4. Apply Gaussian blur (sigma=0.5) for smoother ControlNet integration
161
- """
162
- # Step 1: Ensure gray background
163
  qr_array = np.array(qr_image)
164
  white_mask = np.all(qr_array > 200, axis=2)
165
  if np.sum(white_mask) > 0:
@@ -167,16 +168,12 @@ class EndpointHandler:
167
  qr_array[white_mask] = [128, 128, 128]
168
  qr_image = Image.fromarray(qr_array)
169
 
170
- # Step 2: Resize QR to 512x512
171
  qr_resized = qr_image.resize(
172
  (QR_CODE_SIZE, QR_CODE_SIZE), Image.LANCZOS
173
  )
174
 
175
- # Step 3: Center in 768x768 gray canvas
176
  canvas = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
177
  canvas.paste(qr_resized, (QR_PADDING, QR_PADDING))
178
-
179
- # Step 4: Pre-blur for smoother ControlNet integration
180
  canvas = canvas.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
181
 
182
  return canvas
@@ -195,9 +192,10 @@ class EndpointHandler:
195
  "seed": int, # Optional - -1 for random
196
  "width": int, # Optional - default 768
197
  "height": int, # Optional - default 768
198
- "controlnet_scale": float, # Optional - default 1.80
199
- "cn_start": float, # Optional - default 0.00
200
- "cn_end": float, # Optional - default 0.90
 
201
  }
202
  }
203
  """
@@ -213,13 +211,11 @@ class EndpointHandler:
213
  if not qr_b64:
214
  return {"error": "qr_code_image (base64 PNG) is required"}
215
 
216
- # Decode QR code image
217
  try:
218
  qr_image = Image.open(io.BytesIO(base64.b64decode(qr_b64))).convert("RGB")
219
  except Exception as e:
220
  return {"error": f"Failed to decode qr_code_image: {e}"}
221
 
222
- # Prepare QR conditioning image (gray bg, 512-in-768, pre-blur)
223
  qr_conditioning = self._prepare_qr_conditioning(qr_image)
224
 
225
  # Resolve parameters
@@ -231,19 +227,17 @@ class EndpointHandler:
231
  width = inputs.get("width", QR_CANVAS_SIZE)
232
  height = inputs.get("height", QR_CANVAS_SIZE)
233
 
234
- # ControlNet weight (override-able per request)
235
- cn_weight = inputs.get("controlnet_scale", DEFAULT_CN_WEIGHT)
236
- cn_start = inputs.get("cn_start", CN_START)
237
- cn_end = inputs.get("cn_end", CN_END)
 
238
 
239
- # Backward compat: accept unit1_weight from older clients
240
- if "unit1_weight" in inputs:
241
- cn_weight = inputs["unit1_weight"]
242
 
243
- # Enhance prompt with quality tags only (NO QR tags)
244
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}"
245
 
246
- # Seed
247
  seed = inputs.get("seed", -1)
248
  if seed == -1:
249
  seed = torch.Generator(device=self.device).seed()
@@ -251,26 +245,27 @@ class EndpointHandler:
251
  generator = torch.Generator(device=self.device).manual_seed(seed)
252
 
253
  logger.info(
254
- f"Generating: cn={cn_weight} timing={cn_start}-{cn_end} "
 
255
  f"cfg={cfg} steps={steps} category={category}"
256
  )
257
 
 
258
  result = self.pipe(
259
  prompt=enhanced_prompt,
260
  negative_prompt=negative_prompt,
261
- image=qr_conditioning,
262
  width=width,
263
  height=height,
264
  guidance_scale=cfg,
265
- controlnet_conditioning_scale=cn_weight,
266
- control_guidance_start=cn_start,
267
- control_guidance_end=cn_end,
268
  num_inference_steps=steps,
269
  generator=generator,
270
  )
271
  art_image = result.images[0]
272
 
273
- # Encode result to base64 PNG
274
  buf = io.BytesIO()
275
  art_image.save(buf, format="PNG")
276
  result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
@@ -281,10 +276,12 @@ class EndpointHandler:
281
  "image": result_b64,
282
  "seed": seed,
283
  "parameters": {
284
- "pipeline": "single-cn-v8",
285
  "category": category,
286
- "controlnet_scale": cn_weight,
287
- "cn_timing": f"{cn_start}-{cn_end}",
 
 
288
  "guidance_scale": cfg,
289
  "steps": steps,
290
  "scheduler": "DPM++ 2M SDE Karras",
 
1
  """
2
+ QR-Verse AI Art Generator — HuggingFace Inference Endpoint Handler v9
3
 
4
+ Dual ControlNet: QR Monster v2 + IoC Lab Brightness.
5
 
6
+ Key insight from community (antfu.me/posts/ai-qrcode-101):
7
+ Monster alone at any single weight trades art vs scannability.
8
+ Adding Brightness ControlNet as auxiliary reinforces dark/light
9
+ contrast needed for scanning WITHOUT adding more QR structure.
 
10
 
11
+ "Even if we reduce the weight of the Monster Model to 1.0, the
12
+ recognizability is as good as the single model with Weight 1.5,
13
+ while the composition is closer to the original image."
 
 
 
 
 
 
14
 
15
  Pipeline:
16
+ - DPM++ 2M SDE Karras sampler
17
+ - Dual ControlNet (MultiControlNetModel with TWO DIFFERENT models):
18
+ - QR Monster v2: weight 1.35, timing 0.00→0.85 (QR structure)
19
+ - Brightness: weight 0.20, timing 0.00→1.00 (dark/light contrast)
20
  - QR code 512px centered in 768px canvas with 128px gray padding
21
  - Pre-blur QR with Gaussian sigma=0.5
22
  - CFG 7.5, steps 40
 
24
 
25
  Models:
26
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
27
+ - ControlNet 1: monster-labs/control_v1p_sd15_qrcode_monster (v2)
28
+ - ControlNet 2: ioclab/control_v1p_sd15_brightness
29
  """
30
 
31
  import base64
 
40
  ControlNetModel,
41
  StableDiffusionControlNetPipeline,
42
  DPMSolverMultistepScheduler,
43
+ MultiControlNetModel,
44
  )
45
  from PIL import Image, ImageFilter
46
 
47
  logger = logging.getLogger(__name__)
48
 
49
  # ---------------------------------------------------------------------------
50
+ # Dual ControlNet defaults — Monster (structure) + Brightness (contrast)
51
  # ---------------------------------------------------------------------------
52
+ # Monster: Provides QR structure. Weight 1.35 = good art + QR pattern.
53
+ DEFAULT_MONSTER_WEIGHT = 1.35
54
+ MONSTER_START = 0.00
55
+ MONSTER_END = 0.85
56
+
57
+ # Brightness: Reinforces dark/light contrast for scanning.
58
+ # Interprets QR as brightness map: black modules→dark, gray bg→medium.
59
+ DEFAULT_BRIGHTNESS_WEIGHT = 0.20
60
+ BRIGHTNESS_START = 0.00
61
+ BRIGHTNESS_END = 1.00
62
 
63
  # ---------------------------------------------------------------------------
64
  # Category parameter presets
 
82
  "default": {"cfg": 7.5, "steps": 40},
83
  }
84
 
85
+ # Quality tags — NO QR tags (QR structure from ControlNet only)
 
 
 
86
  QUALITY_TAGS = (
87
  "highly detailed, 4k, high resolution, sharp focus, "
88
  "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
89
  )
90
 
 
91
  DEFAULT_NEGATIVE = (
92
  "blurry, low quality, nsfw, watermark, text, deformed, ugly, amateur, "
93
  "oversaturated, grainy, bad anatomy, bad hands, multiple views"
94
  )
95
 
96
+ # QR code sizing
97
  QR_CODE_SIZE = 512
98
  QR_CANVAS_SIZE = 768
99
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
 
105
 
106
  def __init__(self, path: str = ""):
107
  """Load models on endpoint startup."""
108
+ logger.info("Loading QR Art Generator pipeline v9 (Monster + Brightness)...")
109
  start = time.time()
110
 
111
  device = "cuda" if torch.cuda.is_available() else "cpu"
112
  dtype = torch.float16 if device == "cuda" else torch.float32
113
 
114
+ # Load QR Monster ControlNet v2 (structure)
115
+ logger.info("Loading QR Monster ControlNet v2...")
116
+ monster_cn = ControlNetModel.from_pretrained(
117
  "monster-labs/control_v1p_sd15_qrcode_monster",
118
  subfolder="v2",
119
  torch_dtype=dtype,
120
  )
121
 
122
+ # Load Brightness ControlNet (contrast enforcement)
123
+ logger.info("Loading IoC Lab Brightness ControlNet...")
124
+ brightness_cn = ControlNetModel.from_pretrained(
125
+ "ioclab/control_v1p_sd15_brightness",
126
+ torch_dtype=dtype,
127
+ )
128
+
129
+ # Dual ControlNet: Monster (QR) + Brightness (contrast)
130
+ multi_controlnet = MultiControlNetModel([monster_cn, brightness_cn])
131
+
132
+ # Load SD 1.5 txt2img pipeline
133
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
134
  "SG161222/Realistic_Vision_V5.1_noVAE",
135
+ controlnet=multi_controlnet,
136
  torch_dtype=dtype,
137
  safety_checker=None,
138
  requires_safety_checker=False,
139
  )
140
 
141
+ # CRITICAL: DPM++ 2M SDE Karras
142
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
143
  self.pipe.scheduler.config,
144
  use_karras_sigmas=True,
145
  algorithm_type="sde-dpmsolver++",
146
  )
147
 
 
148
  self.pipe.to(device)
149
 
150
  if device == "cuda":
 
157
  self.device = device
158
  self.dtype = dtype
159
  elapsed = time.time() - start
160
+ logger.info(f"Pipeline v9 loaded in {elapsed:.1f}s on {device}")
161
 
162
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
163
+ """Prepare QR code as ControlNet conditioning image."""
 
 
 
 
 
 
 
 
 
164
  qr_array = np.array(qr_image)
165
  white_mask = np.all(qr_array > 200, axis=2)
166
  if np.sum(white_mask) > 0:
 
168
  qr_array[white_mask] = [128, 128, 128]
169
  qr_image = Image.fromarray(qr_array)
170
 
 
171
  qr_resized = qr_image.resize(
172
  (QR_CODE_SIZE, QR_CODE_SIZE), Image.LANCZOS
173
  )
174
 
 
175
  canvas = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
176
  canvas.paste(qr_resized, (QR_PADDING, QR_PADDING))
 
 
177
  canvas = canvas.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
178
 
179
  return canvas
 
192
  "seed": int, # Optional - -1 for random
193
  "width": int, # Optional - default 768
194
  "height": int, # Optional - default 768
195
+ "controlnet_scale": float, # Optional - Monster weight
196
+ "brightness_scale": float, # Optional - Brightness weight
197
+ "cn_start": float, # Optional - Monster start
198
+ "cn_end": float, # Optional - Monster end
199
  }
200
  }
201
  """
 
211
  if not qr_b64:
212
  return {"error": "qr_code_image (base64 PNG) is required"}
213
 
 
214
  try:
215
  qr_image = Image.open(io.BytesIO(base64.b64decode(qr_b64))).convert("RGB")
216
  except Exception as e:
217
  return {"error": f"Failed to decode qr_code_image: {e}"}
218
 
 
219
  qr_conditioning = self._prepare_qr_conditioning(qr_image)
220
 
221
  # Resolve parameters
 
227
  width = inputs.get("width", QR_CANVAS_SIZE)
228
  height = inputs.get("height", QR_CANVAS_SIZE)
229
 
230
+ # Dual CN weights (override-able)
231
+ monster_weight = inputs.get("controlnet_scale",
232
+ inputs.get("unit1_weight", DEFAULT_MONSTER_WEIGHT))
233
+ brightness_weight = inputs.get("brightness_scale",
234
+ inputs.get("unit2_weight", DEFAULT_BRIGHTNESS_WEIGHT))
235
 
236
+ monster_start = inputs.get("cn_start", MONSTER_START)
237
+ monster_end = inputs.get("cn_end", MONSTER_END)
 
238
 
 
239
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}"
240
 
 
241
  seed = inputs.get("seed", -1)
242
  if seed == -1:
243
  seed = torch.Generator(device=self.device).seed()
 
245
  generator = torch.Generator(device=self.device).manual_seed(seed)
246
 
247
  logger.info(
248
+ f"Generating: monster={monster_weight}@{monster_start}-{monster_end} "
249
+ f"brightness={brightness_weight}@{BRIGHTNESS_START}-{BRIGHTNESS_END} "
250
  f"cfg={cfg} steps={steps} category={category}"
251
  )
252
 
253
+ # Same QR image for both: Monster reads structure, Brightness reads contrast
254
  result = self.pipe(
255
  prompt=enhanced_prompt,
256
  negative_prompt=negative_prompt,
257
+ image=[qr_conditioning, qr_conditioning],
258
  width=width,
259
  height=height,
260
  guidance_scale=cfg,
261
+ controlnet_conditioning_scale=[monster_weight, brightness_weight],
262
+ control_guidance_start=[monster_start, BRIGHTNESS_START],
263
+ control_guidance_end=[monster_end, BRIGHTNESS_END],
264
  num_inference_steps=steps,
265
  generator=generator,
266
  )
267
  art_image = result.images[0]
268
 
 
269
  buf = io.BytesIO()
270
  art_image.save(buf, format="PNG")
271
  result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
 
276
  "image": result_b64,
277
  "seed": seed,
278
  "parameters": {
279
+ "pipeline": "dual-cn-v9",
280
  "category": category,
281
+ "monster_weight": monster_weight,
282
+ "monster_timing": f"{monster_start}-{monster_end}",
283
+ "brightness_weight": brightness_weight,
284
+ "brightness_timing": f"{BRIGHTNESS_START}-{BRIGHTNESS_END}",
285
  "guidance_scale": cfg,
286
  "steps": steps,
287
  "scheduler": "DPM++ 2M SDE Karras",