Qrverse committed on
Commit
bfd4d2e
·
verified ·
1 Parent(s): bc6829f

v9: Dual ControlNet (Monster + Brightness) for scannable art

Browse files
Files changed (1) hide show
  1. handler.py +71 -74
handler.py CHANGED
@@ -1,27 +1,22 @@
1
  """
2
- QR-Verse AI Art Generator — HuggingFace Inference Endpoint Handler v8
3
 
4
- Single ControlNet calibrated for diffusers parallel processing.
5
 
6
- CRITICAL DISCOVERY (v7→v8):
7
- ComfyUI's "masked ControlNet" chains two ControlNetApplyAdvanced nodes
8
- SEQUENTIALLY: Unit 2 modifies already-modified conditioning from Unit 1.
9
- This is fundamentally different from diffusers' MultiControlNetModel which
10
- processes units in PARALLEL and sums their outputs.
11
 
12
- Result: stacked CN 1.60+1.20=2.80 in diffusers is MUCH stronger than
13
- the equivalent ComfyUI sequential chaining. Weight sweep showed:
14
- - CN 1.60: Beautiful art, QR barely visible (underscannable)
15
- - CN 1.80: Good art + moderate QR structure (sweet spot)
16
- - CN 2.00: QR dominant, art becoming blocky
17
- - CN 2.80: Just colored QR codes (way too high)
18
-
19
- v8 uses SINGLE ControlNet at CN 1.80 (default) — matching the effective
20
- strength of ComfyUI's sequential stacking at 1.60+1.20.
21
 
22
  Pipeline:
23
- - DPM++ 2M SDE Karras sampler (Monster Labs recommended)
24
- - Single ControlNet at CN 1.80, timing 0.00→0.90
 
 
25
  - QR code 512px centered in 768px canvas with 128px gray padding
26
  - Pre-blur QR with Gaussian sigma=0.5
27
  - CFG 7.5, steps 40
@@ -29,7 +24,8 @@ Pipeline:
29
 
30
  Models:
31
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
32
- - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
 
33
  """
34
 
35
  import base64
@@ -44,17 +40,25 @@ from diffusers import (
44
  ControlNetModel,
45
  StableDiffusionControlNetPipeline,
46
  DPMSolverMultistepScheduler,
 
47
  )
48
  from PIL import Image, ImageFilter
49
 
50
  logger = logging.getLogger(__name__)
51
 
52
  # ---------------------------------------------------------------------------
53
- # ControlNet defaults — calibrated from weight sweep
54
  # ---------------------------------------------------------------------------
55
- DEFAULT_CN_WEIGHT = 1.80 # Sweet spot: art quality + QR structure
56
- CN_START = 0.00 # ControlNet active from first step
57
- CN_END = 0.90 # ControlNet deactivates at 90% (detail blending)
 
 
 
 
 
 
 
58
 
59
  # ---------------------------------------------------------------------------
60
  # Category parameter presets
@@ -78,22 +82,18 @@ CATEGORY_PARAMS = {
78
  "default": {"cfg": 7.5, "steps": 40},
79
  }
80
 
81
- # Quality tags — appended to every prompt (from ComfyUI gold config)
82
- # NOTE: NO QR tags! QR structure comes 100% from ControlNet.
83
- # Adding QR tags to the prompt makes the model generate literal blocky QR
84
- # instead of artistic imagery woven into QR structure.
85
  QUALITY_TAGS = (
86
  "highly detailed, 4k, high resolution, sharp focus, "
87
  "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
88
  )
89
 
90
- # Default negative prompt
91
  DEFAULT_NEGATIVE = (
92
  "blurry, low quality, nsfw, watermark, text, deformed, ugly, amateur, "
93
  "oversaturated, grainy, bad anatomy, bad hands, multiple views"
94
  )
95
 
96
- # QR code sizing — 512px QR centered in 768px canvas (128px padding)
97
  QR_CODE_SIZE = 512
98
  QR_CANVAS_SIZE = 768
99
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
@@ -105,36 +105,46 @@ class EndpointHandler:
105
 
106
  def __init__(self, path: str = ""):
107
  """Load models on endpoint startup."""
108
- logger.info("Loading QR Art Generator pipeline v8 (single CN 1.80)...")
109
  start = time.time()
110
 
111
  device = "cuda" if torch.cuda.is_available() else "cpu"
112
  dtype = torch.float16 if device == "cuda" else torch.float32
113
 
114
- # Load QR Monster ControlNet v2
115
- controlnet = ControlNetModel.from_pretrained(
 
116
  "monster-labs/control_v1p_sd15_qrcode_monster",
117
  subfolder="v2",
118
  torch_dtype=dtype,
119
  )
120
 
121
- # Load SD 1.5 txt2img + single ControlNet pipeline
 
 
 
 
 
 
 
 
 
 
122
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
123
  "SG161222/Realistic_Vision_V5.1_noVAE",
124
- controlnet=controlnet,
125
  torch_dtype=dtype,
126
  safety_checker=None,
127
  requires_safety_checker=False,
128
  )
129
 
130
- # CRITICAL: DPM++ 2M SDE Karras (Monster Labs recommended)
131
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
132
  self.pipe.scheduler.config,
133
  use_karras_sigmas=True,
134
  algorithm_type="sde-dpmsolver++",
135
  )
136
 
137
- # Move to device + optimize
138
  self.pipe.to(device)
139
 
140
  if device == "cuda":
@@ -147,19 +157,10 @@ class EndpointHandler:
147
  self.device = device
148
  self.dtype = dtype
149
  elapsed = time.time() - start
150
- logger.info(f"Pipeline v8 loaded in {elapsed:.1f}s on {device}")
151
 
152
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
153
- """
154
- Prepare QR code as ControlNet conditioning image.
155
-
156
- Gold standard technique from ComfyUI v6:
157
- 1. Ensure gray background (#808080)
158
- 2. Resize QR to 512x512
159
- 3. Center in 768x768 gray canvas (128px padding)
160
- 4. Apply Gaussian blur (sigma=0.5) for smoother ControlNet integration
161
- """
162
- # Step 1: Ensure gray background
163
  qr_array = np.array(qr_image)
164
  white_mask = np.all(qr_array > 200, axis=2)
165
  if np.sum(white_mask) > 0:
@@ -167,16 +168,12 @@ class EndpointHandler:
167
  qr_array[white_mask] = [128, 128, 128]
168
  qr_image = Image.fromarray(qr_array)
169
 
170
- # Step 2: Resize QR to 512x512
171
  qr_resized = qr_image.resize(
172
  (QR_CODE_SIZE, QR_CODE_SIZE), Image.LANCZOS
173
  )
174
 
175
- # Step 3: Center in 768x768 gray canvas
176
  canvas = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
177
  canvas.paste(qr_resized, (QR_PADDING, QR_PADDING))
178
-
179
- # Step 4: Pre-blur for smoother ControlNet integration
180
  canvas = canvas.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
181
 
182
  return canvas
@@ -195,9 +192,10 @@ class EndpointHandler:
195
  "seed": int, # Optional - -1 for random
196
  "width": int, # Optional - default 768
197
  "height": int, # Optional - default 768
198
- "controlnet_scale": float, # Optional - default 1.80
199
- "cn_start": float, # Optional - default 0.00
200
- "cn_end": float, # Optional - default 0.90
 
201
  }
202
  }
203
  """
@@ -213,13 +211,11 @@ class EndpointHandler:
213
  if not qr_b64:
214
  return {"error": "qr_code_image (base64 PNG) is required"}
215
 
216
- # Decode QR code image
217
  try:
218
  qr_image = Image.open(io.BytesIO(base64.b64decode(qr_b64))).convert("RGB")
219
  except Exception as e:
220
  return {"error": f"Failed to decode qr_code_image: {e}"}
221
 
222
- # Prepare QR conditioning image (gray bg, 512-in-768, pre-blur)
223
  qr_conditioning = self._prepare_qr_conditioning(qr_image)
224
 
225
  # Resolve parameters
@@ -231,19 +227,17 @@ class EndpointHandler:
231
  width = inputs.get("width", QR_CANVAS_SIZE)
232
  height = inputs.get("height", QR_CANVAS_SIZE)
233
 
234
- # ControlNet weight (override-able per request)
235
- cn_weight = inputs.get("controlnet_scale", DEFAULT_CN_WEIGHT)
236
- cn_start = inputs.get("cn_start", CN_START)
237
- cn_end = inputs.get("cn_end", CN_END)
 
238
 
239
- # Backward compat: accept unit1_weight from older clients
240
- if "unit1_weight" in inputs:
241
- cn_weight = inputs["unit1_weight"]
242
 
243
- # Enhance prompt with quality tags only (NO QR tags)
244
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}"
245
 
246
- # Seed
247
  seed = inputs.get("seed", -1)
248
  if seed == -1:
249
  seed = torch.Generator(device=self.device).seed()
@@ -251,26 +245,27 @@ class EndpointHandler:
251
  generator = torch.Generator(device=self.device).manual_seed(seed)
252
 
253
  logger.info(
254
- f"Generating: cn={cn_weight} timing={cn_start}-{cn_end} "
 
255
  f"cfg={cfg} steps={steps} category={category}"
256
  )
257
 
 
258
  result = self.pipe(
259
  prompt=enhanced_prompt,
260
  negative_prompt=negative_prompt,
261
- image=qr_conditioning,
262
  width=width,
263
  height=height,
264
  guidance_scale=cfg,
265
- controlnet_conditioning_scale=cn_weight,
266
- control_guidance_start=cn_start,
267
- control_guidance_end=cn_end,
268
  num_inference_steps=steps,
269
  generator=generator,
270
  )
271
  art_image = result.images[0]
272
 
273
- # Encode result to base64 PNG
274
  buf = io.BytesIO()
275
  art_image.save(buf, format="PNG")
276
  result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
@@ -281,10 +276,12 @@ class EndpointHandler:
281
  "image": result_b64,
282
  "seed": seed,
283
  "parameters": {
284
- "pipeline": "single-cn-v8",
285
  "category": category,
286
- "controlnet_scale": cn_weight,
287
- "cn_timing": f"{cn_start}-{cn_end}",
 
 
288
  "guidance_scale": cfg,
289
  "steps": steps,
290
  "scheduler": "DPM++ 2M SDE Karras",
 
1
  """
2
+ QR-Verse AI Art Generator — HuggingFace Inference Endpoint Handler v9
3
 
4
+ Dual ControlNet: QR Monster v2 + IoC Lab Brightness.
5
 
6
+ Key insight from community (antfu.me/posts/ai-qrcode-101):
7
+ Monster alone at any single weight trades art vs scannability.
8
+ Adding Brightness ControlNet as auxiliary reinforces dark/light
9
+ contrast needed for scanning WITHOUT adding more QR structure.
 
10
 
11
+ "Even if we reduce the weight of the Monster Model to 1.0, the
12
+ recognizability is as good as the single model with Weight 1.5,
13
+ while the composition is closer to the original image."
 
 
 
 
 
 
14
 
15
  Pipeline:
16
+ - DPM++ 2M SDE Karras sampler
17
+ - Dual ControlNet (MultiControlNetModel with TWO DIFFERENT models):
18
+ - QR Monster v2: weight 1.35, timing 0.00→0.85 (QR structure)
19
+ - Brightness: weight 0.20, timing 0.00→1.00 (dark/light contrast)
20
  - QR code 512px centered in 768px canvas with 128px gray padding
21
  - Pre-blur QR with Gaussian sigma=0.5
22
  - CFG 7.5, steps 40
 
24
 
25
  Models:
26
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
27
+ - ControlNet 1: monster-labs/control_v1p_sd15_qrcode_monster (v2)
28
+ - ControlNet 2: ioclab/control_v1p_sd15_brightness
29
  """
30
 
31
  import base64
 
40
  ControlNetModel,
41
  StableDiffusionControlNetPipeline,
42
  DPMSolverMultistepScheduler,
43
+ MultiControlNetModel,
44
  )
45
  from PIL import Image, ImageFilter
46
 
47
  logger = logging.getLogger(__name__)
48
 
49
  # ---------------------------------------------------------------------------
50
+ # Dual ControlNet defaults — Monster (structure) + Brightness (contrast)
51
  # ---------------------------------------------------------------------------
52
+ # Monster: Provides QR structure. Weight 1.35 = good art + QR pattern.
53
+ DEFAULT_MONSTER_WEIGHT = 1.35
54
+ MONSTER_START = 0.00
55
+ MONSTER_END = 0.85
56
+
57
+ # Brightness: Reinforces dark/light contrast for scanning.
58
+ # Interprets QR as brightness map: black modules→dark, gray bg→medium.
59
+ DEFAULT_BRIGHTNESS_WEIGHT = 0.20
60
+ BRIGHTNESS_START = 0.00
61
+ BRIGHTNESS_END = 1.00
62
 
63
  # ---------------------------------------------------------------------------
64
  # Category parameter presets
 
82
  "default": {"cfg": 7.5, "steps": 40},
83
  }
84
 
85
+ # Quality tags — NO QR tags (QR structure from ControlNet only)
 
 
 
86
  QUALITY_TAGS = (
87
  "highly detailed, 4k, high resolution, sharp focus, "
88
  "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
89
  )
90
 
 
91
  DEFAULT_NEGATIVE = (
92
  "blurry, low quality, nsfw, watermark, text, deformed, ugly, amateur, "
93
  "oversaturated, grainy, bad anatomy, bad hands, multiple views"
94
  )
95
 
96
+ # QR code sizing
97
  QR_CODE_SIZE = 512
98
  QR_CANVAS_SIZE = 768
99
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
 
105
 
106
  def __init__(self, path: str = ""):
107
  """Load models on endpoint startup."""
108
+ logger.info("Loading QR Art Generator pipeline v9 (Monster + Brightness)...")
109
  start = time.time()
110
 
111
  device = "cuda" if torch.cuda.is_available() else "cpu"
112
  dtype = torch.float16 if device == "cuda" else torch.float32
113
 
114
+ # Load QR Monster ControlNet v2 (structure)
115
+ logger.info("Loading QR Monster ControlNet v2...")
116
+ monster_cn = ControlNetModel.from_pretrained(
117
  "monster-labs/control_v1p_sd15_qrcode_monster",
118
  subfolder="v2",
119
  torch_dtype=dtype,
120
  )
121
 
122
+ # Load Brightness ControlNet (contrast enforcement)
123
+ logger.info("Loading IoC Lab Brightness ControlNet...")
124
+ brightness_cn = ControlNetModel.from_pretrained(
125
+ "ioclab/control_v1p_sd15_brightness",
126
+ torch_dtype=dtype,
127
+ )
128
+
129
+ # Dual ControlNet: Monster (QR) + Brightness (contrast)
130
+ multi_controlnet = MultiControlNetModel([monster_cn, brightness_cn])
131
+
132
+ # Load SD 1.5 txt2img pipeline
133
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
134
  "SG161222/Realistic_Vision_V5.1_noVAE",
135
+ controlnet=multi_controlnet,
136
  torch_dtype=dtype,
137
  safety_checker=None,
138
  requires_safety_checker=False,
139
  )
140
 
141
+ # CRITICAL: DPM++ 2M SDE Karras
142
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
143
  self.pipe.scheduler.config,
144
  use_karras_sigmas=True,
145
  algorithm_type="sde-dpmsolver++",
146
  )
147
 
 
148
  self.pipe.to(device)
149
 
150
  if device == "cuda":
 
157
  self.device = device
158
  self.dtype = dtype
159
  elapsed = time.time() - start
160
+ logger.info(f"Pipeline v9 loaded in {elapsed:.1f}s on {device}")
161
 
162
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
163
+ """Prepare QR code as ControlNet conditioning image."""
 
 
 
 
 
 
 
 
 
164
  qr_array = np.array(qr_image)
165
  white_mask = np.all(qr_array > 200, axis=2)
166
  if np.sum(white_mask) > 0:
 
168
  qr_array[white_mask] = [128, 128, 128]
169
  qr_image = Image.fromarray(qr_array)
170
 
 
171
  qr_resized = qr_image.resize(
172
  (QR_CODE_SIZE, QR_CODE_SIZE), Image.LANCZOS
173
  )
174
 
 
175
  canvas = Image.new("RGB", (QR_CANVAS_SIZE, QR_CANVAS_SIZE), (128, 128, 128))
176
  canvas.paste(qr_resized, (QR_PADDING, QR_PADDING))
 
 
177
  canvas = canvas.filter(ImageFilter.GaussianBlur(radius=QR_BLUR_SIGMA))
178
 
179
  return canvas
 
192
  "seed": int, # Optional - -1 for random
193
  "width": int, # Optional - default 768
194
  "height": int, # Optional - default 768
195
+ "controlnet_scale": float, # Optional - Monster weight
196
+ "brightness_scale": float, # Optional - Brightness weight
197
+ "cn_start": float, # Optional - Monster start
198
+ "cn_end": float, # Optional - Monster end
199
  }
200
  }
201
  """
 
211
  if not qr_b64:
212
  return {"error": "qr_code_image (base64 PNG) is required"}
213
 
 
214
  try:
215
  qr_image = Image.open(io.BytesIO(base64.b64decode(qr_b64))).convert("RGB")
216
  except Exception as e:
217
  return {"error": f"Failed to decode qr_code_image: {e}"}
218
 
 
219
  qr_conditioning = self._prepare_qr_conditioning(qr_image)
220
 
221
  # Resolve parameters
 
227
  width = inputs.get("width", QR_CANVAS_SIZE)
228
  height = inputs.get("height", QR_CANVAS_SIZE)
229
 
230
+ # Dual CN weights (override-able)
231
+ monster_weight = inputs.get("controlnet_scale",
232
+ inputs.get("unit1_weight", DEFAULT_MONSTER_WEIGHT))
233
+ brightness_weight = inputs.get("brightness_scale",
234
+ inputs.get("unit2_weight", DEFAULT_BRIGHTNESS_WEIGHT))
235
 
236
+ monster_start = inputs.get("cn_start", MONSTER_START)
237
+ monster_end = inputs.get("cn_end", MONSTER_END)
 
238
 
 
239
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}"
240
 
 
241
  seed = inputs.get("seed", -1)
242
  if seed == -1:
243
  seed = torch.Generator(device=self.device).seed()
 
245
  generator = torch.Generator(device=self.device).manual_seed(seed)
246
 
247
  logger.info(
248
+ f"Generating: monster={monster_weight}@{monster_start}-{monster_end} "
249
+ f"brightness={brightness_weight}@{BRIGHTNESS_START}-{BRIGHTNESS_END} "
250
  f"cfg={cfg} steps={steps} category={category}"
251
  )
252
 
253
+ # Same QR image for both: Monster reads structure, Brightness reads contrast
254
  result = self.pipe(
255
  prompt=enhanced_prompt,
256
  negative_prompt=negative_prompt,
257
+ image=[qr_conditioning, qr_conditioning],
258
  width=width,
259
  height=height,
260
  guidance_scale=cfg,
261
+ controlnet_conditioning_scale=[monster_weight, brightness_weight],
262
+ control_guidance_start=[monster_start, BRIGHTNESS_START],
263
+ control_guidance_end=[monster_end, BRIGHTNESS_END],
264
  num_inference_steps=steps,
265
  generator=generator,
266
  )
267
  art_image = result.images[0]
268
 
 
269
  buf = io.BytesIO()
270
  art_image.save(buf, format="PNG")
271
  result_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
 
276
  "image": result_b64,
277
  "seed": seed,
278
  "parameters": {
279
+ "pipeline": "dual-cn-v9",
280
  "category": category,
281
+ "monster_weight": monster_weight,
282
+ "monster_timing": f"{monster_start}-{monster_end}",
283
+ "brightness_weight": brightness_weight,
284
+ "brightness_timing": f"{BRIGHTNESS_START}-{BRIGHTNESS_END}",
285
  "guidance_scale": cfg,
286
  "steps": steps,
287
  "scheduler": "DPM++ 2M SDE Karras",