Qrverse committed on
Commit
4ccef71
·
verified ·
1 Parent(s): 7e18d22

v7: stacked ControlNet (1.60+1.20=2.80) matching ComfyUI gold standard

Browse files
Files changed (1) hide show
  1. handler.py +87 -63
handler.py CHANGED
@@ -1,23 +1,29 @@
1
  """
2
- QR-Verse AI Art Generator β€” HuggingFace Inference Endpoint Handler v6
3
 
4
- Matched to proven ComfyUI v6 gold-standard pipeline:
 
 
 
 
 
 
 
 
 
 
 
 
5
  - DPM++ 2M SDE Karras sampler (Monster Labs recommended)
6
- - ControlNet timing 0.05β†’0.85 (let composition form, then blend)
7
  - QR code 512px centered in 768px canvas with 128px gray padding
8
- - Pre-blur QR with Gaussian sigma=0.5 for smoother ControlNet integration
9
- - CN weight 1.25-1.50 (NOT 2.5-3.0 β€” that destroys art quality)
10
  - CFG 7.5, steps 40
11
  - Quality tags appended to prompt
12
 
13
  Models:
14
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
15
  - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
16
-
17
- Key insight: The gold QR art images were generated at CN=1.25-1.50 with
18
- DPM++ 2M SDE Karras + the 0.05β†’0.85 guidance window. Higher CN (2.0+)
19
- destroys art quality without improving scannability when the QR code is
20
- properly sized (512 in 768) and pre-blurred.
21
  """
22
 
23
  import base64
@@ -32,44 +38,54 @@ from diffusers import (
32
  ControlNetModel,
33
  StableDiffusionControlNetPipeline,
34
  DPMSolverMultistepScheduler,
 
35
  )
36
  from PIL import Image, ImageFilter
37
 
38
  logger = logging.getLogger(__name__)
39
 
40
  # ---------------------------------------------------------------------------
41
- # Category parameter presets β€” matched to ComfyUI v6 config.py CATEGORY_SCALE
42
  # ---------------------------------------------------------------------------
43
- # These values produce gold-standard QR art at CN=1.25-1.50.
44
- # The 0.05β†’0.85 control_guidance window gives ControlNet influence during
45
- # the middle 80% of denoising β€” composition forms first, details blend last.
 
 
 
 
 
 
 
 
 
46
 
 
 
 
47
  CATEGORY_PARAMS = {
48
- # Photorealistic scenes (RealisticVision V5.1)
49
- "food": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
50
- "luxury": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
51
- "wedding": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
52
- "sports": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
53
- "restaurant": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
54
- "retail": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
55
- "professional": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
56
- "real_estate": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
57
- "architecture": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
58
- "nature": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
59
- "world_wonders": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
60
- "medieval": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
61
- # Social / general β€” slightly higher weight
62
- "social": {"cn_weight": 1.50, "cfg": 7.5, "steps": 40},
63
- "tech": {"cn_weight": 1.50, "cfg": 7.5, "steps": 40},
64
- "seasonal": {"cn_weight": 1.45, "cfg": 7.5, "steps": 40},
65
- # Default fallback
66
- "default": {"cn_weight": 1.50, "cfg": 7.5, "steps": 40},
67
  }
68
 
69
  # Quality tags β€” appended to every prompt (from ComfyUI gold config)
70
  QUALITY_TAGS = (
71
  "highly detailed, 4k, high resolution, sharp focus, "
72
- "masterpiece, best quality, ultra detailed, 8k, professional"
73
  )
74
 
75
  # QR structure tags β€” help model maintain scannable QR pattern
@@ -88,7 +104,7 @@ DEFAULT_NEGATIVE = (
88
  QR_CODE_SIZE = 512
89
  QR_CANVAS_SIZE = 768
90
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
91
- QR_BLUR_SIGMA = 0.5 # Pre-blur for smoother ControlNet integration
92
 
93
 
94
  class EndpointHandler:
@@ -96,31 +112,33 @@ class EndpointHandler:
96
 
97
  def __init__(self, path: str = ""):
98
  """Load models on endpoint startup."""
99
- logger.info("Loading QR Art Generator pipeline v6 (ComfyUI-matched)...")
100
  start = time.time()
101
 
102
  device = "cuda" if torch.cuda.is_available() else "cpu"
103
  dtype = torch.float16 if device == "cuda" else torch.float32
104
 
105
  # Load QR Monster ControlNet v2
106
- self.controlnet = ControlNetModel.from_pretrained(
107
  "monster-labs/control_v1p_sd15_qrcode_monster",
108
  subfolder="v2",
109
  torch_dtype=dtype,
110
  )
111
 
112
- # Load SD 1.5 txt2img + ControlNet pipeline
 
 
 
 
113
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
114
  "SG161222/Realistic_Vision_V5.1_noVAE",
115
- controlnet=self.controlnet,
116
  torch_dtype=dtype,
117
  safety_checker=None,
118
  requires_safety_checker=False,
119
  )
120
 
121
- # CRITICAL: Use DPM++ 2M SDE Karras (Monster Labs recommended)
122
- # This is what the gold ComfyUI pipeline uses.
123
- # UniPCMultistep produces different noise patterns.
124
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
125
  self.pipe.scheduler.config,
126
  use_karras_sigmas=True,
@@ -140,7 +158,7 @@ class EndpointHandler:
140
  self.device = device
141
  self.dtype = dtype
142
  elapsed = time.time() - start
143
- logger.info(f"Pipeline v6 loaded in {elapsed:.1f}s on {device}")
144
 
145
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
146
  """
@@ -183,14 +201,13 @@ class EndpointHandler:
183
  "inputs": {
184
  "prompt": str, # Required
185
  "negative_prompt": str, # Optional
186
- "qr_code_image": str, # Required - base64 PNG of QR code
187
- "category": str, # Optional - maps to CATEGORY_PARAMS
188
  "seed": int, # Optional - -1 for random
189
  "width": int, # Optional - default 768
190
  "height": int, # Optional - default 768
191
- "controlnet_scale": float, # Optional - override cn_weight
192
- "guidance_scale": float, # Optional - override cfg
193
- "num_inference_steps": int, # Optional - override steps
194
  }
195
  }
196
  """
@@ -219,13 +236,16 @@ class EndpointHandler:
219
  category = inputs.get("category", "default")
220
  params = CATEGORY_PARAMS.get(category, CATEGORY_PARAMS["default"])
221
 
222
- cn_weight = inputs.get("controlnet_scale", params["cn_weight"])
223
  cfg = inputs.get("guidance_scale", params["cfg"])
224
  steps = inputs.get("num_inference_steps", params["steps"])
225
  width = inputs.get("width", QR_CANVAS_SIZE)
226
  height = inputs.get("height", QR_CANVAS_SIZE)
227
 
228
- # Enhance prompt with quality + QR tags (like gold ComfyUI pipeline)
 
 
 
 
229
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}, {QR_TAGS}"
230
 
231
  # Seed
@@ -235,22 +255,25 @@ class EndpointHandler:
235
 
236
  generator = torch.Generator(device=self.device).manual_seed(seed)
237
 
238
- # ---- Single-pass txt2img + ControlNet ----
239
- # Gold ComfyUI config: DPM++ 2M SDE Karras, cn 0.05β†’0.85
240
  logger.info(
241
- f"Generating: cn={cn_weight} cfg={cfg} steps={steps} "
242
- f"guidance=0.05β†’0.85 category={category}"
 
 
243
  )
 
244
  result = self.pipe(
245
  prompt=enhanced_prompt,
246
  negative_prompt=negative_prompt,
247
- image=qr_conditioning,
248
  width=width,
249
  height=height,
250
  guidance_scale=cfg,
251
- controlnet_conditioning_scale=cn_weight,
252
- control_guidance_start=0.05,
253
- control_guidance_end=0.85,
254
  num_inference_steps=steps,
255
  generator=generator,
256
  )
@@ -265,16 +288,17 @@ class EndpointHandler:
265
 
266
  return {
267
  "image": result_b64,
268
- "passes_run": 1,
269
  "seed": seed,
270
  "parameters": {
271
- "pipeline": "comfyui-matched-v6",
272
  "category": category,
273
- "controlnet_scale": cn_weight,
 
 
 
 
274
  "guidance_scale": cfg,
275
  "steps": steps,
276
- "control_guidance_start": 0.05,
277
- "control_guidance_end": 0.85,
278
  "scheduler": "DPM++ 2M SDE Karras",
279
  "qr_size": f"{QR_CODE_SIZE}-in-{QR_CANVAS_SIZE}",
280
  "qr_blur_sigma": QR_BLUR_SIGMA,
 
1
  """
2
+ QR-Verse AI Art Generator β€” HuggingFace Inference Endpoint Handler v7
3
 
4
+ Matched to proven ComfyUI v6 gold-standard pipeline.
5
+
6
+ CRITICAL DISCOVERY (v7):
7
+ ComfyUI's "masked ControlNet" workflow applies the SAME ControlNet TWICE
8
+ on the SAME QR image, stacked (chained ControlNetApplyAdvanced nodes):
9
+ - Unit 1: weight=1.60, timing 0.00β†’0.90 (marker emphasis)
10
+ - Unit 2: weight=1.20, timing 0.05β†’0.85 (data reinforcement)
11
+ Effective CN weight in overlapping range = 1.60 + 1.20 = 2.80!
12
+
13
+ v6 used CN=1.45 (single pass) β†’ QR barely visible, unscannable.
14
+ v7 uses MultiControlNetModel to replicate the stacked behavior β†’ CNβ‰ˆ2.80.
15
+
16
+ Pipeline:
17
  - DPM++ 2M SDE Karras sampler (Monster Labs recommended)
18
+ - Stacked ControlNet: same model twice, different weights/timing
19
  - QR code 512px centered in 768px canvas with 128px gray padding
20
+ - Pre-blur QR with Gaussian sigma=0.5
 
21
  - CFG 7.5, steps 40
22
  - Quality tags appended to prompt
23
 
24
  Models:
25
  - Checkpoint: SG161222/Realistic_Vision_V5.1_noVAE (SD 1.5)
26
  - ControlNet: monster-labs/control_v1p_sd15_qrcode_monster (v2)
 
 
 
 
 
27
  """
28
 
29
  import base64
 
38
  ControlNetModel,
39
  StableDiffusionControlNetPipeline,
40
  DPMSolverMultistepScheduler,
41
+ MultiControlNetModel,
42
  )
43
  from PIL import Image, ImageFilter
44
 
45
  logger = logging.getLogger(__name__)
46
 
47
  # ---------------------------------------------------------------------------
48
+ # Stacked ControlNet β€” matched to ComfyUI masked CN workflow
49
  # ---------------------------------------------------------------------------
50
+ # ComfyUI applies ControlNetApplyAdvanced TWICE on the same QR image:
51
+ # Unit 1 (markers): strength=1.60 start=0.00 end=0.90
52
+ # Unit 2 (data): strength=1.20 start=0.05 end=0.85
53
+ # These stack additively. Effective weight at steps 0.05-0.85 = 2.80.
54
+
55
+ UNIT1_WEIGHT = 1.60 # "marker" unit β€” high weight, early start, late end
56
+ UNIT1_START = 0.00
57
+ UNIT1_END = 0.90
58
+
59
+ UNIT2_WEIGHT = 1.20 # "data" unit β€” lower weight, standard timing
60
+ UNIT2_START = 0.05
61
+ UNIT2_END = 0.85
62
 
63
+ # ---------------------------------------------------------------------------
64
+ # Category parameter presets
65
+ # ---------------------------------------------------------------------------
66
  CATEGORY_PARAMS = {
67
+ "food": {"cfg": 7.5, "steps": 40},
68
+ "luxury": {"cfg": 7.5, "steps": 40},
69
+ "wedding": {"cfg": 7.5, "steps": 40},
70
+ "sports": {"cfg": 7.5, "steps": 40},
71
+ "restaurant": {"cfg": 7.5, "steps": 40},
72
+ "retail": {"cfg": 7.5, "steps": 40},
73
+ "professional": {"cfg": 7.5, "steps": 40},
74
+ "real_estate": {"cfg": 7.5, "steps": 40},
75
+ "architecture": {"cfg": 7.5, "steps": 40},
76
+ "nature": {"cfg": 7.5, "steps": 40},
77
+ "world_wonders":{"cfg": 7.5, "steps": 40},
78
+ "medieval": {"cfg": 7.5, "steps": 40},
79
+ "social": {"cfg": 7.5, "steps": 40},
80
+ "tech": {"cfg": 7.5, "steps": 40},
81
+ "seasonal": {"cfg": 7.5, "steps": 40},
82
+ "default": {"cfg": 7.5, "steps": 40},
 
 
 
83
  }
84
 
85
  # Quality tags β€” appended to every prompt (from ComfyUI gold config)
86
  QUALITY_TAGS = (
87
  "highly detailed, 4k, high resolution, sharp focus, "
88
+ "masterpiece, best quality, ultra detailed, 8k, professional, award-winning"
89
  )
90
 
91
  # QR structure tags β€” help model maintain scannable QR pattern
 
104
  QR_CODE_SIZE = 512
105
  QR_CANVAS_SIZE = 768
106
  QR_PADDING = (QR_CANVAS_SIZE - QR_CODE_SIZE) // 2 # 128px
107
+ QR_BLUR_SIGMA = 0.5
108
 
109
 
110
  class EndpointHandler:
 
112
 
113
  def __init__(self, path: str = ""):
114
  """Load models on endpoint startup."""
115
+ logger.info("Loading QR Art Generator pipeline v7 (stacked ControlNet)...")
116
  start = time.time()
117
 
118
  device = "cuda" if torch.cuda.is_available() else "cpu"
119
  dtype = torch.float16 if device == "cuda" else torch.float32
120
 
121
  # Load QR Monster ControlNet v2
122
+ controlnet = ControlNetModel.from_pretrained(
123
  "monster-labs/control_v1p_sd15_qrcode_monster",
124
  subfolder="v2",
125
  torch_dtype=dtype,
126
  )
127
 
128
+ # Create MultiControlNetModel with the SAME model twice
129
+ # This replicates ComfyUI's stacked ControlNetApplyAdvanced behavior
130
+ multi_controlnet = MultiControlNetModel([controlnet, controlnet])
131
+
132
+ # Load SD 1.5 txt2img + MultiControlNet pipeline
133
  self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
134
  "SG161222/Realistic_Vision_V5.1_noVAE",
135
+ controlnet=multi_controlnet,
136
  torch_dtype=dtype,
137
  safety_checker=None,
138
  requires_safety_checker=False,
139
  )
140
 
141
+ # CRITICAL: DPM++ 2M SDE Karras (Monster Labs recommended)
 
 
142
  self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
143
  self.pipe.scheduler.config,
144
  use_karras_sigmas=True,
 
158
  self.device = device
159
  self.dtype = dtype
160
  elapsed = time.time() - start
161
+ logger.info(f"Pipeline v7 loaded in {elapsed:.1f}s on {device}")
162
 
163
  def _prepare_qr_conditioning(self, qr_image: Image.Image) -> Image.Image:
164
  """
 
201
  "inputs": {
202
  "prompt": str, # Required
203
  "negative_prompt": str, # Optional
204
+ "qr_code_image": str, # Required - base64 PNG
205
+ "category": str, # Optional
206
  "seed": int, # Optional - -1 for random
207
  "width": int, # Optional - default 768
208
  "height": int, # Optional - default 768
209
+ "unit1_weight": float, # Optional - override
210
+ "unit2_weight": float, # Optional - override
 
211
  }
212
  }
213
  """
 
236
  category = inputs.get("category", "default")
237
  params = CATEGORY_PARAMS.get(category, CATEGORY_PARAMS["default"])
238
 
 
239
  cfg = inputs.get("guidance_scale", params["cfg"])
240
  steps = inputs.get("num_inference_steps", params["steps"])
241
  width = inputs.get("width", QR_CANVAS_SIZE)
242
  height = inputs.get("height", QR_CANVAS_SIZE)
243
 
244
+ # Stacked CN weights (override-able for testing)
245
+ u1_weight = inputs.get("unit1_weight", UNIT1_WEIGHT)
246
+ u2_weight = inputs.get("unit2_weight", UNIT2_WEIGHT)
247
+
248
+ # Enhance prompt with quality + QR tags
249
  enhanced_prompt = f"{prompt}, {QUALITY_TAGS}, {QR_TAGS}"
250
 
251
  # Seed
 
255
 
256
  generator = torch.Generator(device=self.device).manual_seed(seed)
257
 
258
+ # ---- Stacked ControlNet (same QR image twice) ----
259
+ # Replicates ComfyUI's chained ControlNetApplyAdvanced
260
  logger.info(
261
+ f"Generating: u1={u1_weight}@{UNIT1_START}-{UNIT1_END} "
262
+ f"u2={u2_weight}@{UNIT2_START}-{UNIT2_END} "
263
+ f"effective={u1_weight + u2_weight:.2f} "
264
+ f"cfg={cfg} steps={steps} category={category}"
265
  )
266
+
267
  result = self.pipe(
268
  prompt=enhanced_prompt,
269
  negative_prompt=negative_prompt,
270
+ image=[qr_conditioning, qr_conditioning],
271
  width=width,
272
  height=height,
273
  guidance_scale=cfg,
274
+ controlnet_conditioning_scale=[u1_weight, u2_weight],
275
+ control_guidance_start=[UNIT1_START, UNIT2_START],
276
+ control_guidance_end=[UNIT1_END, UNIT2_END],
277
  num_inference_steps=steps,
278
  generator=generator,
279
  )
 
288
 
289
  return {
290
  "image": result_b64,
 
291
  "seed": seed,
292
  "parameters": {
293
+ "pipeline": "stacked-cn-v7",
294
  "category": category,
295
+ "unit1_weight": u1_weight,
296
+ "unit1_timing": f"{UNIT1_START}-{UNIT1_END}",
297
+ "unit2_weight": u2_weight,
298
+ "unit2_timing": f"{UNIT2_START}-{UNIT2_END}",
299
+ "effective_cn": round(u1_weight + u2_weight, 2),
300
  "guidance_scale": cfg,
301
  "steps": steps,
 
 
302
  "scheduler": "DPM++ 2M SDE Karras",
303
  "qr_size": f"{QR_CODE_SIZE}-in-{QR_CANVAS_SIZE}",
304
  "qr_blur_sigma": QR_BLUR_SIGMA,