oliveryanzuolu committed on
Commit
834f6ad
·
verified ·
1 Parent(s): d6df1df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -74
app.py CHANGED
@@ -7,64 +7,77 @@ from PIL import Image
7
  import os
8
 
9
  # Diffusers and ControlNet imports
10
- from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
11
 
12
  # -----------------------------------------------------------------------------
13
- # 1. Configuration & Registry
14
  # -----------------------------------------------------------------------------
15
  LORA_REGISTRY = {
16
- "None (Base SD1.5)": {
17
  "repo": None,
18
  "trigger": "",
19
  "weight": 0.0
20
  },
21
- "Lego Style": {
22
  "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5",
23
  "trigger": "LEGO Creator, LEGO MiniFig, ",
24
  "weight": 0.8,
25
  "file": "Lego_XL_v2.1.safetensors"
26
  },
27
- "Claymation Style": {
28
  "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
29
  "trigger": "made-of-clay, claymation style, ",
30
- "weight": 1.0
 
31
  },
32
- "Pixel Art (SD1.5)": {
33
- "repo": "artificialguybr/pixelartredmond-1-5v-pixel-art-loras-for-sd-1-5",
34
- "trigger": "pixel art, PixArFK, ",
35
- "weight": 1.0
 
36
  }
37
  }
38
 
39
  # -----------------------------------------------------------------------------
40
  # 2. Model Initialization
41
  # -----------------------------------------------------------------------------
42
- print("Initializing Inference Pipeline...")
43
 
44
  device = "cuda" if torch.cuda.is_available() else "cpu"
45
  dtype = torch.float16 if device == "cuda" else torch.float32
46
 
47
- # Load ControlNet
 
 
 
 
 
 
48
  controlnet = ControlNetModel.from_pretrained(
49
- "lllyasviel/sd-controlnet-canny",
50
  torch_dtype=dtype,
51
  use_safetensors=True
52
  )
53
 
54
- # Load Base Stable Diffusion 1.5
55
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
56
- "stable-diffusion-v1-5/stable-diffusion-v1-5",
57
  controlnet=controlnet,
 
58
  torch_dtype=dtype,
59
  use_safetensors=True
60
  )
61
 
 
62
  pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
63
 
64
- if device == "cuda":
 
 
 
65
  pipe.to(device)
66
 
67
- print("Base Pipeline Loaded Successfully.")
68
 
69
  # -----------------------------------------------------------------------------
70
  # 3. Computer Vision Helper Functions
@@ -94,8 +107,8 @@ def generate_controlled_image(
94
  if input_image is None:
95
  raise gr.Error("Validation Error: Please upload an image first!")
96
 
97
- # 1. Preprocess Image
98
- width, height = 512, 512
99
  input_image = input_image.resize((width, height))
100
  canny_image = get_canny_image(input_image)
101
 
@@ -106,13 +119,18 @@ def generate_controlled_image(
106
  repo_id = style_config["repo"]
107
  trigger_text = style_config["trigger"]
108
  lora_weight = style_config["weight"]
 
109
 
110
  final_prompt = f"{trigger_text}{prompt}"
111
 
112
  try:
113
  if repo_id:
114
  print(f"Loading LoRA: {repo_id}")
115
- pipe.load_lora_weights(repo_id)
 
 
 
 
116
  pipe.fuse_lora(lora_scale=lora_weight)
117
  print("LoRA fused successfully.")
118
 
@@ -132,7 +150,7 @@ def generate_controlled_image(
132
  image=canny_image,
133
  num_inference_steps=int(steps),
134
  controlnet_conditioning_scale=float(controlnet_conditioning_scale),
135
- guidance_scale=7.5,
136
  generator=generator,
137
  ).images
138
  except Exception as e:
@@ -159,77 +177,48 @@ css = """
159
  .guide-text {font-size: 1.1em; color: #4a5568;}
160
  """
161
 
162
- # Example Data (Using resolve URLs)
163
  examples = [
164
  [
165
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
166
- "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece",
167
- "blurry, low quality, deformed",
168
- "None (Base SD1.5)",
169
  1.0, 30, 42
170
  ],
171
  [
172
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
173
- "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic",
174
- "human skin, realistic, painting, blurry",
175
- "Lego Style",
176
  0.8, 30, 101
177
  ],
178
  [
179
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
180
- "a cute bird, isometric view, retro game asset, 8-bit graphics",
181
- "photorealistic, vector, high resolution, smooth",
182
- "Pixel Art (SD1.5)",
183
  1.0, 30, 202
184
  ],
185
  [
186
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
187
- "interior of a modern living room, stop motion animation, plasticine texture, fingerprint textures",
188
  "cgi, 3d render, glossy, architectural visualization",
189
- "Claymation Style",
190
  1.0, 30, 303
191
  ],
192
- [
193
- "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_human_normal.png",
194
- "a man in a suit walking, built with lego bricks, 3d render, studio lighting",
195
- "flesh, organic, fabric, realistic face",
196
- "Lego Style",
197
- 0.9, 30, 404
198
- ],
199
- [
200
- "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_human_openpose.png",
201
- "a fashion model posing, pixelated style, 16-bit color palette, arcade style",
202
- "blur, anti-aliasing, photograph",
203
- "Pixel Art (SD1.5)",
204
- 1.0, 30, 505
205
- ],
206
- [
207
- "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_scribble.png",
208
- "classic portrait painting, whimsical clay character, soft lighting, play-doh style",
209
- "oil painting, canvas texture, flat",
210
- "Claymation Style",
211
- 1.0, 30, 606
212
- ],
213
- [
214
- "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_seg.png",
215
- "empty room interior, white walls, wooden floor, high quality, photorealistic",
216
- "furniture, clutter, messy, low res",
217
- "None (Base SD1.5)",
218
- 0.8, 30, 707
219
- ]
220
  ]
221
 
222
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
223
 
224
  with gr.Column(elem_id="col-container"):
225
- gr.Markdown("# 🎨 ControlNet + LoRA Style Mixer")
226
  gr.Markdown(
227
  """
228
  <p class='guide-text'>
229
- <b>Transform Structure into Style.</b><br>
230
- This pipeline uses <b>ControlNet (Canny)</b> to lock the edges of your image,
231
- and <b>LoRA</b> adapters to completely repaint the texture.
232
- Select a preset below to see the magic.
233
  </p>
234
  """
235
  )
@@ -248,22 +237,22 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
248
 
249
  negative_prompt = gr.Textbox(
250
  label="Negative Prompt",
251
- value="blurry, low quality, distorted, ugly, bad anatomy, watermark",
252
  lines=1
253
  )
254
 
255
  lora_selection = gr.Dropdown(
256
  label="Select LoRA Style",
257
  choices=list(LORA_REGISTRY.keys()),
258
- value="None (Base SD1.5)",
259
  info="Automatically injects trigger words and loads weights."
260
  )
261
 
262
  with gr.Accordion("⚙️ Advanced Settings", open=False):
263
  controlnet_conditioning_scale = gr.Slider(
264
- label="ControlNet Strength (Edge Fidelity)",
265
- minimum=0.0, maximum=2.0, value=1.0, step=0.1,
266
- info="1.0 = Follow edges strictly. Lower for more 'creative' interpretation."
267
  )
268
  steps = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=30, step=1)
269
  seed = gr.Number(label="Seed", value=42, precision=0)
@@ -273,7 +262,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
273
  # Right Column: Outputs
274
  with gr.Column(scale=1):
275
  with gr.Row():
276
- output_canny = gr.Image(label="Detected Edges (ControlNet Sees This)", type="pil")
277
  output_result = gr.Image(label="Final Stylized Image", type="pil")
278
 
279
  # Examples Section
@@ -283,7 +272,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
283
  inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
284
  outputs=[output_canny, output_result],
285
  fn=generate_controlled_image,
286
- cache_examples=False # CRITICAL FIX: Set to False to prevent async loop errors
287
  )
288
 
289
  # Event Wiring
 
7
  import os
8
 
9
  # Diffusers and ControlNet imports
10
+ from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
11
 
12
  # -----------------------------------------------------------------------------
13
+ # 1. Configuration & Registry (SDXL Version)
14
  # -----------------------------------------------------------------------------
15
  LORA_REGISTRY = {
16
+ "None (Base SDXL)": {
17
  "repo": None,
18
  "trigger": "",
19
  "weight": 0.0
20
  },
21
+ "Lego Style XL": {
22
  "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5",
23
  "trigger": "LEGO Creator, LEGO MiniFig, ",
24
  "weight": 0.8,
25
  "file": "Lego_XL_v2.1.safetensors"
26
  },
27
+ "Claymation Style XL": {
28
  "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
29
  "trigger": "made-of-clay, claymation style, ",
30
+ "weight": 0.9,
31
+ "file": "DD-made-of-clay-XL-v2.safetensors"
32
  },
33
+ "Pixel Art XL": {
34
+ "repo": "nerijs/pixel-art-xl",
35
+ "trigger": "pixel art, ",
36
+ "weight": 1.0,
37
+ "file": "pixel-art-xl.safetensors"
38
  }
39
  }
40
 
41
  # -----------------------------------------------------------------------------
42
  # 2. Model Initialization
43
  # -----------------------------------------------------------------------------
44
+ print("Initializing SDXL Inference Pipeline...")
45
 
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
  dtype = torch.float16 if device == "cuda" else torch.float32
48
 
49
+ # 1. Load VAE (Critical for SDXL fp16 stability)
50
+ vae = AutoencoderKL.from_pretrained(
51
+ "madebyollin/sdxl-vae-fp16-fix",
52
+ torch_dtype=dtype
53
+ )
54
+
55
+ # 2. Load ControlNet (Must be SDXL version)
56
  controlnet = ControlNetModel.from_pretrained(
57
+ "diffusers/controlnet-canny-sdxl-1.0",
58
  torch_dtype=dtype,
59
  use_safetensors=True
60
  )
61
 
62
+ # 3. Load Base SDXL
63
+ pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
64
+ "stabilityai/stable-diffusion-xl-base-1.0",
65
  controlnet=controlnet,
66
+ vae=vae,
67
  torch_dtype=dtype,
68
  use_safetensors=True
69
  )
70
 
71
+ # Optimization
72
  pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
73
 
74
+ try:
75
+ pipe.enable_model_cpu_offload()
76
+ except Exception as e:
77
+ print(f"Warning: CPU offload failed, moving to device manually. {e}")
78
  pipe.to(device)
79
 
80
+ print("SDXL Pipeline Loaded Successfully.")
81
 
82
  # -----------------------------------------------------------------------------
83
  # 3. Computer Vision Helper Functions
 
107
  if input_image is None:
108
  raise gr.Error("Validation Error: Please upload an image first!")
109
 
110
+ # 1. Preprocess Image (SDXL works best at 1024x1024)
111
+ width, height = 1024, 1024
112
  input_image = input_image.resize((width, height))
113
  canny_image = get_canny_image(input_image)
114
 
 
119
  repo_id = style_config["repo"]
120
  trigger_text = style_config["trigger"]
121
  lora_weight = style_config["weight"]
122
+ lora_file = style_config.get("file", None)
123
 
124
  final_prompt = f"{trigger_text}{prompt}"
125
 
126
  try:
127
  if repo_id:
128
  print(f"Loading LoRA: {repo_id}")
129
+ if lora_file:
130
+ pipe.load_lora_weights(repo_id, weight_name=lora_file)
131
+ else:
132
+ pipe.load_lora_weights(repo_id)
133
+
134
  pipe.fuse_lora(lora_scale=lora_weight)
135
  print("LoRA fused successfully.")
136
 
 
150
  image=canny_image,
151
  num_inference_steps=int(steps),
152
  controlnet_conditioning_scale=float(controlnet_conditioning_scale),
153
+ guidance_scale=7.0, # SDXL usually prefers slightly lower CFG than SD1.5
154
  generator=generator,
155
  ).images
156
  except Exception as e:
 
177
  .guide-text {font-size: 1.1em; color: #4a5568;}
178
  """
179
 
180
+ # Example Data (Updated for SDXL context)
181
  examples = [
182
  [
183
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
184
+ "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
185
+ "blurry, low quality, deformed, illustration",
186
+ "None (Base SDXL)",
187
  1.0, 30, 42
188
  ],
189
  [
190
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
191
+ "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic, macro photography",
192
+ "human skin, realistic, painting, blurry, drawing",
193
+ "Lego Style XL",
194
  0.8, 30, 101
195
  ],
196
  [
197
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
198
+ "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
199
+ "photorealistic, vector, high resolution, smooth, 3d render",
200
+ "Pixel Art XL",
201
  1.0, 30, 202
202
  ],
203
  [
204
  "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
205
+ "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture, fingerprint textures",
206
  "cgi, 3d render, glossy, architectural visualization",
207
+ "Claymation Style XL",
208
  1.0, 30, 303
209
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  ]
211
 
212
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
213
 
214
  with gr.Column(elem_id="col-container"):
215
+ gr.Markdown("# 🎨 SDXL ControlNet + LoRA Mixer")
216
  gr.Markdown(
217
  """
218
  <p class='guide-text'>
219
+ <b>SDXL Edition.</b><br>
220
+ Higher resolution, better prompt adherence, and native LoRA support.
221
+ Uses <b>ControlNet Canny (SDXL)</b> for structure.
 
222
  </p>
223
  """
224
  )
 
237
 
238
  negative_prompt = gr.Textbox(
239
  label="Negative Prompt",
240
+ value="blurry, low quality, distorted, ugly, bad anatomy, watermark, text",
241
  lines=1
242
  )
243
 
244
  lora_selection = gr.Dropdown(
245
  label="Select LoRA Style",
246
  choices=list(LORA_REGISTRY.keys()),
247
+ value="None (Base SDXL)",
248
  info="Automatically injects trigger words and loads weights."
249
  )
250
 
251
  with gr.Accordion("⚙️ Advanced Settings", open=False):
252
  controlnet_conditioning_scale = gr.Slider(
253
+ label="ControlNet Strength",
254
+ minimum=0.0, maximum=1.5, value=0.8, step=0.1,
255
+ info="SDXL ControlNet is strong. 0.8 is usually a good sweet spot."
256
  )
257
  steps = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=30, step=1)
258
  seed = gr.Number(label="Seed", value=42, precision=0)
 
262
  # Right Column: Outputs
263
  with gr.Column(scale=1):
264
  with gr.Row():
265
+ output_canny = gr.Image(label="Detected Edges", type="pil")
266
  output_result = gr.Image(label="Final Stylized Image", type="pil")
267
 
268
  # Examples Section
 
272
  inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
273
  outputs=[output_canny, output_result],
274
  fn=generate_controlled_image,
275
+ cache_examples=False # Keep False for stability
276
  )
277
 
278
  # Event Wiring