primerz committed
Commit d4170e9 · verified · Parent: 23acfdf

Update models.py

Files changed (1): models.py (+27 -42)
models.py CHANGED
@@ -1,6 +1,6 @@
 """
 Model loading and initialization for Pixagram AI Pixel Art Generator
-UPDATED VERSION with proper InstantID pipeline integration
+UPDATED VERSION with proper InstantID pipeline support
 """
 import torch
 import time
@@ -9,20 +9,19 @@ from diffusers import (
     AutoencoderKL,
     LCMScheduler
 )
-from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
 from controlnet_aux import ZoeDetector
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
 
-# Use InstantID pipeline (replaces manual IP-Adapter setup)
+# Use InstantID pipeline
 from pipeline_stable_diffusion_xl_instantid_img2img import (
     StableDiffusionXLInstantIDImg2ImgPipeline
 )
 
 from config import (
     device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
-    FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG, INSTANTID_CONFIG
+    FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG
 )
 
 
@@ -66,7 +65,7 @@ def load_face_analysis():
     try:
         face_app = FaceAnalysis(
             name=FACE_DETECTION_CONFIG['model_name'],
-            root=FACE_DETECTION_CONFIG.get('root', './models/insightface'),
+            root='./models/insightface',
             providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
         )
         face_app.prepare(
@@ -95,34 +94,24 @@ def load_depth_detector():
 
 def load_controlnets():
     """
-    Load both ControlNets for InstantID pipeline.
-    Returns tuple: (identitynet, depthnet)
-    Both are required for proper face preservation.
+    Load ControlNets for InstantID pipeline.
+    Returns both ControlNets (InstantID first, then Depth).
     """
     print("Loading InstantID ControlNet...")
-    try:
-        controlnet_instantid = ControlNetModel.from_pretrained(
-            INSTANTID_CONFIG['repo'],
-            subfolder=INSTANTID_CONFIG['controlnet_subfolder'],
-            torch_dtype=dtype
-        ).to(device)
-        print(" [OK] InstantID ControlNet loaded")
-    except Exception as e:
-        print(f" [ERROR] InstantID ControlNet failed: {e}")
-        raise
+    controlnet_instantid = ControlNetModel.from_pretrained(
+        "InstantX/InstantID",
+        subfolder="ControlNetModel",
+        torch_dtype=dtype
+    ).to(device)
+    print(" [OK] InstantID ControlNet loaded")
 
     print("Loading Zoe Depth ControlNet...")
-    try:
-        controlnet_depth = ControlNetModel.from_pretrained(
-            "diffusers/controlnet-zoe-depth-sdxl-1.0",
-            torch_dtype=dtype
-        ).to(device)
-        print(" [OK] Zoe Depth ControlNet loaded")
-    except Exception as e:
-        print(f" [ERROR] Depth ControlNet failed: {e}")
-        raise
+    controlnet_depth = ControlNetModel.from_pretrained(
+        "diffusers/controlnet-zoe-depth-sdxl-1.0",
+        torch_dtype=dtype
+    ).to(device)
+    print(" [OK] Zoe Depth ControlNet loaded")
 
-    # Return in order: InstantID first, Depth second
     return controlnet_instantid, controlnet_depth
 
 
@@ -138,23 +127,21 @@ def load_sdxl_pipeline(controlnets):
         # Use InstantID-enabled pipeline
         pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
             model_path,
-            controlnet=controlnets,  # MUST be list of 2 ControlNets
+            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
 
-        # Load IP-Adapter weights
+        # Load IP-Adapter weights for InstantID
         print("Loading IP-Adapter for InstantID...")
         ip_adapter_path = download_model_with_retry(
-            INSTANTID_CONFIG['repo'],
-            INSTANTID_CONFIG['ip_adapter_file']
+            "InstantX/InstantID",
+            "ip-adapter.bin"
         )
         pipe.load_ip_adapter_instantid(ip_adapter_path)
-        pipe.set_ip_adapter_scale(INSTANTID_CONFIG['default_ip_scale'])
+        pipe.set_ip_adapter_scale(0.8)  # Default scale
 
         print(" [OK] InstantID pipeline loaded successfully")
-        print(f" - IP-Adapter scale: {INSTANTID_CONFIG['default_ip_scale']}")
-        print(f" - ControlNets: InstantID + Depth")
        return pipe, True
 
    except Exception as e:
@@ -163,7 +150,7 @@ def load_sdxl_pipeline(controlnets):
        traceback.print_exc()
 
        # Fallback to standard pipeline
-        print(" Falling back to standard SDXL pipeline (no face preservation)")
+        print(" Falling back to standard SDXL pipeline (no InstantID)")
        from diffusers import StableDiffusionXLControlNetImg2ImgPipeline
        pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
@@ -213,7 +200,6 @@ def setup_scheduler(pipe):
 
 def optimize_pipeline(pipe):
     """Apply optimizations to pipeline."""
-    # Try to enable xformers
     if device == "cuda":
         try:
             pipe.enable_xformers_memory_efficient_attention()
@@ -229,7 +215,7 @@ def load_caption_model():
     """
     print("Loading caption model...")
 
-    # Try GIT-Large first (good balance of quality and compatibility)
+    # Try GIT-Large first
     try:
         from transformers import AutoProcessor, AutoModelForCausalLM
 
@@ -239,7 +225,7 @@ def load_caption_model():
             "microsoft/git-large-coco",
             torch_dtype=dtype
         ).to(device)
-        print(" [OK] GIT-Large model loaded (produces detailed captions)")
+        print(" [OK] GIT-Large model loaded")
         return caption_processor, caption_model, True, 'git'
     except Exception as e1:
         print(f" [INFO] GIT-Large not available: {e1}")
@@ -254,11 +240,10 @@ def load_caption_model():
             "Salesforce/blip-image-captioning-base",
             torch_dtype=dtype
         ).to(device)
-        print(" [OK] BLIP base model loaded (standard captions)")
+        print(" [OK] BLIP base model loaded")
         return caption_processor, caption_model, True, 'blip'
     except Exception as e2:
         print(f" [WARNING] Caption models not available: {e2}")
-        print(" Caption generation will be disabled")
        return None, None, False, 'none'
 
 
@@ -268,4 +253,4 @@ def set_clip_skip(pipe):
     print(f" [OK] CLIP skip set to {CLIP_SKIP}")
 
 
-print("[OK] Model loading functions ready (InstantID pipeline)")
+print("[OK] Model loading functions ready")
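The commit inlines the InstantID settings that previously came from config.py. Judging from the values now hardcoded in models.py, the removed INSTANTID_CONFIG presumably looked roughly like this (a reconstruction from this diff, not the actual config.py contents):

# Presumed shape of the INSTANTID_CONFIG dict this commit removes from
# config.py, reconstructed from the values inlined into models.py above.
INSTANTID_CONFIG = {
    'repo': 'InstantX/InstantID',               # ControlNet + IP-Adapter repo
    'controlnet_subfolder': 'ControlNetModel',  # now passed as subfolder=...
    'ip_adapter_file': 'ip-adapter.bin',        # now passed to the downloader
    'default_ip_scale': 0.8,                    # now hardcoded identity strength
}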
 
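For context, a minimal sketch of how the loaders touched by this commit presumably compose at startup. All function names come from this file; the module name `models`, the image path, the largest-face heuristic, and the assumption that load_face_analysis() returns the prepared FaceAnalysis app are illustrative, not part of the commit:

import cv2
from models import (
    load_face_analysis, load_controlnets, load_sdxl_pipeline,
    setup_scheduler, optimize_pipeline, set_clip_skip,
)

# Wire the loaders in dependency order; load_controlnets() returns
# (InstantID, Depth) and load_sdxl_pipeline() expects both as a list.
face_app = load_face_analysis()
controlnets = load_controlnets()
pipe, instantid_ok = load_sdxl_pipeline(list(controlnets))
setup_scheduler(pipe)
optimize_pipeline(pipe)
set_clip_skip(pipe)

# The commit pins identity strength to 0.8 at load time; it can still be
# re-tuned per run with the same setter the diff uses.
if instantid_ok:
    pipe.set_ip_adapter_scale(0.6)  # weaker identity, stronger stylization

# Extract the identity embedding the InstantID IP-Adapter consumes
# (standard insightface usage; picks the largest detected face).
faces = face_app.get(cv2.imread("portrait.jpg"))
face_emb = max(
    faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])
).normed_embedding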