primerz committed on
Commit
cec85e0
·
verified ·
1 Parent(s): 58466f2

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +38 -36
models.py CHANGED
@@ -70,8 +70,10 @@ def load_face_analysis():
70
 
71
  try:
72
  antelope_download = snapshot_download(repo_id="DIAMONIK7777/antelopev2", local_dir="/data/models/antelopev2")
73
- face_app = FaceAnalysis(name='antelopev2', root='/data', providers=['CPUExecutionProvider']) # Changed from CUDA to CPU
 
74
  face_app.prepare(ctx_id=0, det_size=(640, 640))
 
75
  return face_app, True
76
 
77
  except Exception as e:
@@ -91,9 +93,10 @@ def load_depth_detector():
91
  # Try LeresDetector first (best quality)
92
  try:
93
  print(" Attempting LeresDetector (highest quality)...")
 
94
  leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
95
- # leres_depth.to(device)
96
- print(" [OK] LeresDetector loaded successfully")
97
  return leres_depth, 'leres', True
98
  except Exception as e:
99
  print(f" [INFO] LeresDetector not available: {e}")
@@ -101,9 +104,10 @@ def load_depth_detector():
101
  # Fallback to ZoeDetector
102
  try:
103
  print(" Attempting ZoeDetector (fallback #1)...")
 
104
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
105
- # zoe_depth.to(device)
106
- print(" [OK] ZoeDetector loaded successfully")
107
  return zoe_depth, 'zoe', True
108
  except Exception as e:
109
  print(f" [INFO] ZoeDetector not available: {e}")
@@ -111,9 +115,10 @@ def load_depth_detector():
111
  # Final fallback to MidasDetector
112
  try:
113
  print(" Attempting MidasDetector (fallback #2)...")
 
114
  midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
115
- # midas_depth.to(device)
116
- print(" [OK] MidasDetector loaded successfully")
117
  return midas_depth, 'midas', True
118
  except Exception as e:
119
  print(f" [WARNING] MidasDetector not available: {e}")
@@ -126,9 +131,10 @@ def load_openpose_detector():
126
  """Load OpenPose detector."""
127
  print("Loading OpenPose detector...")
128
  try:
 
129
  openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
130
- # openpose.to(device)
131
- print(" [OK] OpenPose loaded successfully")
132
  return openpose, True
133
  except Exception as e:
134
  print(f" [WARNING] OpenPose not available: {e}")
@@ -151,20 +157,22 @@ def load_mediapipe_face_detector():
151
  def load_controlnets():
152
  """Load ControlNet models."""
153
  print("Loading ControlNet Zoe Depth model...")
 
154
  controlnet_depth = ControlNetModel.from_pretrained(
155
  "xinsir/controlnet-depth-sdxl-1.0",
156
  torch_dtype=dtype
157
- )#.to(device)
158
- print(" [OK] ControlNet Depth loaded")
159
 
160
  # --- NEW: Load OpenPose ControlNet ---
161
  print("Loading ControlNet OpenPose model...")
162
  try:
 
163
  controlnet_openpose = ControlNetModel.from_pretrained(
164
  "xinsir/controlnet-openpose-sdxl-1.0",
165
  torch_dtype=dtype
166
- )#.to(device)
167
- print(" [OK] ControlNet OpenPose loaded")
168
  except Exception as e:
169
  print(f" [WARNING] ControlNet OpenPose not available: {e}")
170
  controlnet_openpose = None
@@ -172,12 +180,13 @@ def load_controlnets():
172
 
173
  print("Loading InstantID ControlNet...")
174
  try:
 
175
  controlnet_instantid = ControlNetModel.from_pretrained(
176
  "InstantX/InstantID",
177
  subfolder="ControlNetModel",
178
  torch_dtype=dtype
179
- )#.to(device)
180
- print(" [OK] InstantID ControlNet loaded successfully")
181
  # Return all three models
182
  return controlnet_depth, controlnet_instantid, controlnet_openpose, True
183
  except Exception as e:
@@ -190,12 +199,13 @@ def load_image_encoder():
190
  """Load CLIP Image Encoder for IP-Adapter."""
191
  print("Loading CLIP Image Encoder for IP-Adapter...")
192
  try:
 
193
  image_encoder = CLIPVisionModelWithProjection.from_pretrained(
194
  "h94/IP-Adapter",
195
  subfolder="models/image_encoder",
196
  torch_dtype=dtype
197
- )#.to(device)
198
- print(" [OK] CLIP Image Encoder loaded successfully")
199
  return image_encoder
200
  except Exception as e:
201
  print(f" [ERROR] Could not load image encoder: {e}")
@@ -213,7 +223,7 @@ def load_sdxl_pipeline(controlnets):
213
  controlnet=controlnets,
214
  torch_dtype=dtype,
215
  use_safetensors=True
216
- ).to(device)
217
  print(" [OK] Custom checkpoint loaded successfully (VAE bundled)")
218
  return pipe, True
219
  except Exception as e:
@@ -224,7 +234,7 @@ def load_sdxl_pipeline(controlnets):
224
  controlnet=controlnets,
225
  torch_dtype=dtype,
226
  use_safetensors=True
227
- ).to(device)
228
  return pipe, False
229
 
230
 
@@ -399,22 +409,12 @@ def setup_scheduler(pipe):
399
  def optimize_pipeline(pipe):
400
  """Apply optimizations to pipeline."""
401
 
402
- # Try to enable xformers
403
- if device == "cuda":
404
- try:
405
- pipe.enable_xformers_memory_efficient_attention()
406
- print(" [OK] xformers enabled")
407
- except Exception as e:
408
- print(f" [INFO] xformers not available: {e}")
409
-
410
- # Enable CPU offloading for VRAM-constrained environments
411
- print(" [OK] Enabling model CPU offloading...")
412
- pipe.enable_model_cpu_offload()
413
 
414
  # Try to enable xformers
415
  if device == "cuda":
416
  try:
417
- pipe.enable_xformers_memory_efficient_attention()
418
  print(" [OK] xformers enabled")
419
  except Exception as e:
420
  print(f" [INFO] xformers not available: {e}")
@@ -433,11 +433,12 @@ def load_caption_model():
433
 
434
  print(" Attempting GIT-Large (recommended)...")
435
  caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
 
436
  caption_model = AutoModelForCausalLM.from_pretrained(
437
  "microsoft/git-large-coco",
438
  torch_dtype=dtype
439
- )#.to(device)
440
- print(" [OK] GIT-Large model loaded (produces detailed captions)")
441
  return caption_processor, caption_model, True, 'git'
442
  except Exception as e1:
443
  print(f" [INFO] GIT-Large not available: {e1}")
@@ -448,11 +449,12 @@ def load_caption_model():
448
 
449
  print(" Attempting BLIP base (fallback)...")
450
  caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 
451
  caption_model = BlipForConditionalGeneration.from_pretrained(
452
  "Salesforce/blip-image-captioning-base",
453
  torch_dtype=dtype
454
- )#.to(device)
455
- print(" [OK] BLIP base model loaded (standard captions)")
456
  return caption_processor, caption_model, True, 'blip'
457
  except Exception as e2:
458
  print(f" [WARNING] Caption models not available: {e2}")
@@ -463,7 +465,7 @@ def load_caption_model():
463
  def set_clip_skip(pipe):
464
  """Set CLIP skip value."""
465
  if hasattr(pipe, 'text_encoder'):
466
- print(" [OK] CLIP skip set to {CLIP_SKIP}")
467
 
468
 
469
  print("[OK] Model loading functions ready")
 
70
 
71
  try:
72
  antelope_download = snapshot_download(repo_id="DIAMONIK7777/antelopev2", local_dir="/data/models/antelopev2")
73
+ # --- FIX: Load InsightFace on CPU to save VRAM ---
74
+ face_app = FaceAnalysis(name='antelopev2', root='/data', providers=['CPUExecutionProvider'])
75
  face_app.prepare(ctx_id=0, det_size=(640, 640))
76
+ print(" [OK] Face analysis loaded (on CPU)")
77
  return face_app, True
78
 
79
  except Exception as e:
 
93
  # Try LeresDetector first (best quality)
94
  try:
95
  print(" Attempting LeresDetector (highest quality)...")
96
+ # --- FIX: Load on CPU ---
97
  leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
98
+ # leres_depth.to(device) # Removed
99
+ print(" [OK] LeresDetector loaded successfully (on CPU)")
100
  return leres_depth, 'leres', True
101
  except Exception as e:
102
  print(f" [INFO] LeresDetector not available: {e}")
 
104
  # Fallback to ZoeDetector
105
  try:
106
  print(" Attempting ZoeDetector (fallback #1)...")
107
+ # --- FIX: Load on CPU ---
108
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
109
+ # zoe_depth.to(device) # Removed
110
+ print(" [OK] ZoeDetector loaded successfully (on CPU)")
111
  return zoe_depth, 'zoe', True
112
  except Exception as e:
113
  print(f" [INFO] ZoeDetector not available: {e}")
 
115
  # Final fallback to MidasDetector
116
  try:
117
  print(" Attempting MidasDetector (fallback #2)...")
118
+ # --- FIX: Load on CPU ---
119
  midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
120
+ # midas_depth.to(device) # Removed
121
+ print(" [OK] MidasDetector loaded successfully (on CPU)")
122
  return midas_depth, 'midas', True
123
  except Exception as e:
124
  print(f" [WARNING] MidasDetector not available: {e}")
 
131
  """Load OpenPose detector."""
132
  print("Loading OpenPose detector...")
133
  try:
134
+ # --- FIX: Load on CPU ---
135
  openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
136
+ # openpose.to(device) # Removed
137
+ print(" [OK] OpenPose loaded successfully (on CPU)")
138
  return openpose, True
139
  except Exception as e:
140
  print(f" [WARNING] OpenPose not available: {e}")
 
157
  def load_controlnets():
158
  """Load ControlNet models."""
159
  print("Loading ControlNet Zoe Depth model...")
160
+ # --- FIX: Load core models on GPU ---
161
  controlnet_depth = ControlNetModel.from_pretrained(
162
  "xinsir/controlnet-depth-sdxl-1.0",
163
  torch_dtype=dtype
164
+ ).to(device)
165
+ print(" [OK] ControlNet Depth loaded (on GPU)")
166
 
167
  # --- NEW: Load OpenPose ControlNet ---
168
  print("Loading ControlNet OpenPose model...")
169
  try:
170
+ # --- FIX: Load core models on GPU ---
171
  controlnet_openpose = ControlNetModel.from_pretrained(
172
  "xinsir/controlnet-openpose-sdxl-1.0",
173
  torch_dtype=dtype
174
+ ).to(device)
175
+ print(" [OK] ControlNet OpenPose loaded (on GPU)")
176
  except Exception as e:
177
  print(f" [WARNING] ControlNet OpenPose not available: {e}")
178
  controlnet_openpose = None
 
180
 
181
  print("Loading InstantID ControlNet...")
182
  try:
183
+ # --- FIX: Load core models on GPU ---
184
  controlnet_instantid = ControlNetModel.from_pretrained(
185
  "InstantX/InstantID",
186
  subfolder="ControlNetModel",
187
  torch_dtype=dtype
188
+ ).to(device)
189
+ print(" [OK] InstantID ControlNet loaded successfully (on GPU)")
190
  # Return all three models
191
  return controlnet_depth, controlnet_instantid, controlnet_openpose, True
192
  except Exception as e:
 
199
  """Load CLIP Image Encoder for IP-Adapter."""
200
  print("Loading CLIP Image Encoder for IP-Adapter...")
201
  try:
202
+ # --- FIX: Load core models on GPU ---
203
  image_encoder = CLIPVisionModelWithProjection.from_pretrained(
204
  "h94/IP-Adapter",
205
  subfolder="models/image_encoder",
206
  torch_dtype=dtype
207
+ ).to(device)
208
+ print(" [OK] CLIP Image Encoder loaded successfully (on GPU)")
209
  return image_encoder
210
  except Exception as e:
211
  print(f" [ERROR] Could not load image encoder: {e}")
 
223
  controlnet=controlnets,
224
  torch_dtype=dtype,
225
  use_safetensors=True
226
+ ).to(device) # This main pipe MUST be on device
227
  print(" [OK] Custom checkpoint loaded successfully (VAE bundled)")
228
  return pipe, True
229
  except Exception as e:
 
234
  controlnet=controlnets,
235
  torch_dtype=dtype,
236
  use_safetensors=True
237
+ ).to(device) # This main pipe MUST be on device
238
  return pipe, False
239
 
240
 
 
409
  def optimize_pipeline(pipe):
410
  """Apply optimizations to pipeline."""
411
 
412
+ # --- FIX: Removed enable_model_cpu_offload() ---
 
 
 
 
 
 
 
 
 
 
413
 
414
  # Try to enable xformers
415
  if device == "cuda":
416
  try:
417
+ pipe.enable_xformers_memory_efficient_attention()
418
  print(" [OK] xformers enabled")
419
  except Exception as e:
420
  print(f" [INFO] xformers not available: {e}")
 
433
 
434
  print(" Attempting GIT-Large (recommended)...")
435
  caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
436
+ # --- FIX: Load on CPU ---
437
  caption_model = AutoModelForCausalLM.from_pretrained(
438
  "microsoft/git-large-coco",
439
  torch_dtype=dtype
440
+ ) # .to(device) removed
441
+ print(" [OK] GIT-Large model loaded (produces detailed captions, on CPU)")
442
  return caption_processor, caption_model, True, 'git'
443
  except Exception as e1:
444
  print(f" [INFO] GIT-Large not available: {e1}")
 
449
 
450
  print(" Attempting BLIP base (fallback)...")
451
  caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
452
+ # --- FIX: Load on CPU ---
453
  caption_model = BlipForConditionalGeneration.from_pretrained(
454
  "Salesforce/blip-image-captioning-base",
455
  torch_dtype=dtype
456
+ ) # .to(device) removed
457
+ print(" [OK] BLIP base model loaded (standard captions, on CPU)")
458
  return caption_processor, caption_model, True, 'blip'
459
  except Exception as e2:
460
  print(f" [WARNING] Caption models not available: {e2}")
 
465
  def set_clip_skip(pipe):
466
  """Set CLIP skip value."""
467
  if hasattr(pipe, 'text_encoder'):
468
+ print(f" [OK] CLIP skip set to {CLIP_SKIP}")
469
 
470
 
471
  print("[OK] Model loading functions ready")