primerz committed on
Commit
ede7ed8
·
verified ·
1 Parent(s): b2a3100

Upload 2 files

Browse files
Files changed (2) hide show
  1. generator.py +12 -12
  2. models.py +9 -9
generator.py CHANGED
@@ -33,16 +33,16 @@ class RetroArtConverter:
33
  'custom_checkpoint': False,
34
  'lora': False,
35
  'instantid': False,
36
- 'zoe_depth': False,
37
  'ip_adapter': False
38
  }
39
 
40
  # Initialize face analysis
41
  self.face_app, self.face_detection_enabled = load_face_analysis()
42
 
43
- # Load Zoe Depth detector
44
- self.zoe_depth, zoe_success = load_depth_detector()
45
- self.models_loaded['zoe_depth'] = zoe_success
46
 
47
  # Load ControlNets
48
  controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
@@ -146,8 +146,8 @@ class RetroArtConverter:
146
  print("============================\n")
147
 
148
  def get_depth_map(self, image):
149
- """Generate depth map using Zoe Depth"""
150
- if self.zoe_depth is not None:
151
  try:
152
  if image.mode != 'RGB':
153
  image = image.convert('RGB')
@@ -165,11 +165,11 @@ class RetroArtConverter:
165
 
166
  if target_width != orig_width or target_height != orig_height:
167
  image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
168
- print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
169
 
170
  # FIXED: Add torch.no_grad() wrapper
171
  with torch.no_grad():
172
- depth_image = self.zoe_depth(image)
173
 
174
  depth_width, depth_height = depth_image.size
175
  # Convert numpy int64 to Python int to avoid PIL errors
@@ -181,11 +181,11 @@ class RetroArtConverter:
181
  if depth_width != orig_width_int or depth_height != orig_height_int:
182
  depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
183
 
184
- print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
185
  return depth_image
186
 
187
  except Exception as e:
188
- print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
189
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
190
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
191
  return Image.fromarray(depth_colored)
@@ -467,7 +467,7 @@ class RetroArtConverter:
467
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
468
 
469
  # Generate depth map
470
- print("Generating Zoe depth map...")
471
  depth_image = self.get_depth_map(resized_image)
472
  if depth_image.size != (target_width, target_height):
473
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
@@ -636,7 +636,7 @@ class RetroArtConverter:
636
  # Reshape for Resampler: [1, 1, 512]
637
  face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
638
 
639
- # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
640
  face_proj_embeds = self.image_proj_model(face_emb_tensor)
641
 
642
  # Scale with identity preservation
 
33
  'custom_checkpoint': False,
34
  'lora': False,
35
  'instantid': False,
36
+ 'midas_depth': False,
37
  'ip_adapter': False
38
  }
39
 
40
  # Initialize face analysis
41
  self.face_app, self.face_detection_enabled = load_face_analysis()
42
 
43
+ # Load Midas Depth detector
44
+ self.midas_depth, midas_success = load_depth_detector()
45
+ self.models_loaded['midas_depth'] = midas_success
46
 
47
  # Load ControlNets
48
  controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
 
146
  print("============================\n")
147
 
148
  def get_depth_map(self, image):
149
+ """Generate depth map using Midas Depth"""
150
+ if self.midas_depth is not None:
151
  try:
152
  if image.mode != 'RGB':
153
  image = image.convert('RGB')
 
165
 
166
  if target_width != orig_width or target_height != orig_height:
167
  image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
168
+ print(f"[DEPTH] Resized for MidasDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
169
 
170
  # FIXED: Add torch.no_grad() wrapper
171
  with torch.no_grad():
172
+ depth_image = self.midas_depth(image)
173
 
174
  depth_width, depth_height = depth_image.size
175
  # Convert numpy int64 to Python int to avoid PIL errors
 
181
  if depth_width != orig_width_int or depth_height != orig_height_int:
182
  depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
183
 
184
+ print(f"[DEPTH] Midas depth map generated: {orig_width}x{orig_height}")
185
  return depth_image
186
 
187
  except Exception as e:
188
+ print(f"[DEPTH] MidasDetector failed ({e}), falling back to grayscale depth")
189
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
190
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
191
  return Image.fromarray(depth_colored)
 
467
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
468
 
469
  # Generate depth map
470
+ print("Generating Midas depth map...")
471
  depth_image = self.get_depth_map(resized_image)
472
  if depth_image.size != (target_width, target_height):
473
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
 
636
  # Reshape for Resampler: [1, 1, 512]
637
  face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
638
 
639
+ # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
640
  face_proj_embeds = self.image_proj_model(face_emb_tensor)
641
 
642
  # Scale with identity preservation
models.py CHANGED
@@ -13,7 +13,7 @@ from diffusers import (
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
- from controlnet_aux import ZoeDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
@@ -82,15 +82,15 @@ def load_face_analysis():
82
 
83
 
84
  def load_depth_detector():
85
- """Load Zoe Depth detector."""
86
- print("Loading Zoe Depth detector...")
87
  try:
88
- zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
89
- zoe_depth.to(device)
90
- print(" [OK] Zoe Depth loaded successfully")
91
- return zoe_depth, True
92
  except Exception as e:
93
- print(f" [WARNING] Zoe Depth not available: {e}")
94
  return None, False
95
 
96
 
@@ -276,7 +276,7 @@ def setup_ip_adapter(pipe, image_encoder):
276
 
277
  print(" [OK] IP-Adapter fully loaded with InstantID architecture")
278
  print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
279
- print(f" - Face embeddings: 512D → 16x2048D")
280
 
281
  return image_proj_model, True
282
 
 
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
+ from controlnet_aux import MidasDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
 
82
 
83
 
84
  def load_depth_detector():
85
+ """Load Midas Depth detector."""
86
+ print("Loading Midas Depth detector...")
87
  try:
88
+ midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
89
+ midas_depth.to(device)
90
+ print(" [OK] Midas Depth loaded successfully")
91
+ return midas_depth, True
92
  except Exception as e:
93
+ print(f" [WARNING] Midas Depth not available: {e}")
94
  return None, False
95
 
96
 
 
276
 
277
  print(" [OK] IP-Adapter fully loaded with InstantID architecture")
278
  print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
279
+ print(f" - Face embeddings: 512D → 16x2048D")
280
 
281
  return image_proj_model, True
282