primerz committed on
Commit
1f8035e
·
verified ·
1 Parent(s): 977db4c

Upload 2 files

Browse files
Files changed (2) hide show
  1. generator.py +58 -18
  2. models.py +48 -7
generator.py CHANGED
@@ -20,7 +20,7 @@ from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
- load_openpose_detector
24
  )
25
 
26
 
@@ -34,17 +34,24 @@ class RetroArtConverter:
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
- 'zoe_depth': False,
 
38
  'ip_adapter': False,
39
- 'openpose': False
 
40
  }
41
 
42
- # Initialize face analysis
43
  self.face_app, self.face_detection_enabled = load_face_analysis()
44
 
45
- # Load Zoe Depth detector
46
- self.zoe_depth, zoe_success = load_depth_detector()
47
- self.models_loaded['zoe_depth'] = zoe_success
 
 
 
 
 
48
 
49
  # --- NEW: Load OpenPose detector ---
50
  self.openpose_detector, openpose_success = load_openpose_detector()
@@ -182,8 +189,11 @@ class RetroArtConverter:
182
  print("============================\n")
183
 
184
  def get_depth_map(self, image):
185
- """Generate depth map using Zoe Depth"""
186
- if self.zoe_depth is not None:
 
 
 
187
  try:
188
  if image.mode != 'RGB':
189
  image = image.convert('RGB')
@@ -203,25 +213,27 @@ class RetroArtConverter:
203
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
204
 
205
  if target_width != orig_width or target_height != orig_height:
206
- print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
207
 
208
  # FIXED: Add torch.no_grad() wrapper
209
  with torch.no_grad():
210
- depth_image = self.zoe_depth(image_for_depth)
211
 
212
  depth_width, depth_height = depth_image.size
213
  if depth_width != orig_width or depth_height != orig_height:
214
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
215
 
216
- print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
217
  return depth_image
218
 
219
  except Exception as e:
220
- print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
221
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
222
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
223
  return Image.fromarray(depth_colored)
224
  else:
 
 
225
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
226
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
227
  return Image.fromarray(depth_colored)
@@ -553,13 +565,13 @@ class RetroArtConverter:
553
  face_bbox_original = None
554
 
555
  if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
556
- print("Detecting faces and extracting keypoints...")
557
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
558
  faces = self.face_app.get(img_array)
559
 
560
  if len(faces) > 0:
561
  has_detected_faces = True
562
- print(f"Detected {len(faces)} face(s)")
563
 
564
  # Get largest face
565
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
@@ -619,6 +631,33 @@ class RetroArtConverter:
619
  gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
620
  print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
621
  print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  # Set LORA scale
624
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
@@ -715,11 +754,12 @@ class RetroArtConverter:
715
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
716
 
717
  else:
718
- # No face, must add a blank image to keep list order
719
- print("Using blank map for InstantID (no face/disabled)")
 
720
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
721
  conditioning_scales.append(0.0) # Set scale to 0
722
- scale_debug_str.append("Identity: 0.00")
723
 
724
  # 2. Depth
725
  if self.depth_active:
 
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
+ load_openpose_detector, load_mediapipe_face_detector
24
  )
25
 
26
 
 
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
+ 'depth_detector': False,
38
+ 'depth_type': None,
39
  'ip_adapter': False,
40
+ 'openpose': False,
41
+ 'mediapipe_face': False
42
  }
43
 
44
+ # Initialize face analysis (InsightFace)
45
  self.face_app, self.face_detection_enabled = load_face_analysis()
46
 
47
+ # Load MediapipeFaceDetector (alternative face detection)
48
+ self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
49
+ self.models_loaded['mediapipe_face'] = mediapipe_success
50
+
51
+ # Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
52
+ self.depth_detector, self.depth_type, depth_success = load_depth_detector()
53
+ self.models_loaded['depth_detector'] = depth_success
54
+ self.models_loaded['depth_type'] = self.depth_type
55
 
56
  # --- NEW: Load OpenPose detector ---
57
  self.openpose_detector, openpose_success = load_openpose_detector()
 
189
  print("============================\n")
190
 
191
  def get_depth_map(self, image):
192
+ """
193
+ Generate depth map using available depth detector.
194
+ Supports: LeresDetector, ZoeDetector, or MidasDetector.
195
+ """
196
+ if self.depth_detector is not None:
197
  try:
198
  if image.mode != 'RGB':
199
  image = image.convert('RGB')
 
213
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
214
 
215
  if target_width != orig_width or target_height != orig_height:
216
+ print(f"[DEPTH] Resized for {self.depth_type.upper()}Detector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
217
 
218
  # FIXED: Add torch.no_grad() wrapper
219
  with torch.no_grad():
220
+ depth_image = self.depth_detector(image_for_depth)
221
 
222
  depth_width, depth_height = depth_image.size
223
  if depth_width != orig_width or depth_height != orig_height:
224
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
225
 
226
+ print(f"[DEPTH] {self.depth_type.upper()} depth map generated: {orig_width}x{orig_height}")
227
  return depth_image
228
 
229
  except Exception as e:
230
+ print(f"[DEPTH] {self.depth_type.upper()}Detector failed ({e}), falling back to grayscale depth")
231
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
232
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
233
  return Image.fromarray(depth_colored)
234
  else:
235
+ # No depth detector available, use grayscale fallback
236
+ print("[DEPTH] No depth detector available, using grayscale fallback")
237
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
238
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
239
  return Image.fromarray(depth_colored)
 
565
  face_bbox_original = None
566
 
567
  if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
568
+ print("Detecting faces with InsightFace...")
569
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
570
  faces = self.face_app.get(img_array)
571
 
572
  if len(faces) > 0:
573
  has_detected_faces = True
574
+ print(f" InsightFace detected {len(faces)} face(s)")
575
 
576
  # Get largest face
577
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
 
631
  gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
632
  print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
633
  print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
634
+
635
+ else:
636
+ # InsightFace found no faces; try MediapipeFaceDetector as a fallback
637
+ print("✗ InsightFace found no faces, trying MediapipeFaceDetector...")
638
+
639
+ if self.mediapipe_face is not None:
640
+ try:
641
+ # MediapipeFace returns an annotated image with keypoints
642
+ mediapipe_result = self.mediapipe_face(resized_image)
643
+
644
+ # Check if face was detected (result is not blank/black)
645
+ mediapipe_array = np.array(mediapipe_result)
646
+ if mediapipe_array.sum() > 1000: # If image has significant content
647
+ has_detected_faces = True
648
+ face_kps_image = mediapipe_result
649
+ print(f"✓ MediapipeFace detected face(s)")
650
+ print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
651
+
652
+ # Note: MediapipeFace doesn't provide embeddings or detailed info
653
+ # So face_embeddings, face_crop_enhanced remain None
654
+ # InstantID will work with keypoints only (reduced quality)
655
+ else:
656
+ print("✗ MediapipeFace found no faces either")
657
+ except Exception as e:
658
+ print(f"[WARNING] MediapipeFace detection failed: {e}")
659
+ else:
660
+ print("[INFO] MediapipeFaceDetector not available")
661
 
662
  # Set LORA scale
663
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
 
754
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
755
 
756
  else:
757
+ # No face detected by either detector, must add blank image to keep list order
758
+ print(" No face detected by InsightFace or MediapipeFace")
759
+ print(" Using blank map for InstantID (scale=0, no effect on output)")
760
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
761
  conditioning_scales.append(0.0) # Set scale to 0
762
+ scale_debug_str.append("Identity: 0.00 (no face)")
763
 
764
  # 2. Depth
765
  if self.depth_active:
models.py CHANGED
@@ -13,7 +13,7 @@ from diffusers import (
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
- from controlnet_aux import ZoeDetector, OpenposeDetector # <-- NEW
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
@@ -82,16 +82,44 @@ def load_face_analysis():
82
 
83
 
84
  def load_depth_detector():
85
- """Load Zoe Depth detector."""
86
- print("Loading Zoe Depth detector...")
 
 
 
 
 
87
  try:
 
 
 
 
 
 
 
 
 
 
 
88
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
89
  zoe_depth.to(device)
90
- print(" [OK] Zoe Depth loaded successfully")
91
- return zoe_depth, True
92
  except Exception as e:
93
- print(f" [WARNING] Zoe Depth not available: {e}")
94
- return None, False
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # --- NEW FUNCTION ---
97
  def load_openpose_detector():
@@ -107,6 +135,19 @@ def load_openpose_detector():
107
  return None, False
108
  # --- END NEW FUNCTION ---
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def load_controlnets():
111
  """Load ControlNet models."""
112
  print("Loading ControlNet Zoe Depth model...")
 
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
+ from controlnet_aux import ZoeDetector, OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
 
82
 
83
 
84
  def load_depth_detector():
85
+ """
86
+ Load depth detector with fallback hierarchy: Leres → Zoe → Midas.
87
+ Returns (detector, detector_type, success).
88
+ """
89
+ print("Loading depth detector with fallback hierarchy...")
90
+
91
+ # Try LeresDetector first (best quality)
92
  try:
93
+ print(" Attempting LeresDetector (highest quality)...")
94
+ leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
95
+ leres_depth.to(device)
96
+ print(" [OK] LeresDetector loaded successfully")
97
+ return leres_depth, 'leres', True
98
+ except Exception as e:
99
+ print(f" [INFO] LeresDetector not available: {e}")
100
+
101
+ # Fallback to ZoeDetector
102
+ try:
103
+ print(" Attempting ZoeDetector (fallback #1)...")
104
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
105
  zoe_depth.to(device)
106
+ print(" [OK] ZoeDetector loaded successfully")
107
+ return zoe_depth, 'zoe', True
108
  except Exception as e:
109
+ print(f" [INFO] ZoeDetector not available: {e}")
110
+
111
+ # Final fallback to MidasDetector
112
+ try:
113
+ print(" Attempting MidasDetector (fallback #2)...")
114
+ midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
115
+ midas_depth.to(device)
116
+ print(" [OK] MidasDetector loaded successfully")
117
+ return midas_depth, 'midas', True
118
+ except Exception as e:
119
+ print(f" [WARNING] MidasDetector not available: {e}")
120
+
121
+ print(" [ERROR] No depth detector available")
122
+ return None, None, False
123
 
124
  # --- NEW FUNCTION ---
125
  def load_openpose_detector():
 
135
  return None, False
136
  # --- END NEW FUNCTION ---
137
 
138
+ # --- NEW FUNCTION ---
139
+ def load_mediapipe_face_detector():
140
+ """Load MediapipeFaceDetector for advanced face detection."""
141
+ print("Loading MediapipeFaceDetector...")
142
+ try:
143
+ face_detector = MediapipeFaceDetector()
144
+ print(" [OK] MediapipeFaceDetector loaded successfully")
145
+ return face_detector, True
146
+ except Exception as e:
147
+ print(f" [WARNING] MediapipeFaceDetector not available: {e}")
148
+ return None, False
149
+ # --- END NEW FUNCTION ---
150
+
151
  def load_controlnets():
152
  """Load ControlNet models."""
153
  print("Loading ControlNet Zoe Depth model...")