Spaces:

primerz
/

pixagram-stable

Runtime error

App Files Files Community

primerz committed on Oct 31, 2025

Commit

1f8035e

verified ·

1 Parent(s): 977db4c

Upload 2 files

Browse files

Files changed (2) hide show

generator.py +58 -18
models.py +48 -7

generator.py CHANGED Viewed

@@ -20,7 +20,7 @@ from models import (
     load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
     load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
-    load_openpose_detector
 )
@@ -34,17 +34,24 @@ class RetroArtConverter:
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False,
-            'zoe_depth': False,
             'ip_adapter': False,
-            'openpose': False
         }
-        # Initialize face analysis
         self.face_app, self.face_detection_enabled = load_face_analysis()
-        # Load Zoe Depth detector
-        self.zoe_depth, zoe_success = load_depth_detector()
-        self.models_loaded['zoe_depth'] = zoe_success
         # --- NEW: Load OpenPose detector ---
         self.openpose_detector, openpose_success = load_openpose_detector()
@@ -182,8 +189,11 @@ class RetroArtConverter:
         print("============================\n")
     def get_depth_map(self, image):
-            """Generate depth map using Zoe Depth"""
-            if self.zoe_depth is not None:
                 try:
                     if image.mode != 'RGB':
                         image = image.convert('RGB')
@@ -203,25 +213,27 @@ class RetroArtConverter:
                     image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
                     if target_width != orig_width or target_height != orig_height:
-                        print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
                     # FIXED: Add torch.no_grad() wrapper
                     with torch.no_grad():
-                        depth_image = self.zoe_depth(image_for_depth)
                     depth_width, depth_height = depth_image.size
                     if depth_width != orig_width or depth_height != orig_height:
                         depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
-                    print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
                     return depth_image
                 except Exception as e:
-                    print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
                     gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                     depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                     return Image.fromarray(depth_colored)
             else:
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                 return Image.fromarray(depth_colored)
@@ -553,13 +565,13 @@ class RetroArtConverter:
         face_bbox_original = None
         if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
-            print("Detecting faces and extracting keypoints...")
             img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
             faces = self.face_app.get(img_array)
             if len(faces) > 0:
                 has_detected_faces = True
-                print(f"Detected {len(faces)} face(s)")
                 # Get largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
@@ -619,6 +631,33 @@ class RetroArtConverter:
                 gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
                 print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
                 print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
@@ -715,11 +754,12 @@ class RetroArtConverter:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
             else:
-                # No face, must add a blank image to keep list order
-                print("Using blank map for InstantID (no face/disabled)")
                 control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
                 conditioning_scales.append(0.0) # Set scale to 0
-                scale_debug_str.append("Identity: 0.00")
         # 2. Depth
         if self.depth_active:

     load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
     load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
+    load_openpose_detector, load_mediapipe_face_detector
 )
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False,
+            'depth_detector': False,
+            'depth_type': None,
             'ip_adapter': False,
+            'openpose': False,
+            'mediapipe_face': False
         }
+        # Initialize face analysis (InsightFace)
         self.face_app, self.face_detection_enabled = load_face_analysis()
+        # Load MediapipeFaceDetector (alternative face detection)
+        self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
+        self.models_loaded['mediapipe_face'] = mediapipe_success
+        # Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
+        self.depth_detector, self.depth_type, depth_success = load_depth_detector()
+        self.models_loaded['depth_detector'] = depth_success
+        self.models_loaded['depth_type'] = self.depth_type
         # --- NEW: Load OpenPose detector ---
         self.openpose_detector, openpose_success = load_openpose_detector()
         print("============================\n")
     def get_depth_map(self, image):
+            """
+            Generate depth map using available depth detector.
+            Supports: LeresDetector, ZoeDetector, or MidasDetector.
+            """
+            if self.depth_detector is not None:
                 try:
                     if image.mode != 'RGB':
                         image = image.convert('RGB')
                     image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
                     if target_width != orig_width or target_height != orig_height:
+                        print(f"[DEPTH] Resized for {self.depth_type.upper()}Detector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
                     # FIXED: Add torch.no_grad() wrapper
                     with torch.no_grad():
+                        depth_image = self.depth_detector(image_for_depth)
                     depth_width, depth_height = depth_image.size
                     if depth_width != orig_width or depth_height != orig_height:
                         depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
+                    print(f"[DEPTH] {self.depth_type.upper()} depth map generated: {orig_width}x{orig_height}")
                     return depth_image
                 except Exception as e:
+                    print(f"[DEPTH] {self.depth_type.upper()}Detector failed ({e}), falling back to grayscale depth")
                     gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                     depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                     return Image.fromarray(depth_colored)
             else:
+                # No depth detector available, use grayscale fallback
+                print("[DEPTH] No depth detector available, using grayscale fallback")
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                 return Image.fromarray(depth_colored)
         face_bbox_original = None
         if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
+            print("Detecting faces with InsightFace...")
             img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
             faces = self.face_app.get(img_array)
             if len(faces) > 0:
                 has_detected_faces = True
+                print(f"✓ InsightFace detected {len(faces)} face(s)")
                 # Get largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                 gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
                 print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
                 print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
+            else:
+                # InsightFace failed, try MediapipeFaceDetector as fallback
+                print("✗ InsightFace found no faces, trying MediapipeFaceDetector...")
+                if self.mediapipe_face is not None:
+                    try:
+                        # MediapipeFace returns an annotated image with keypoints
+                        mediapipe_result = self.mediapipe_face(resized_image)
+                        # Check if face was detected (result is not blank/black)
+                        mediapipe_array = np.array(mediapipe_result)
+                        if mediapipe_array.sum() > 1000:  # If image has significant content
+                            has_detected_faces = True
+                            face_kps_image = mediapipe_result
+                            print(f"✓ MediapipeFace detected face(s)")
+                            print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
+                            # Note: MediapipeFace doesn't provide embeddings or detailed info
+                            # So face_embeddings, face_crop_enhanced remain None
+                            # InstantID will work with keypoints only (reduced quality)
+                        else:
+                            print("✗ MediapipeFace found no faces either")
+                    except Exception as e:
+                        print(f"[WARNING] MediapipeFace detection failed: {e}")
+                else:
+                    print("[INFO] MediapipeFaceDetector not available")
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
             else:
+                # No face detected by either detector, must add blank image to keep list order
+                print("✗ No face detected by InsightFace or MediapipeFace")
+                print("  Using blank map for InstantID (scale=0, no effect on output)")
                 control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
                 conditioning_scales.append(0.0) # Set scale to 0
+                scale_debug_str.append("Identity: 0.00 (no face)")
         # 2. Depth
         if self.depth_active:

models.py CHANGED Viewed

@@ -13,7 +13,7 @@ from diffusers import (
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
-from controlnet_aux import ZoeDetector, OpenposeDetector  # <-- NEW
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
@@ -82,16 +82,44 @@ def load_face_analysis():
 def load_depth_detector():
     # Pre-commit version (left-hand side of the diff): only ZoeDetector is
     # attempted, and the result is a 2-tuple -- (detector, True) on success,
     # (None, False) when loading or the move to `device` raises.
-    """Load Zoe Depth detector."""
-    print("Loading Zoe Depth detector...")
     try:
         zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
         zoe_depth.to(device)
-        print("  [OK] Zoe Depth loaded successfully")
-        return zoe_depth, True
     except Exception as e:
         # Best-effort load: the exception is printed, not re-raised.
-        print(f"  [WARNING] Zoe Depth not available: {e}")
-        return None, False
 # --- NEW FUNCTION ---
 def load_openpose_detector():
@@ -107,6 +135,19 @@ def load_openpose_detector():
         return None, False
 # --- END NEW FUNCTION ---
 def load_controlnets():
     """Load ControlNet models."""
     print("Loading ControlNet Zoe Depth model...")

 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
+from controlnet_aux import ZoeDetector, OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
 def load_depth_detector():
+    """
+    Load depth detector with fallback hierarchy: Leres → Zoe → Midas.
+    Returns (detector, detector_type, success).
+    """
     # Return contract: the first detector that loads is returned as
     # (detector, 'leres' | 'zoe' | 'midas', True); when all three stages
     # fail the result is (None, None, False).
     # This is a 3-tuple, whereas the previous version of this function
     # returned (detector, success), so callers must unpack three values.
     # NOTE(review): `device` is not defined in this block -- presumably a
     # module-level setting in models.py; confirm.
+    print("Loading depth detector with fallback hierarchy...")
+    # Try LeresDetector first (best quality)
     try:
+        print("  Attempting LeresDetector (highest quality)...")
+        leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
         # The move to `device` sits inside the same try, so a failure here is
         # handled like a failed load and the next detector is attempted.
+        leres_depth.to(device)
+        print("  [OK] LeresDetector loaded successfully")
+        return leres_depth, 'leres', True
+    except Exception as e:
         # Failure is reported via print and execution falls through to the
         # next stage; nothing is re-raised.
+        print(f"  [INFO] LeresDetector not available: {e}")
+    # Fallback to ZoeDetector
+    try:
+        print("  Attempting ZoeDetector (fallback #1)...")
         zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
         zoe_depth.to(device)
+        print("  [OK] ZoeDetector loaded successfully")
+        return zoe_depth, 'zoe', True
     except Exception as e:
+        print(f"  [INFO] ZoeDetector not available: {e}")
+    # Final fallback to MidasDetector
+    try:
+        print("  Attempting MidasDetector (fallback #2)...")
+        midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
+        midas_depth.to(device)
+        print("  [OK] MidasDetector loaded successfully")
+        return midas_depth, 'midas', True
+    except Exception as e:
         # Last stage: logged at [WARNING] rather than [INFO] because no
         # further detector remains to try.
+        print(f"  [WARNING] MidasDetector not available: {e}")
     # Reached only when every stage above raised.
+    print("  [ERROR] No depth detector available")
+    return None, None, False
 # --- NEW FUNCTION ---
 def load_openpose_detector():
         return None, False
 # --- END NEW FUNCTION ---
+# --- NEW FUNCTION ---
+def load_mediapipe_face_detector():
+    """Load MediapipeFaceDetector for advanced face detection."""
     # Returns (detector, True) on success and (None, False) when construction
     # raises; the exception is printed, not re-raised.
+    print("Loading MediapipeFaceDetector...")
+    try:
         # Constructed with no arguments; this function does not call
         # from_pretrained(...) or .to(device) on the detector.
+        face_detector = MediapipeFaceDetector()
+        print("  [OK] MediapipeFaceDetector loaded successfully")
+        return face_detector, True
+    except Exception as e:
+        print(f"  [WARNING] MediapipeFaceDetector not available: {e}")
+        return None, False
+# --- END NEW FUNCTION ---
 def load_controlnets():
     """Load ControlNet models."""
     print("Loading ControlNet Zoe Depth model...")