Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- generator.py +12 -12
- models.py +9 -9
generator.py
CHANGED
|
@@ -33,16 +33,16 @@ class RetroArtConverter:
|
|
| 33 |
'custom_checkpoint': False,
|
| 34 |
'lora': False,
|
| 35 |
'instantid': False,
|
| 36 |
-
'
|
| 37 |
'ip_adapter': False
|
| 38 |
}
|
| 39 |
|
| 40 |
# Initialize face analysis
|
| 41 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 42 |
|
| 43 |
-
# Load
|
| 44 |
-
self.
|
| 45 |
-
self.models_loaded['
|
| 46 |
|
| 47 |
# Load ControlNets
|
| 48 |
controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
|
|
@@ -146,8 +146,8 @@ class RetroArtConverter:
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
|
| 149 |
-
"""Generate depth map using
|
| 150 |
-
if self.
|
| 151 |
try:
|
| 152 |
if image.mode != 'RGB':
|
| 153 |
image = image.convert('RGB')
|
|
@@ -165,11 +165,11 @@ class RetroArtConverter:
|
|
| 165 |
|
| 166 |
if target_width != orig_width or target_height != orig_height:
|
| 167 |
image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 168 |
-
print(f"[DEPTH] Resized for
|
| 169 |
|
| 170 |
# FIXED: Add torch.no_grad() wrapper
|
| 171 |
with torch.no_grad():
|
| 172 |
-
depth_image = self.
|
| 173 |
|
| 174 |
depth_width, depth_height = depth_image.size
|
| 175 |
# Convert numpy int64 to Python int to avoid PIL errors
|
|
@@ -181,11 +181,11 @@ class RetroArtConverter:
|
|
| 181 |
if depth_width != orig_width_int or depth_height != orig_height_int:
|
| 182 |
depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
|
| 183 |
|
| 184 |
-
print(f"[DEPTH]
|
| 185 |
return depth_image
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
-
print(f"[DEPTH]
|
| 189 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 190 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 191 |
return Image.fromarray(depth_colored)
|
|
@@ -467,7 +467,7 @@ class RetroArtConverter:
|
|
| 467 |
resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 468 |
|
| 469 |
# Generate depth map
|
| 470 |
-
print("Generating
|
| 471 |
depth_image = self.get_depth_map(resized_image)
|
| 472 |
if depth_image.size != (target_width, target_height):
|
| 473 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
@@ -636,7 +636,7 @@ class RetroArtConverter:
|
|
| 636 |
# Reshape for Resampler: [1, 1, 512]
|
| 637 |
face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
|
| 638 |
|
| 639 |
-
# Pass through Resampler: [1, 1, 512]
|
| 640 |
face_proj_embeds = self.image_proj_model(face_emb_tensor)
|
| 641 |
|
| 642 |
# Scale with identity preservation
|
|
|
|
| 33 |
'custom_checkpoint': False,
|
| 34 |
'lora': False,
|
| 35 |
'instantid': False,
|
| 36 |
+
'midas_depth': False,
|
| 37 |
'ip_adapter': False
|
| 38 |
}
|
| 39 |
|
| 40 |
# Initialize face analysis
|
| 41 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 42 |
|
| 43 |
+
# Load Midas Depth detector
|
| 44 |
+
self.midas_depth, midas_success = load_depth_detector()
|
| 45 |
+
self.models_loaded['midas_depth'] = midas_success
|
| 46 |
|
| 47 |
# Load ControlNets
|
| 48 |
controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
|
|
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
|
| 149 |
+
"""Generate depth map using Midas Depth"""
|
| 150 |
+
if self.midas_depth is not None:
|
| 151 |
try:
|
| 152 |
if image.mode != 'RGB':
|
| 153 |
image = image.convert('RGB')
|
|
|
|
| 165 |
|
| 166 |
if target_width != orig_width or target_height != orig_height:
|
| 167 |
image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 168 |
+
print(f"[DEPTH] Resized for MidasDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
|
| 169 |
|
| 170 |
# FIXED: Add torch.no_grad() wrapper
|
| 171 |
with torch.no_grad():
|
| 172 |
+
depth_image = self.midas_depth(image)
|
| 173 |
|
| 174 |
depth_width, depth_height = depth_image.size
|
| 175 |
# Convert numpy int64 to Python int to avoid PIL errors
|
|
|
|
| 181 |
if depth_width != orig_width_int or depth_height != orig_height_int:
|
| 182 |
depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
|
| 183 |
|
| 184 |
+
print(f"[DEPTH] Midas depth map generated: {orig_width}x{orig_height}")
|
| 185 |
return depth_image
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
+
print(f"[DEPTH] MidasDetector failed ({e}), falling back to grayscale depth")
|
| 189 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 190 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 191 |
return Image.fromarray(depth_colored)
|
|
|
|
| 467 |
resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 468 |
|
| 469 |
# Generate depth map
|
| 470 |
+
print("Generating Midas depth map...")
|
| 471 |
depth_image = self.get_depth_map(resized_image)
|
| 472 |
if depth_image.size != (target_width, target_height):
|
| 473 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
|
|
| 636 |
# Reshape for Resampler: [1, 1, 512]
|
| 637 |
face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
|
| 638 |
|
| 639 |
+
# Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
|
| 640 |
face_proj_embeds = self.image_proj_model(face_emb_tensor)
|
| 641 |
|
| 642 |
# Scale with identity preservation
|
models.py
CHANGED
|
@@ -13,7 +13,7 @@ from diffusers import (
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
-
from controlnet_aux import
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
@@ -82,15 +82,15 @@ def load_face_analysis():
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
|
| 85 |
-
"""Load
|
| 86 |
-
print("Loading
|
| 87 |
try:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
print(" [OK]
|
| 91 |
-
return
|
| 92 |
except Exception as e:
|
| 93 |
-
print(f" [WARNING]
|
| 94 |
return None, False
|
| 95 |
|
| 96 |
|
|
@@ -276,7 +276,7 @@ def setup_ip_adapter(pipe, image_encoder):
|
|
| 276 |
|
| 277 |
print(" [OK] IP-Adapter fully loaded with InstantID architecture")
|
| 278 |
print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
|
| 279 |
-
print(f" - Face embeddings: 512D
|
| 280 |
|
| 281 |
return image_proj_model, True
|
| 282 |
|
|
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
+
from controlnet_aux import MidasDetector
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
    """Load the MiDaS depth detector used for depth-map ControlNet conditioning.

    Downloads/loads the pretrained annotator from the ``lllyasviel/Annotators``
    hub repo and moves it to the module-level ``device``.

    Returns:
        tuple: ``(detector, True)`` on success, or ``(None, False)`` when the
        detector cannot be loaded (best-effort — callers fall back gracefully).
    """
    print("Loading Midas Depth detector...")
    try:
        # NOTE(review): assumes module-level `device` is set earlier in this
        # file — confirm against the full models.py.
        detector = MidasDetector.from_pretrained("lllyasviel/Annotators")
        detector.to(device)
    except Exception as e:
        # Best-effort: signal failure instead of crashing app startup.
        print(f" [WARNING] Midas Depth not available: {e}")
        return None, False
    print(" [OK] Midas Depth loaded successfully")
    return detector, True
|
| 95 |
|
| 96 |
|
|
|
|
| 276 |
|
| 277 |
print(" [OK] IP-Adapter fully loaded with InstantID architecture")
|
| 278 |
print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
|
| 279 |
+
print(f" - Face embeddings: 512D → 16x2048D")
|
| 280 |
|
| 281 |
return image_proj_model, True
|
| 282 |
|