pixagram-dev

Runtime error

App Files Files Community

primerz committed on Oct 30

Commit

171e0fc

verified ·

1 Parent(s): 8064305

Update models.py

Browse files

Files changed (1) hide show

models.py +46 -45

models.py CHANGED Viewed

@@ -13,7 +13,7 @@ from diffusers import (
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
-from controlnet_aux import MidasDetector, LeresDetector
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
@@ -82,26 +82,30 @@ def load_face_analysis():
 def load_depth_detector():
-    """Load LeRes++ Depth detector (superior to Midas/Zoe for detailed depth estimation)."""
-    print("Loading LeRes++ Depth detector...")
     try:
-        from controlnet_aux import LeresDetector
-        leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
-        leres_depth.to(device)
-        print("  [OK] LeRes++ Depth loaded successfully (+15-20% accuracy over Midas/Zoe)")
-        return leres_depth, True
     except Exception as e:
-        print(f"  [WARNING] LeRes++ Depth not available: {e}")
-        print("  Attempting fallback to Midas Depth...")
-        try:
-            midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
-            midas_depth.to(device)
-            print("  [OK] Midas Depth loaded as fallback")
-            return midas_depth, True
-        except Exception as e2:
-            print(f"  [ERROR] All depth detectors failed: {e2}")
-            return None, False
 def load_controlnets():
     """Load ControlNet models."""
@@ -111,6 +115,19 @@ def load_controlnets():
         torch_dtype=dtype
     ).to(device)
     print("  [OK] ControlNet Depth loaded")
     print("Loading InstantID ControlNet...")
     try:
@@ -120,10 +137,12 @@ def load_controlnets():
             torch_dtype=dtype
         ).to(device)
         print("  [OK] InstantID ControlNet loaded successfully")
-        return controlnet_depth, controlnet_instantid, True
     except Exception as e:
         print(f"  [WARNING] InstantID ControlNet not available: {e}")
-        return controlnet_depth, None, False
 def load_image_encoder():
@@ -150,7 +169,7 @@ def load_sdxl_pipeline(controlnets):
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
             model_path,
-            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
@@ -161,7 +180,7 @@ def load_sdxl_pipeline(controlnets):
         print("  Using default SDXL base model")
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0",
-            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
@@ -173,7 +192,7 @@ def load_lora(pipe):
     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
-        pipe.load_lora_weights(lora_path)
         print(f"  [OK] LORA loaded successfully")
         return True
     except Exception as e:
@@ -285,7 +304,7 @@ def setup_ip_adapter(pipe, image_encoder):
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
-        print(f"  - Face embeddings: 512D ÃƒÂ¢Ã¢â‚¬Â Ã¢â‚¬â„¢ 16x2048D")
         return image_proj_model, True
@@ -297,37 +316,19 @@ def setup_ip_adapter(pipe, image_encoder):
 def setup_compel(pipe):
-    """Setup Compel for better SDXL prompt handling with robust error handling."""
     print("Setting up Compel for enhanced prompt processing...")
     try:
-        # FIXED: Handle SDXL dual tokenizer setup more carefully
         compel = Compel(
             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True],
-            padding_get_round_multiple=False  # Disable padding that might cause mismatches
         )
-        print("  [OK] Compel loaded successfully with SDXL dual tokenizers")
         return compel, True
-    except TypeError:
-        # Fallback for older Compel versions without padding parameter
-        try:
-            compel = Compel(
-                tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
-                text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
-                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-                requires_pooled=[False, True]
-            )
-            print("  [OK] Compel loaded (standard config)")
-            return compel, True
-        except Exception as e:
-            print(f"  [WARNING] Compel not available: {e}")
-            print("  [INFO] Will use standard prompt encoding instead")
-            return None, False
     except Exception as e:
         print(f"  [WARNING] Compel not available: {e}")
-        print("  [INFO] Will use standard prompt encoding instead")
         return None, False

 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
+from controlnet_aux import ZoeDetector, OpenposeDetector  # <-- NEW
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
 def load_depth_detector():
+    """Load Zoe Depth detector."""
+    print("Loading Zoe Depth detector...")
     try:
+        zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
+        zoe_depth.to(device)
+        print("  [OK] Zoe Depth loaded successfully")
+        return zoe_depth, True
     except Exception as e:
+        print(f"  [WARNING] Zoe Depth not available: {e}")
+        return None, False
+# --- NEW FUNCTION ---
+def load_openpose_detector():
+    """Load OpenPose detector."""
+    print("Loading OpenPose detector...")
+    try:
+        openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
+        openpose.to(device)
+        print("  [OK] OpenPose loaded successfully")
+        return openpose, True
+    except Exception as e:
+        print(f"  [WARNING] OpenPose not available: {e}")
+        return None, False
+# --- END NEW FUNCTION ---
 def load_controlnets():
     """Load ControlNet models."""
         torch_dtype=dtype
     ).to(device)
     print("  [OK] ControlNet Depth loaded")
+    # --- NEW: Load OpenPose ControlNet ---
+    print("Loading ControlNet OpenPose model...")
+    try:
+        controlnet_openpose = ControlNetModel.from_pretrained(
+            "diffusers/controlnet-openpose-sdxl-1.0",
+            torch_dtype=dtype
+        ).to(device)
+        print("  [OK] ControlNet OpenPose loaded")
+    except Exception as e:
+        print(f"  [WARNING] ControlNet OpenPose not available: {e}")
+        controlnet_openpose = None
+    # --- END NEW ---
     print("Loading InstantID ControlNet...")
     try:
             torch_dtype=dtype
         ).to(device)
         print("  [OK] InstantID ControlNet loaded successfully")
+        # Return all three models
+        return controlnet_depth, controlnet_instantid, controlnet_openpose, True
     except Exception as e:
         print(f"  [WARNING] InstantID ControlNet not available: {e}")
+        # Return models, indicating InstantID failure
+        return controlnet_depth, None, controlnet_openpose, False
 def load_image_encoder():
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
             model_path,
+            controlnet=controlnets,  # Pass the list of 3 controlnets
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
         print("  Using default SDXL base model")
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0",
+            controlnet=controlnets,  # Pass the list of 3 controlnets
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
+        pipe.load_lora_weights(lora_path, adapter_name="retroart")
         print(f"  [OK] LORA loaded successfully")
         return True
     except Exception as e:
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
+        print(f"  - Face embeddings: 512D -> 16x2048D")
         return image_proj_model, True
 def setup_compel(pipe):
+    """Setup Compel for better SDXL prompt handling."""
     print("Setting up Compel for enhanced prompt processing...")
     try:
         compel = Compel(
             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+            requires_pooled=[False, True]
         )
+        print("  [OK] Compel loaded successfully")
         return compel, True
     except Exception as e:
         print(f"  [WARNING] Compel not available: {e}")
         return None, False