Update models.py
models.py CHANGED
@@ -70,8 +70,10 @@ def load_face_analysis():
 
     try:
         antelope_download = snapshot_download(repo_id="DIAMONIK7777/antelopev2", local_dir="/data/models/antelopev2")
-
+        # --- FIX: Load InsightFace on CPU to save VRAM ---
+        face_app = FaceAnalysis(name='antelopev2', root='/data', providers=['CPUExecutionProvider'])
         face_app.prepare(ctx_id=0, det_size=(640, 640))
+        print(" [OK] Face analysis loaded (on CPU)")
         return face_app, True
 
     except Exception as e:
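Note on the hunk above: InsightFace hands the provider list straight to ONNX Runtime, so pinning ['CPUExecutionProvider'] keeps antelopev2 off the GPU entirely. A minimal sketch of making that choice dynamic instead of hard-coded (the PREFER_GPU_FACE flag is hypothetical, not in models.py):

import onnxruntime as ort
from insightface.app import FaceAnalysis

# Default to CPU: face analysis only extracts embeddings/keypoints once per
# request, so this trades a little speed for VRAM headroom for the pipeline.
providers = ['CPUExecutionProvider']
if PREFER_GPU_FACE and 'CUDAExecutionProvider' in ort.get_available_providers():
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

face_app = FaceAnalysis(name='antelopev2', root='/data', providers=providers)
face_app.prepare(ctx_id=0, det_size=(640, 640))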
@@ -91,9 +93,10 @@ def load_depth_detector():
     # Try LeresDetector first (best quality)
     try:
         print(" Attempting LeresDetector (highest quality)...")
+        # --- FIX: Load on CPU ---
         leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
-        # leres_depth.to(device)
-        print(" [OK] LeresDetector loaded successfully")
+        # leres_depth.to(device) # Removed
+        print(" [OK] LeresDetector loaded successfully (on CPU)")
         return leres_depth, 'leres', True
     except Exception as e:
         print(f" [INFO] LeresDetector not available: {e}")
@@ -101,9 +104,10 @@ def load_depth_detector():
     # Fallback to ZoeDetector
     try:
         print(" Attempting ZoeDetector (fallback #1)...")
+        # --- FIX: Load on CPU ---
         zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
-        # zoe_depth.to(device)
-        print(" [OK] ZoeDetector loaded successfully")
+        # zoe_depth.to(device) # Removed
+        print(" [OK] ZoeDetector loaded successfully (on CPU)")
         return zoe_depth, 'zoe', True
     except Exception as e:
         print(f" [INFO] ZoeDetector not available: {e}")
@@ -111,9 +115,10 @@ def load_depth_detector():
     # Final fallback to MidasDetector
     try:
         print(" Attempting MidasDetector (fallback #2)...")
+        # --- FIX: Load on CPU ---
         midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
-        # midas_depth.to(device)
-        print(" [OK] MidasDetector loaded successfully")
+        # midas_depth.to(device) # Removed
+        print(" [OK] MidasDetector loaded successfully (on CPU)")
         return midas_depth, 'midas', True
     except Exception as e:
         print(f" [WARNING] MidasDetector not available: {e}")
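The three hunks above implement a single quality-ordered fallback chain, and all three detectors now stay on the CPU (controlnet_aux loads them there by default). Condensed into a loop, the pattern looks like this (a sketch, not the file's actual code):

from controlnet_aux import LeresDetector, ZoeDetector, MidasDetector

def load_depth_detector_sketch():
    # Try detectors in descending quality order; return the first that loads.
    for cls, name in [(LeresDetector, 'leres'), (ZoeDetector, 'zoe'), (MidasDetector, 'midas')]:
        try:
            print(f" Attempting {cls.__name__}...")
            return cls.from_pretrained("lllyasviel/Annotators"), name, True
        except Exception as e:
            print(f" [INFO] {cls.__name__} not available: {e}")
    return None, None, False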
@@ -126,9 +131,10 @@ def load_openpose_detector():
     """Load OpenPose detector."""
     print("Loading OpenPose detector...")
     try:
+        # --- FIX: Load on CPU ---
         openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
-        # openpose.to(device)
-        print(" [OK] OpenPose loaded successfully")
+        # openpose.to(device) # Removed
+        print(" [OK] OpenPose loaded successfully (on CPU)")
         return openpose, True
     except Exception as e:
         print(f" [WARNING] OpenPose not available: {e}")
@@ -151,20 +157,22 @@ def load_mediapipe_face_detector():
 def load_controlnets():
     """Load ControlNet models."""
     print("Loading ControlNet Zoe Depth model...")
+    # --- FIX: Load core models on GPU ---
     controlnet_depth = ControlNetModel.from_pretrained(
         "xinsir/controlnet-depth-sdxl-1.0",
         torch_dtype=dtype
-    )
-    print(" [OK] ControlNet Depth loaded")
+    ).to(device)
+    print(" [OK] ControlNet Depth loaded (on GPU)")
 
     # --- NEW: Load OpenPose ControlNet ---
     print("Loading ControlNet OpenPose model...")
     try:
+        # --- FIX: Load core models on GPU ---
         controlnet_openpose = ControlNetModel.from_pretrained(
             "xinsir/controlnet-openpose-sdxl-1.0",
             torch_dtype=dtype
-        )
-        print(" [OK] ControlNet OpenPose loaded")
+        ).to(device)
+        print(" [OK] ControlNet OpenPose loaded (on GPU)")
     except Exception as e:
         print(f" [WARNING] ControlNet OpenPose not available: {e}")
         controlnet_openpose = None
@@ -172,12 +180,13 @@ def load_controlnets():
 
     print("Loading InstantID ControlNet...")
     try:
+        # --- FIX: Load core models on GPU ---
         controlnet_instantid = ControlNetModel.from_pretrained(
             "InstantX/InstantID",
             subfolder="ControlNetModel",
             torch_dtype=dtype
-        )
-        print(" [OK] InstantID ControlNet loaded successfully")
+        ).to(device)
+        print(" [OK] InstantID ControlNet loaded successfully (on GPU)")
         # Return all three models
         return controlnet_depth, controlnet_instantid, controlnet_openpose, True
     except Exception as e:
@@ -190,12 +199,13 @@ def load_image_encoder():
     """Load CLIP Image Encoder for IP-Adapter."""
     print("Loading CLIP Image Encoder for IP-Adapter...")
     try:
+        # --- FIX: Load core models on GPU ---
         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
             "h94/IP-Adapter",
             subfolder="models/image_encoder",
             torch_dtype=dtype
-        )
-        print(" [OK] CLIP Image Encoder loaded successfully")
+        ).to(device)
+        print(" [OK] CLIP Image Encoder loaded successfully (on GPU)")
         return image_encoder
     except Exception as e:
         print(f" [ERROR] Could not load image encoder: {e}")
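Why the image encoder moves to the GPU while the detectors stay on CPU: its embedding conditions the UNet through IP-Adapter on every generation, so it should share the pipeline's device and dtype. A sketch of the consuming side (clip_processor and face_crop are illustrative names, not from models.py):

from transformers import CLIPImageProcessor

clip_processor = CLIPImageProcessor()
pixel_values = clip_processor(images=face_crop, return_tensors="pt").pixel_values
# Runs on the GPU, in the same dtype as the pipeline it conditions.
image_embeds = image_encoder(pixel_values.to(device, dtype)).image_embeds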
@@ -213,7 +223,7 @@ def load_sdxl_pipeline(controlnets):
             controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
-        ).to(device)
+        ).to(device) # This main pipe MUST be on device
         print(" [OK] Custom checkpoint loaded successfully (VAE bundled)")
         return pipe, True
     except Exception as e:
@@ -224,7 +234,7 @@ def load_sdxl_pipeline(controlnets):
             controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
-        ).to(device)
+        ).to(device) # This main pipe MUST be on device
         return pipe, False
 
 
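Taken together, the device split is coherent: the CPU-resident detectors run once per request to produce control images, while the pipeline and its three ControlNets do the iterative denoising on the GPU. An illustrative call (variable names such as face_kps_image are placeholders; the exact signature depends on the pipeline class used in load_sdxl_pipeline):

depth_map = depth_detector(input_image)   # CPU preprocessing, returns a PIL image
pose_map = openpose(input_image)          # CPU preprocessing, returns a PIL image

result = pipe(
    prompt=prompt,
    image=[depth_map, face_kps_image, pose_map],    # one control image per ControlNet
    controlnet_conditioning_scale=[0.5, 0.8, 0.5],  # illustrative weights
).images[0]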
@@ -399,22 +409,12 @@ def setup_scheduler(pipe):
 def optimize_pipeline(pipe):
     """Apply optimizations to pipeline."""
 
-    #
-    if device == "cuda":
-        try:
-            pipe.enable_xformers_memory_efficient_attention()
-            print(" [OK] xformers enabled")
-        except Exception as e:
-            print(f" [INFO] xformers not available: {e}")
-
-    # Enable CPU offloading for VRAM-constrained environments
-    print(" [OK] Enabling model CPU offloading...")
-    pipe.enable_model_cpu_offload()
+    # --- FIX: Removed enable_model_cpu_offload() ---
 
     # Try to enable xformers
     if device == "cuda":
         try:
-            pipe.
+            pipe.enable_xformers_memory_efficient_attention()
             print(" [OK] xformers enabled")
         except Exception as e:
             print(f" [INFO] xformers not available: {e}")
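Two things happen in the hunk above: the dangling `pipe.` line that broke the Space is completed, and enable_model_cpu_offload() is removed because it conflicts with a pipeline that load_sdxl_pipeline has already moved to the GPU via .to(device); offloading expects to manage device placement itself. If offload were ever needed again on small GPUs, a guarded variant might look like this (a sketch; the 16 GB threshold is an assumption, not from the original file):

import torch

def optimize_pipeline_sketch(pipe):
    if device == "cuda":
        total_vram = torch.cuda.get_device_properties(0).total_memory
        if total_vram < 16 * 1024**3:
            # Offload manages devices itself; skip the earlier .to(device) in that case.
            pipe.enable_model_cpu_offload()
        try:
            pipe.enable_xformers_memory_efficient_attention()
            print(" [OK] xformers enabled")
        except Exception as e:
            print(f" [INFO] xformers not available: {e}")
    return pipe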
@@ -433,11 +433,12 @@ def load_caption_model():
 
         print(" Attempting GIT-Large (recommended)...")
         caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+        # --- FIX: Load on CPU ---
         caption_model = AutoModelForCausalLM.from_pretrained(
             "microsoft/git-large-coco",
             torch_dtype=dtype
-        )
-        print(" [OK] GIT-Large model loaded (produces detailed captions)")
+        ) # .to(device) removed
+        print(" [OK] GIT-Large model loaded (produces detailed captions, on CPU)")
         return caption_processor, caption_model, True, 'git'
     except Exception as e1:
         print(f" [INFO] GIT-Large not available: {e1}")
@@ -448,11 +449,12 @@ def load_caption_model():
 
         print(" Attempting BLIP base (fallback)...")
         caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+        # --- FIX: Load on CPU ---
        caption_model = BlipForConditionalGeneration.from_pretrained(
             "Salesforce/blip-image-captioning-base",
             torch_dtype=dtype
-        )
-        print(" [OK] BLIP base model loaded (standard captions)")
+        ) # .to(device) removed
+        print(" [OK] BLIP base model loaded (standard captions, on CPU)")
         return caption_processor, caption_model, True, 'blip'
     except Exception as e2:
         print(f" [WARNING] Caption models not available: {e2}")
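One caveat about keeping the caption models on CPU: if dtype is torch.float16, CPU inference is slow and some ops lack half-precision CPU kernels, so casting to float32 is a common safeguard. A sketch of CPU-side captioning with the GIT variant (the float32 cast is an addition, not in models.py):

import torch

if next(caption_model.parameters()).dtype == torch.float16:
    caption_model = caption_model.to(torch.float32)  # fp16 kernels are spotty on CPU

inputs = caption_processor(images=image, return_tensors="pt")  # tensors stay on CPU
generated_ids = caption_model.generate(pixel_values=inputs.pixel_values, max_length=50)
caption = caption_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]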
@@ -463,7 +465,7 @@ def load_caption_model():
 def set_clip_skip(pipe):
     """Set CLIP skip value."""
     if hasattr(pipe, 'text_encoder'):
-        print(" [OK] CLIP skip set to {CLIP_SKIP}")
+        print(f" [OK] CLIP skip set to {CLIP_SKIP}")
 
 
 print("[OK] Model loading functions ready")