primerz committed on
Commit
70a37ed
·
verified ·
1 Parent(s): 82f7fe1

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +122 -168
models.py CHANGED
@@ -1,25 +1,34 @@
1
  """
2
  Model loading and initialization for Pixagram AI Pixel Art Generator
3
- FIXED VERSION with proper IP-Adapter and BLIP-2 support
 
4
  """
5
  import torch
6
  import time
 
7
  from diffusers import (
8
- StableDiffusionXLControlNetImg2ImgPipeline,
9
  ControlNetModel,
10
  AutoencoderKL,
11
- LCMScheduler
 
12
  )
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
- from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
- from controlnet_aux import ZoeDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
20
- # Use reference implementation's attention processor
21
- from attention_processor import IPAttnProcessor2_0, AttnProcessor
22
- from resampler import Resampler
 
 
 
 
 
 
 
23
 
24
  from config import (
25
  device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
@@ -62,19 +71,19 @@ def download_model_with_retry(repo_id, filename, max_retries=None):
62
 
63
 
64
  def load_face_analysis():
65
- """Load face analysis model with proper error handling."""
66
- print("Loading face analysis model...")
67
  try:
68
  face_app = FaceAnalysis(
69
- name=FACE_DETECTION_CONFIG['model_name'],
70
- root='./models/insightface',
71
  providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
72
  )
73
  face_app.prepare(
74
- ctx_id=FACE_DETECTION_CONFIG['ctx_id'],
75
- det_size=FACE_DETECTION_CONFIG['det_size']
76
  )
77
- print(" [OK] Face analysis model loaded successfully")
78
  return face_app, True
79
  except Exception as e:
80
  print(f" [WARNING] Face detection not available: {e}")
@@ -82,89 +91,122 @@ def load_face_analysis():
82
 
83
 
84
  def load_depth_detector():
85
- """Load Zoe Depth detector."""
86
- print("Loading Zoe Depth detector...")
87
  try:
88
- zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
89
- zoe_depth.to(device)
90
- print(" [OK] Zoe Depth loaded successfully")
91
- return zoe_depth, True
92
  except Exception as e:
93
- print(f" [WARNING] Zoe Depth not available: {e}")
 
 
 
 
 
 
 
 
 
 
 
94
  return None, False
95
 
96
 
97
  def load_controlnets():
98
- """Load ControlNet models."""
99
- print("Loading ControlNet Zoe Depth model...")
100
  controlnet_depth = ControlNetModel.from_pretrained(
101
- "diffusers/controlnet-zoe-depth-sdxl-1.0",
102
  torch_dtype=dtype
103
  ).to(device)
104
  print(" [OK] ControlNet Depth loaded")
105
 
106
- print("Loading InstantID ControlNet...")
107
  try:
108
- controlnet_instantid = ControlNetModel.from_pretrained(
109
- "InstantX/InstantID",
110
- subfolder="ControlNetModel",
111
  torch_dtype=dtype
112
  ).to(device)
113
- print(" [OK] InstantID ControlNet loaded successfully")
114
- return controlnet_depth, controlnet_instantid, True
115
  except Exception as e:
116
- print(f" [WARNING] InstantID ControlNet not available: {e}")
117
  return controlnet_depth, None, False
118
 
119
 
120
  def load_image_encoder():
121
- """Load CLIP Image Encoder for IP-Adapter."""
122
- print("Loading CLIP Image Encoder for IP-Adapter...")
123
- try:
124
- image_encoder = CLIPVisionModelWithProjection.from_pretrained(
125
- "h94/IP-Adapter",
126
- subfolder="models/image_encoder",
127
- torch_dtype=dtype
128
- ).to(device)
129
- print(" [OK] CLIP Image Encoder loaded successfully")
130
- return image_encoder
131
- except Exception as e:
132
- print(f" [ERROR] Could not load image encoder: {e}")
133
- return None
134
 
135
 
136
  def load_sdxl_pipeline(controlnets):
137
- """Load SDXL checkpoint from HuggingFace Hub."""
138
- print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  try:
140
- model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
141
-
142
- pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
143
- model_path,
144
- controlnet=controlnets,
145
- torch_dtype=dtype,
146
- use_safetensors=True
147
  ).to(device)
148
- print(" [OK] Custom checkpoint loaded successfully (VAE bundled)")
149
  return pipe, True
150
  except Exception as e:
151
- print(f" [WARNING] Could not load custom checkpoint: {e}")
152
- print(" Using default SDXL base model")
153
- pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
154
- "stabilityai/stable-diffusion-xl-base-1.0",
155
- controlnet=controlnets,
156
- torch_dtype=dtype,
157
- use_safetensors=True
158
- ).to(device)
159
- return pipe, False
 
160
 
161
 
162
  def load_lora(pipe):
163
- """Load LORA from HuggingFace Hub."""
164
  print("Loading LORA (retroart) from HuggingFace Hub...")
165
  try:
166
  lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
167
- # **FIX 2: Add adapter_name="retroart"**
168
  pipe.load_lora_weights(lora_path, adapter_name="retroart")
169
  print(f" [OK] LORA loaded successfully")
170
  return True
@@ -173,113 +215,25 @@ def load_lora(pipe):
173
  return False
174
 
175
 
176
- def setup_ip_adapter(pipe, image_encoder):
177
  """
178
- Setup IP-Adapter for InstantID face embeddings - PROPER IMPLEMENTATION.
179
- Based on the reference InstantID pipeline.
180
  """
181
- if image_encoder is None:
182
- return None, False
183
-
184
- print("Setting up IP-Adapter for InstantID face embeddings (proper implementation)...")
185
  try:
186
- # Download InstantID weights
187
- ip_adapter_path = download_model_with_retry(
188
- "InstantX/InstantID",
189
- "ip-adapter.bin"
190
- )
191
-
192
- # Load full state dict
193
- state_dict = torch.load(ip_adapter_path, map_location="cpu")
194
-
195
- # Extract image_proj and ip_adapter weights
196
- image_proj_state_dict = {}
197
- ip_adapter_state_dict = {}
198
-
199
- for key, value in state_dict.items():
200
- if key.startswith("image_proj."):
201
- image_proj_state_dict[key.replace("image_proj.", "")] = value
202
- elif key.startswith("ip_adapter."):
203
- ip_adapter_state_dict[key.replace("ip_adapter.", "")] = value
204
-
205
- # Create Resampler (image projection model) with CORRECT parameters from reference
206
- print("Creating Resampler (Perceiver architecture)...")
207
- image_proj_model = Resampler(
208
- dim=1280, # Hidden dimension
209
- depth=4, # IMPORTANT: 4 layers (not 8!)
210
- dim_head=64, # Dimension per head
211
- heads=20, # Number of heads
212
- num_queries=16, # Number of output tokens
213
- embedding_dim=512, # InsightFace embedding dim
214
- output_dim=pipe.unet.config.cross_attention_dim, # SDXL cross-attention dim (2048)
215
- ff_mult=4 # Feedforward multiplier
216
  )
217
 
218
- image_proj_model.eval()
219
- image_proj_model = image_proj_model.to(device, dtype=dtype)
220
-
221
- # Load image_proj weights
222
- if image_proj_state_dict:
223
- try:
224
- image_proj_model.load_state_dict(image_proj_state_dict, strict=True)
225
- print(" [OK] Resampler loaded with pretrained weights")
226
- except Exception as e:
227
- print(f" [WARNING] Could not load Resampler weights: {e}")
228
- print(" Using randomly initialized Resampler")
229
- else:
230
- print(" [WARNING] No image_proj weights found, using random initialization")
231
-
232
- # Setup IP-Adapter attention processors
233
- print("Setting up IP-Adapter attention processors...")
234
- attn_procs = {}
235
- num_tokens = 16 # Match Resampler num_queries
236
-
237
- for name in pipe.unet.attn_processors.keys():
238
- cross_attention_dim = None if name.endswith("attn1.processor") else pipe.unet.config.cross_attention_dim
239
-
240
- if name.startswith("mid_block"):
241
- hidden_size = pipe.unet.config.block_out_channels[-1]
242
- elif name.startswith("up_blocks"):
243
- block_id = int(name[len("up_blocks.")])
244
- hidden_size = list(reversed(pipe.unet.config.block_out_channels))[block_id]
245
- elif name.startswith("down_blocks"):
246
- block_id = int(name[len("down_blocks.")])
247
- hidden_size = pipe.unet.config.block_out_channels[block_id]
248
- else:
249
- hidden_size = pipe.unet.config.block_out_channels[-1]
250
-
251
- if cross_attention_dim is None:
252
- attn_procs[name] = AttnProcessor2_0()
253
- else:
254
- attn_procs[name] = IPAttnProcessor2_0(
255
- hidden_size=hidden_size,
256
- cross_attention_dim=cross_attention_dim,
257
- scale=1.0,
258
- num_tokens=num_tokens
259
- ).to(device, dtype=dtype)
260
-
261
- # Set attention processors
262
- pipe.unet.set_attn_processor(attn_procs)
263
-
264
- # Load IP-Adapter weights into attention processors
265
- if ip_adapter_state_dict:
266
- try:
267
- ip_layers = torch.nn.ModuleList(pipe.unet.attn_processors.values())
268
- ip_layers.load_state_dict(ip_adapter_state_dict, strict=False)
269
- print(" [OK] IP-Adapter attention weights loaded")
270
- except Exception as e:
271
- print(f" [WARNING] Could not load IP-Adapter weights: {e}")
272
- else:
273
- print(" [WARNING] No ip_adapter weights found")
274
-
275
- # Store image encoder and projection model
276
- pipe.image_encoder = image_encoder
277
-
278
- print(" [OK] IP-Adapter fully loaded with InstantID architecture")
279
- print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
280
- print(f" - Face embeddings: 512D → 16x2048D")
281
 
282
- return image_proj_model, True
 
283
 
284
  except Exception as e:
285
  print(f" [ERROR] Could not setup IP-Adapter: {e}")
@@ -369,4 +323,4 @@ def set_clip_skip(pipe):
369
  print(f" [OK] CLIP skip set to {CLIP_SKIP}")
370
 
371
 
372
- print("[OK] Model loading functions ready")
 
1
  """
2
  Model loading and initialization for Pixagram AI Pixel Art Generator
3
+ HYBRID VERSION - Supports both local files and HuggingFace repos
4
+ MODIFIED for IP-Adapter-FaceIDXL (non-plus) and LCM Scheduler
5
  """
6
  import torch
7
  import time
8
+ import os
9
  from diffusers import (
 
10
  ControlNetModel,
11
  AutoencoderKL,
12
+ LCMScheduler, # Changed back to LCM
13
+ StableDiffusionXLControlNetImg2ImgPipeline
14
  )
15
  from diffusers.models.attention_processor import AttnProcessor2_0
16
+ from transformers import CLIPVisionModelWithProjection, pipeline
17
  from insightface.app import FaceAnalysis
18
+ from controlnet_aux import LeresDetector, CannyDetector
19
  from huggingface_hub import hf_hub_download
20
  from compel import Compel, ReturnedEmbeddingsType
21
 
22
+ # Import the IP-Adapter wrapper classes
23
+ try:
24
+ # Import base class and the specific SDXL class
25
+ from ip_adapter.ip_adapter_faceid import IPAdapterFaceID, IPAdapterFaceIDXL
26
+ except ImportError:
27
+ print("="*80)
28
+ print("[FATAL ERROR] `ip_adapter` library not found.")
29
+ print("Please install it: pip install ip-adapter")
30
+ print("="*80)
31
+ raise
32
 
33
  from config import (
34
  device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
 
71
 
72
 
73
  def load_face_analysis():
74
+ """Load face analysis model (buffalo_l) with proper error handling."""
75
+ print("Loading face analysis model (buffalo_l)...")
76
  try:
77
  face_app = FaceAnalysis(
78
+ name='buffalo_l', # Changed from antelopev2
79
+ root='/data',
80
  providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
81
  )
82
  face_app.prepare(
83
+ ctx_id=0,
84
+ det_size=(640, 640)
85
  )
86
+ print(" [OK] Face analysis model (buffalo_l) loaded successfully")
87
  return face_app, True
88
  except Exception as e:
89
  print(f" [WARNING] Face detection not available: {e}")
 
91
 
92
 
93
  def load_depth_detector():
94
+ """Load LeReS++ Depth detector."""
95
+ print("Loading LeReS++ detector...")
96
  try:
97
+ leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
98
+ leres.to(device)
99
+ print(" [OK] LeReS++ loaded successfully")
100
+ return leres, True
101
  except Exception as e:
102
+ print(f" [WARNING] LeReS++ not available: {e}")
103
+ return None, False
104
+
105
+ def load_canny_detector():
106
+ """Load Canny detector."""
107
+ print("Loading Canny detector...")
108
+ try:
109
+ canny = CannyDetector()
110
+ print(" [OK] Canny loaded successfully")
111
+ return canny, True
112
+ except Exception as e:
113
+ print(f" [WARNING] Canny detector not available: {e}")
114
  return None, False
115
 
116
 
117
  def load_controlnets():
118
+ """Load ControlNet models for Depth and Canny."""
119
+ print("Loading ControlNet Depth model...")
120
  controlnet_depth = ControlNetModel.from_pretrained(
121
+ "diffusers/controlnet-depth-sdxl-1.0", # Standard depth model
122
  torch_dtype=dtype
123
  ).to(device)
124
  print(" [OK] ControlNet Depth loaded")
125
 
126
+ print("Loading ControlNet Canny model...")
127
  try:
128
+ controlnet_canny = ControlNetModel.from_pretrained(
129
+ "diffusers/controlnet-canny-sdxl-1.0",
 
130
  torch_dtype=dtype
131
  ).to(device)
132
+ print(" [OK] ControlNet Canny loaded successfully")
133
+ return controlnet_depth, controlnet_canny, True
134
  except Exception as e:
135
+ print(f" [WARNING] ControlNet Canny not available: {e}")
136
  return controlnet_depth, None, False
137
 
138
 
139
  def load_image_encoder():
140
+ """
141
+ [DEPRECATED] This function is no longer needed by IPAdapterFaceIDXL,
142
+ but we keep it here in case other components need it.
143
+ It will not be called by the generator.
144
+ """
145
+ print("Loading CLIP Image Encoder [SKIPPED - Not required by IPAdapterFaceIDXL]")
146
+ return None
 
 
 
 
 
 
147
 
148
 
149
  def load_sdxl_pipeline(controlnets):
150
+ """
151
+ Load SDXL checkpoint - MODIFIED for LCM and built-in VAE.
152
+ """
153
+
154
+ # --- VAE LOADING REMOVED ---
155
+ # We are using the VAE built into the "horizon" checkpoint.
156
+ print("Loading SDXL checkpoint (using built-in VAE)...")
157
+
158
+ pipeline_kwargs = {
159
+ "controlnet": controlnets,
160
+ "torch_dtype": dtype,
161
+ "use_safetensors": True,
162
+ # "vae": None, # <--- This line was correctly removed
163
+ }
164
+
165
+ # ATTEMPT 1: Try loading from local file (This should be your "horizon" checkpoint)
166
+ if MODEL_FILES.get('checkpoint'):
167
+ try:
168
+ print(f" [Attempt 1] Loading from local file: {MODEL_FILES['checkpoint']}...")
169
+ model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
170
+
171
+ if model_path and os.path.exists(model_path) and model_path.endswith('.safetensors'):
172
+ pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
173
+ model_path,
174
+ **pipeline_kwargs
175
+ ).to(device)
176
+ print(f" [OK] Checkpoint loaded from local file: {model_path}")
177
+ return pipe, True
178
+ else:
179
+ print(f" [INFO] Local file not found or invalid...")
180
+ except Exception as e:
181
+ print(f" [WARNING] from_single_file failed: {e}")
182
+
183
+ # ATTEMPT 2: Try loading from HuggingFace repo
184
  try:
185
+ print(f" [Attempt 2] Loading from HuggingFace repo: {MODEL_REPO}...")
186
+ pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
187
+ MODEL_REPO,
188
+ **pipeline_kwargs
 
 
 
189
  ).to(device)
190
+ print(f" [OK] Checkpoint loaded from HuggingFace repo: {MODEL_REPO}")
191
  return pipe, True
192
  except Exception as e:
193
+ print(f" [WARNING] from_pretrained failed: {e}")
194
+
195
+ # ATTEMPT 3: Fallback (Base SDXL)
196
+ print(f" [Attempt 3] Loading base SDXL model...")
197
+ pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
198
+ "stabilityai/stable-diffusion-xl-base-1.0",
199
+ **pipeline_kwargs
200
+ ).to(device)
201
+ print(" [OK] Base SDXL model loaded")
202
+ return pipe, False
203
 
204
 
205
  def load_lora(pipe):
206
+ """Load LORA (retroart) from HuggingFace Hub."""
207
  print("Loading LORA (retroart) from HuggingFace Hub...")
208
  try:
209
  lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
 
210
  pipe.load_lora_weights(lora_path, adapter_name="retroart")
211
  print(f" [OK] LORA loaded successfully")
212
  return True
 
215
  return False
216
 
217
 
218
+ def setup_ip_adapter(pipe):
219
  """
220
+ Setup IP-Adapter-FaceIDXL wrapper.
221
+ [FIXED] Does not take image_encoder_path.
222
  """
223
+ print("Setting up IP-Adapter-FaceIDXL...")
 
 
 
224
  try:
225
+ # Download the SDXL non-plus FaceID model
226
+ ip_ckpt_path = hf_hub_download(
227
+ repo_id="h94/IP-Adapter-FaceID",
228
+ filename="ip-adapter-faceid_sdxl.bin",
229
+ token=HUGGINGFACE_TOKEN
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  )
231
 
232
+ # --- [FIX] Instantiate without image_encoder_path ---
233
+ ip_model = IPAdapterFaceIDXL(pipe, ip_ckpt_path, device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
+ print(" [OK] IPAdapterFaceIDXL wrapper initialized successfully.")
236
+ return ip_model, True
237
 
238
  except Exception as e:
239
  print(f" [ERROR] Could not setup IP-Adapter: {e}")
 
323
  print(f" [OK] CLIP skip set to {CLIP_SKIP}")
324
 
325
 
326
+ print("[OK] Model loading functions ready (IP-Adapter-FaceIDXL / LCM VERSION)")