pixagram-neo-backup

Runtime error

App Files Files Community

primerz committed on Nov 5, 2025

Commit

bbcd03c

verified ·

1 Parent(s): f587361

Update models.py

Browse files

Files changed (1) hide show

models.py +20 -112

models.py CHANGED Viewed

@@ -116,20 +116,15 @@ def load_sdxl_pipeline(controlnets):
     """
     print("Loading pipeline...")
-    # Load VAE (line 128)
-    vae = AutoencoderKL.from_pretrained(
-        "madebyollin/sdxl-vae-fp16-fix",
-        torch_dtype=dtype
-    )
-    print("  [OK] VAE loaded")
-    # Create pipeline (line 134) - controlnets as LIST!
-    pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
-        "frankjoshua/albedobaseXL_v21",
-        vae=vae,
-        controlnet=controlnets,  # ← LIST [identitynet, zoedepthnet] - NO WRAPPER!
-        torch_dtype=dtype
-    )
     print("  [OK] Pipeline created with direct controlnet list")
     # LCM scheduler
@@ -309,120 +304,33 @@ def load_image_encoder():
         print(f"  [ERROR] Could not load image encoder: {e}")
         return None
-def setup_ip_adapter(pipe, image_encoder):
     """
-    Setup IP-Adapter for InstantID face embeddings - PROPER IMPLEMENTATION.
-    Based on the reference InstantID pipeline.
     """
-    if image_encoder is None:
-        return None, False
-    print("Setting up IP-Adapter for InstantID face embeddings (proper implementation)...")
     try:
         # Download InstantID weights
-        ip_adapter_path = download_model_with_retry(
             "InstantX/InstantID",
             "ip-adapter.bin"
         )
-        # Load full state dict
-        state_dict = torch.load(ip_adapter_path, map_location="cpu")
-        # Extract image_proj and ip_adapter weights
-        image_proj_state_dict = {}
-        ip_adapter_state_dict = {}
-        for key, value in state_dict.items():
-            if key.startswith("image_proj."):
-                image_proj_state_dict[key.replace("image_proj.", "")] = value
-            elif key.startswith("ip_adapter."):
-                ip_adapter_state_dict[key.replace("ip_adapter.", "")] = value
-        # Create Resampler (image projection model) with CORRECT parameters from reference
-        print("Creating Resampler (Perceiver architecture)...")
-        image_proj_model = Resampler(
-            dim=1280,                                       # Hidden dimension
-            depth=4,                                        # IMPORTANT: 4 layers (not 8!)
-            dim_head=64,                                    # Dimension per head
-            heads=20,                                       # Number of heads
-            num_queries=16,                                 # Number of output tokens
-            embedding_dim=512,                              # InsightFace embedding dim
-            output_dim=pipe.unet.config.cross_attention_dim,  # SDXL cross-attention dim (2048)
-            ff_mult=4                                       # Feedforward multiplier
-        )
-        image_proj_model.eval()
-        image_proj_model = image_proj_model.to(device, dtype=dtype)
-        # Load image_proj weights
-        if image_proj_state_dict:
-            try:
-                image_proj_model.load_state_dict(image_proj_state_dict, strict=True)
-                print("  [OK] Resampler loaded with pretrained weights")
-            except Exception as e:
-                print(f"  [WARNING] Could not load Resampler weights: {e}")
-                print("  Using randomly initialized Resampler")
-        else:
-            print("  [WARNING] No image_proj weights found, using random initialization")
-        # Setup IP-Adapter attention processors
-        print("Setting up IP-Adapter attention processors...")
-        attn_procs = {}
-        num_tokens = 16  # Match Resampler num_queries
-        for name in pipe.unet.attn_processors.keys():
-            cross_attention_dim = None if name.endswith("attn1.processor") else pipe.unet.config.cross_attention_dim
-            if name.startswith("mid_block"):
-                hidden_size = pipe.unet.config.block_out_channels[-1]
-            elif name.startswith("up_blocks"):
-                block_id = int(name[len("up_blocks.")])
-                hidden_size = list(reversed(pipe.unet.config.block_out_channels))[block_id]
-            elif name.startswith("down_blocks"):
-                block_id = int(name[len("down_blocks.")])
-                hidden_size = pipe.unet.config.block_out_channels[block_id]
-            else:
-                hidden_size = pipe.unet.config.block_out_channels[-1]
-            if cross_attention_dim is None:
-                attn_procs[name] = AttnProcessor2_0()
-            else:
-                attn_procs[name] = IPAttnProcessor2_0(
-                    hidden_size=hidden_size,
-                    cross_attention_dim=cross_attention_dim,
-                    scale=1.0,
-                    num_tokens=num_tokens
-                ).to(device, dtype=dtype)
-        # Set attention processors
-        pipe.unet.set_attn_processor(attn_procs)
-        # Load IP-Adapter weights into attention processors
-        if ip_adapter_state_dict:
-            try:
-                ip_layers = torch.nn.ModuleList(pipe.unet.attn_processors.values())
-                ip_layers.load_state_dict(ip_adapter_state_dict, strict=False)
-                print("  [OK] IP-Adapter attention weights loaded")
-            except Exception as e:
-                print(f"  [WARNING] Could not load IP-Adapter weights: {e}")
-        else:
-            print("  [WARNING] No ip_adapter weights found")
-        # Store image encoder and projection model
-        pipe.image_encoder = image_encoder
-        print("  [OK] IP-Adapter fully loaded with InstantID architecture")
-        print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
-        print(f"  - Face embeddings: 512D → 16x2048D")
-        return image_proj_model, True
     except Exception as e:
         print(f"  [ERROR] Could not setup IP-Adapter: {e}")
         import traceback
         traceback.print_exc()
-        return None, False
 __all__ = ['draw_kps', 'fuse_lora_with_scale', 'load_image_encoder', 'setup_ip_adapter']

     """
     print("Loading pipeline...")
+    model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
+    pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
+        model_path,
+        controlnet=controlnets,
+        torch_dtype=dtype,
+        use_safetensors=True
+    );
     print("  [OK] Pipeline created with direct controlnet list")
     # LCM scheduler
         print(f"  [ERROR] Could not load image encoder: {e}")
         return None
+def setup_ip_adapter(pipe):
     """
+    Setup IP-Adapter for InstantID - SIMPLIFIED VERSION.
+    Uses the pipeline's built-in method like exampleapp.py.
     """
+    print("Setting up IP-Adapter for InstantID face embeddings...")
     try:
         # Download InstantID weights
+        face_adapter_path = download_model_with_retry(
             "InstantX/InstantID",
             "ip-adapter.bin"
         )
+        # Use the pipeline's built-in method (like exampleapp.py line 139)
+        pipe.load_ip_adapter_instantid(face_adapter_path)
+        # Set initial scale (like exampleapp.py line 140)
+        pipe.set_ip_adapter_scale(0.8)
+        print("  [OK] IP-Adapter loaded successfully with built-in method")
+        return True
     except Exception as e:
         print(f"  [ERROR] Could not setup IP-Adapter: {e}")
         import traceback
         traceback.print_exc()
+        return False
 __all__ = ['draw_kps', 'fuse_lora_with_scale', 'load_image_encoder', 'setup_ip_adapter']