pixagram-dev

Runtime error

App Files Files Community

primerz committed on Nov 1

Commit

f4d10c2

verified ·

1 Parent(s): a6caf7b

Update models.py

Browse files

Files changed (1) hide show

models.py +100 -54

models.py CHANGED Viewed

@@ -5,6 +5,7 @@ FIXED VERSION with proper IP-Adapter and BLIP-2 support
 import torch
 import time
 import os
 from diffusers import (
     StableDiffusionXLControlNetImg2ImgPipeline,
     ControlNetModel,
@@ -14,8 +15,8 @@ from diffusers import (
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
-from controlnet_aux import ZoeDetector, OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
-from huggingface_hub import hf_hub_download, snapshot_download
 from compel import Compel, ReturnedEmbeddingsType
 # Use reference implementation's attention processor
@@ -28,26 +29,24 @@ from config import (
 )
-def download_model_with_retry(repo_id, filename, max_retries=None):
     """Download model with retry logic and proper token handling."""
     if max_retries is None:
         max_retries = DOWNLOAD_CONFIG['max_retries']
     for attempt in range(max_retries):
         try:
             print(f"  Attempting to download {filename} (attempt {attempt + 1}/{max_retries})...")
-            kwargs = {"repo_type": "model"}
-            if HUGGINGFACE_TOKEN:
-                kwargs["token"] = HUGGINGFACE_TOKEN
-            path = hf_hub_download(
                 repo_id=repo_id,
                 filename=filename,
                 **kwargs
             )
-            print(f"  [OK] Downloaded: {filename}")
-            return path
         except Exception as e:
             print(f"  [WARNING] Download attempt {attempt + 1} failed: {e}")
@@ -63,46 +62,90 @@ def download_model_with_retry(repo_id, filename, max_retries=None):
 def load_face_analysis():
-    """Load face analysis model with proper error handling."""
     print("Loading face analysis model...")
-    model_name = FACE_DETECTION_CONFIG['model_name'] # "antelopev2"
-    local_model_root = '.' # We want files to be in ./antelopev2
     local_model_path = os.path.join(local_model_root, model_name)
     try:
-        # --- NEW: Download models from HF Hub ---
-        print(f"  Ensuring insightface models are downloaded to {local_model_path}...")
-        # Check if files exist before downloading
-        required_files = ["1k3d68.onnx", "2d106det.onnx", "genderage.onnx", "glintr100.onnx", "scrfd_10g_bnkps.onnx"]
-        files_exist = all(os.path.exists(os.path.join(local_model_path, f)) for f in required_files)
-        if not files_exist:
-            print(f"  Models not found locally, downloading from {MODEL_REPO}...")
-            try:
-                snapshot_kwargs = {
-                    "repo_id": MODEL_REPO,
-                    "allow_patterns": [f"{model_name}/*.onnx"],
-                    "local_dir": local_model_root,
-                    "local_dir_use_symlinks": False,
-                    "repo_type": "model"
-                }
-                if HUGGINGFACE_TOKEN:
-                    snapshot_kwargs["token"] = HUGGINGFACE_TOKEN
-                snapshot_download(**snapshot_kwargs)
-                print("  [OK] Downloaded insightface models.")
-            except Exception as e:
-                print(f"  [ERROR] Failed to download insightface models: {e}")
-                raise # Re-raise exception to be caught by the outer try-except
-        else:
-            print("  [OK] Insightface models found locally.")
         # --- END NEW ---
         face_app = FaceAnalysis(
-            name=model_name,
-            root=local_model_root, # This will look in ./antelopev2
             providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
         )
         face_app.prepare(
@@ -114,6 +157,8 @@ def load_face_analysis():
     except Exception as e:
         print(f"  [WARNING] Face detection not available: {e}")
         return None, False
@@ -242,11 +287,11 @@ def load_sdxl_pipeline(controlnets):
     """Load SDXL checkpoint from HuggingFace Hub."""
     print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
     try:
-        model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
             model_path,
-            controlnet=controlnets,  # Pass the list of 3 controlnets
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
@@ -257,7 +302,7 @@ def load_sdxl_pipeline(controlnets):
         print("  Using default SDXL base model")
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0",
-            controlnet=controlnets,  # Pass the list of 3 controlnets
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
@@ -282,7 +327,7 @@ def load_loras(pipe):
             continue
         try:
-            lora_path = download_model_with_retry(MODEL_REPO, filename)
             pipe.load_lora_weights(lora_path, adapter_name=adapter_name)
             print(f"  [OK] LORA loaded successfully: {filename} as '{adapter_name}'")
             loaded_loras[adapter_name] = True
@@ -310,7 +355,8 @@ def setup_ip_adapter(pipe, image_encoder):
         # Download InstantID weights
         ip_adapter_path = download_model_with_retry(
             "InstantX/InstantID",
-            "ip-adapter.bin"
         )
         # Load full state dict
@@ -329,14 +375,14 @@ def setup_ip_adapter(pipe, image_encoder):
         # Create Resampler (image projection model) with CORRECT parameters from reference
         print("Creating Resampler (Perceiver architecture)...")
         image_proj_model = Resampler(
-            dim=1280,                                       # Hidden dimension
-            depth=4,                                        # IMPORTANT: 4 layers (not 8!)
-            dim_head=64,                                    # Dimension per head
-            heads=20,                                       # Number of heads
-            num_queries=16,                                 # Number of output tokens
-            embedding_dim=512,                              # InsightFace embedding dim
-            output_dim=pipe.unet.config.cross_attention_dim,  # SDXL cross-attention dim (2048)
-            ff_mult=4                                       # Feedforward multiplier
         )
         image_proj_model.eval()
@@ -356,7 +402,7 @@ def setup_ip_adapter(pipe, image_encoder):
         # Setup IP-Adapter attention processors
         print("Setting up IP-Adapter attention processors...")
         attn_procs = {}
-        num_tokens = 16  # Match Resampler num_queries
         for name in pipe.unet.attn_processors.keys():
             cross_attention_dim = None if name.endswith("attn1.processor") else pipe.unet.config.cross_attention_dim
@@ -444,7 +490,7 @@ def optimize_pipeline(pipe):
             pipe.enable_xformers_memory_efficient_attention()
             print("  [OK] xformers enabled")
         except Exception as e:
-            print(f"  [INFO] xformers not available: {e}")
 def load_caption_model():

 import torch
 import time
 import os
+import shutil
 from diffusers import (
     StableDiffusionXLControlNetImg2ImgPipeline,
     ControlNetModel,
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
+from controlnet_aux import ZoeDetector, OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
+from huggingface_hub import hf_hub_download, HfHubDownloadConfig
 from compel import Compel, ReturnedEmbeddingsType
 # Use reference implementation's attention processor
 )
+def download_model_with_retry(repo_id, filename, max_retries=None, **kwargs):
     """Download model with retry logic and proper token handling."""
     if max_retries is None:
         max_retries = DOWNLOAD_CONFIG['max_retries']
+    # Ensure token is passed if available
+    if HUGGINGFACE_TOKEN and "token" not in kwargs:
+        kwargs["token"] = HUGGINGFACE_TOKEN
     for attempt in range(max_retries):
         try:
             print(f"  Attempting to download {filename} (attempt {attempt + 1}/{max_retries})...")
+            return hf_hub_download(
                 repo_id=repo_id,
                 filename=filename,
                 **kwargs
             )
         except Exception as e:
             print(f"  [WARNING] Download attempt {attempt + 1} failed: {e}")
 def load_face_analysis():
+    """
+    Load face analysis model with proper error handling.
+    This version downloads files manually to a custom folder
+    to bypass the insightface hard-coded zip download.
+    """
     print("Loading face analysis model...")
+    # Use a custom model name to prevent insightface from auto-downloading a zip
+    model_name = FACE_DETECTION_CONFIG['model_name'] # "pixagram_face_models"
+    local_model_root = '.' # We want files to be in ./pixagram_face_models
     local_model_path = os.path.join(local_model_root, model_name)
     try:
+        # --- NEW: Manual download logic ---
+        print(f"  Ensuring insightface models are present in {local_model_path}...")
+        os.makedirs(local_model_path, exist_ok=True)
+        required_files = [
+            "1k3d68.onnx",
+            "2d106det.onnx",
+            "genderage.onnx",
+            "glintr100.onnx",
+            "scrfd_10g_bnkps.onnx"
+        ]
+        # Download config to control download location
+        download_config = HfHubDownloadConfig(
+            local_dir=local_model_path,
+            local_dir_use_symlinks=False,
+            resume_download=True
+        )
+        for file_name in required_files:
+            local_file_path = os.path.join(local_model_path, file_name)
+            if not os.path.exists(local_file_path):
+                print(f"    Downloading {file_name}...")
+                # Path to the file in the HF model repo
+                repo_file_path = f"antelopev2/{file_name}"
+                try:
+                    # Download the file directly into our target folder
+                    downloaded_path = download_model_with_retry(
+                        repo_id=MODEL_REPO,
+                        filename=repo_file_path,
+                        local_dir=local_model_path,
+                        local_dir_use_symlinks=False,
+                        resume_download=True,
+                        repo_type="model"
+                    )
+                    # hf_hub_download *might* preserve folder structure,
+                    # e.g., saving to ./pixagram_face_models/antelopev2/genderage.onnx
+                    # We must move it if that happens.
+                    expected_download_path = os.path.join(local_model_path, *repo_file_path.split('/'))
+                    if os.path.exists(expected_download_path) and expected_download_path != local_file_path:
+                        print(f"    Moving {expected_download_path} to {local_file_path}")
+                        shutil.move(expected_download_path, local_file_path)
+                        # Clean up empty antelopev2 folder if it was created
+                        try:
+                            os.rmdir(os.path.dirname(expected_download_path))
+                        except OSError:
+                            pass # Not empty, which is fine
+                    elif not os.path.exists(local_file_path):
+                         # Fallback in case logic is wrong, just check the returned path
+                         if downloaded_path != local_file_path:
+                             print(f"    Moving {downloaded_path} to {local_file_path}")
+                             shutil.move(downloaded_path, local_file_path)
+                except Exception as e:
+                    print(f"    [ERROR] Failed to download {file_name}: {e}")
+                    raise # Re-raise to stop startup
+        print("  [OK] All insightface models are present locally.")
         # --- END NEW ---
         face_app = FaceAnalysis(
+            name=model_name,        # "pixagram_face_models" (custom name)
+            root=local_model_root,  # "." (looks in ./pixagram_face_models)
             providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
         )
         face_app.prepare(
     except Exception as e:
         print(f"  [WARNING] Face detection not available: {e}")
+        import traceback
+        traceback.print_exc()
         return None, False
     """Load SDXL checkpoint from HuggingFace Hub."""
     print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
     try:
+        model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'], repo_type="model")
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_single_file(
             model_path,
+            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
         print("  Using default SDXL base model")
         pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0",
+            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
             continue
         try:
+            lora_path = download_model_with_retry(MODEL_REPO, filename, repo_type="model")
             pipe.load_lora_weights(lora_path, adapter_name=adapter_name)
             print(f"  [OK] LORA loaded successfully: {filename} as '{adapter_name}'")
             loaded_loras[adapter_name] = True
         # Download InstantID weights
         ip_adapter_path = download_model_with_retry(
             "InstantX/InstantID",
+            "ip-adapter.bin",
+            repo_type="model"
         )
         # Load full state dict
         # Create Resampler (image projection model) with CORRECT parameters from reference
         print("Creating Resampler (Perceiver architecture)...")
         image_proj_model = Resampler(
+            dim=1280,
+            depth=4,
+            dim_head=64,
+            heads=20,
+            num_queries=16,
+            embedding_dim=512,
+            output_dim=pipe.unet.config.cross_attention_dim,
+            ff_mult=4
         )
         image_proj_model.eval()
         # Setup IP-Adapter attention processors
         print("Setting up IP-Adapter attention processors...")
         attn_procs = {}
+        num_tokens = 16
         for name in pipe.unet.attn_processors.keys():
             cross_attention_dim = None if name.endswith("attn1.processor") else pipe.unet.config.cross_attention_dim
             pipe.enable_xformers_memory_efficient_attention()
             print("  [OK] xformers enabled")
         except Exception as e:
+            print(f"  [INFO] xformers not available: {e}")
 def load_caption_model():