primerz committed
Commit d4170e9 · verified · Parent: 23acfdf

Update models.py

Files changed (1): models.py (+27 -42)
models.py CHANGED
@@ -1,6 +1,6 @@
 """
 Model loading and initialization for Pixagram AI Pixel Art Generator
-UPDATED VERSION with proper InstantID pipeline integration
+UPDATED VERSION with proper InstantID pipeline support
 """
 import torch
 import time
@@ -9,20 +9,19 @@ from diffusers import (
     AutoencoderKL,
     LCMScheduler
 )
-from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
 from controlnet_aux import ZoeDetector
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType
 
-# Use InstantID pipeline (replaces manual IP-Adapter setup)
+# Use InstantID pipeline
 from pipeline_stable_diffusion_xl_instantid_img2img import (
     StableDiffusionXLInstantIDImg2ImgPipeline
 )
 
 from config import (
     device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
-    FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG, INSTANTID_CONFIG
+    FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG
 )
 
 
@@ -66,7 +65,7 @@ def load_face_analysis():
     try:
         face_app = FaceAnalysis(
             name=FACE_DETECTION_CONFIG['model_name'],
-            root=FACE_DETECTION_CONFIG.get('root', './models/insightface'),
+            root='./models/insightface',
             providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
         )
         face_app.prepare(
@@ -95,34 +94,24 @@ def load_depth_detector():
 
 def load_controlnets():
     """
-    Load both ControlNets for InstantID pipeline.
-    Returns tuple: (identitynet, depthnet)
-    Both are required for proper face preservation.
+    Load ControlNets for InstantID pipeline.
+    Returns both ControlNets (InstantID first, then Depth).
     """
     print("Loading InstantID ControlNet...")
-    try:
-        controlnet_instantid = ControlNetModel.from_pretrained(
-            INSTANTID_CONFIG['repo'],
-            subfolder=INSTANTID_CONFIG['controlnet_subfolder'],
-            torch_dtype=dtype
-        ).to(device)
-        print(" [OK] InstantID ControlNet loaded")
-    except Exception as e:
-        print(f" [ERROR] InstantID ControlNet failed: {e}")
-        raise
+    controlnet_instantid = ControlNetModel.from_pretrained(
+        "InstantX/InstantID",
+        subfolder="ControlNetModel",
+        torch_dtype=dtype
+    ).to(device)
+    print(" [OK] InstantID ControlNet loaded")
 
     print("Loading Zoe Depth ControlNet...")
-    try:
-        controlnet_depth = ControlNetModel.from_pretrained(
-            "diffusers/controlnet-zoe-depth-sdxl-1.0",
-            torch_dtype=dtype
-        ).to(device)
-        print(" [OK] Zoe Depth ControlNet loaded")
-    except Exception as e:
-        print(f" [ERROR] Depth ControlNet failed: {e}")
-        raise
+    controlnet_depth = ControlNetModel.from_pretrained(
+        "diffusers/controlnet-zoe-depth-sdxl-1.0",
+        torch_dtype=dtype
+    ).to(device)
+    print(" [OK] Zoe Depth ControlNet loaded")
 
-    # Return in order: InstantID first, Depth second
     return controlnet_instantid, controlnet_depth
 
 
@@ -138,23 +127,21 @@ def load_sdxl_pipeline(controlnets):
         # Use InstantID-enabled pipeline
         pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
             model_path,
-            controlnet=controlnets,  # MUST be list of 2 ControlNets
+            controlnet=controlnets,
             torch_dtype=dtype,
             use_safetensors=True
         ).to(device)
 
-        # Load IP-Adapter weights
+        # Load IP-Adapter weights for InstantID
         print("Loading IP-Adapter for InstantID...")
         ip_adapter_path = download_model_with_retry(
-            INSTANTID_CONFIG['repo'],
-            INSTANTID_CONFIG['ip_adapter_file']
+            "InstantX/InstantID",
+            "ip-adapter.bin"
         )
         pipe.load_ip_adapter_instantid(ip_adapter_path)
-        pipe.set_ip_adapter_scale(INSTANTID_CONFIG['default_ip_scale'])
+        pipe.set_ip_adapter_scale(0.8)  # Default scale
 
         print(" [OK] InstantID pipeline loaded successfully")
-        print(f" - IP-Adapter scale: {INSTANTID_CONFIG['default_ip_scale']}")
-        print(f" - ControlNets: InstantID + Depth")
        return pipe, True
 
    except Exception as e:
@@ -163,7 +150,7 @@ def load_sdxl_pipeline(controlnets):
        traceback.print_exc()
 
        # Fallback to standard pipeline
-        print(" Falling back to standard SDXL pipeline (no face preservation)")
+        print(" Falling back to standard SDXL pipeline (no InstantID)")
        from diffusers import StableDiffusionXLControlNetImg2ImgPipeline
        pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
@@ -213,7 +200,6 @@ def setup_scheduler(pipe):
 
 def optimize_pipeline(pipe):
     """Apply optimizations to pipeline."""
-    # Try to enable xformers
     if device == "cuda":
         try:
             pipe.enable_xformers_memory_efficient_attention()
@@ -229,7 +215,7 @@ def load_caption_model():
     """
     print("Loading caption model...")
 
-    # Try GIT-Large first (good balance of quality and compatibility)
+    # Try GIT-Large first
     try:
         from transformers import AutoProcessor, AutoModelForCausalLM
 
@@ -239,7 +225,7 @@ def load_caption_model():
             "microsoft/git-large-coco",
             torch_dtype=dtype
         ).to(device)
-        print(" [OK] GIT-Large model loaded (produces detailed captions)")
+        print(" [OK] GIT-Large model loaded")
         return caption_processor, caption_model, True, 'git'
     except Exception as e1:
         print(f" [INFO] GIT-Large not available: {e1}")
@@ -254,11 +240,10 @@ def load_caption_model():
             "Salesforce/blip-image-captioning-base",
             torch_dtype=dtype
         ).to(device)
-        print(" [OK] BLIP base model loaded (standard captions)")
+        print(" [OK] BLIP base model loaded")
         return caption_processor, caption_model, True, 'blip'
     except Exception as e2:
         print(f" [WARNING] Caption models not available: {e2}")
-        print(" Caption generation will be disabled")
        return None, None, False, 'none'
 
 
@@ -268,4 +253,4 @@ def set_clip_skip(pipe):
     print(f" [OK] CLIP skip set to {CLIP_SKIP}")
 
 
-print("[OK] Model loading functions ready (InstantID pipeline)")
+print("[OK] Model loading functions ready")
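The commit inlines the InstantID settings that previously came from config.py. Judging from the values now hardcoded in models.py, the removed INSTANTID_CONFIG presumably looked roughly like this (a reconstruction from this diff, not the actual config.py contents):

# Presumed shape of the INSTANTID_CONFIG dict this commit removes from
# config.py, reconstructed from the values inlined into models.py above.
INSTANTID_CONFIG = {
    'repo': 'InstantX/InstantID',               # ControlNet + IP-Adapter repo
    'controlnet_subfolder': 'ControlNetModel',  # now passed as subfolder=...
    'ip_adapter_file': 'ip-adapter.bin',        # now passed to the downloader
    'default_ip_scale': 0.8,                    # now hardcoded identity strength
}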
 
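For context, a minimal sketch of how the loaders touched by this commit presumably compose at startup. All function names come from this file; the module name `models`, the image path, the largest-face heuristic, and the assumption that load_face_analysis() returns the prepared FaceAnalysis app are illustrative, not part of the commit:

import cv2
from models import (
    load_face_analysis, load_controlnets, load_sdxl_pipeline,
    setup_scheduler, optimize_pipeline, set_clip_skip,
)

# Wire the loaders in dependency order; load_controlnets() returns
# (InstantID, Depth) and load_sdxl_pipeline() expects both as a list.
face_app = load_face_analysis()
controlnets = load_controlnets()
pipe, instantid_ok = load_sdxl_pipeline(list(controlnets))
setup_scheduler(pipe)
optimize_pipeline(pipe)
set_clip_skip(pipe)

# The commit pins identity strength to 0.8 at load time; it can still be
# re-tuned per run with the same setter the diff uses.
if instantid_ok:
    pipe.set_ip_adapter_scale(0.6)  # weaker identity, stronger stylization

# Extract the identity embedding the InstantID IP-Adapter consumes
# (standard insightface usage; picks the largest detected face).
faces = face_app.get(cv2.imread("portrait.jpg"))
face_emb = max(
    faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])
).normed_embedding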