primerz committed
Commit bfd74f2 · verified · 1 Parent(s): 432102c

Update models.py

Files changed (1):
  1. models.py +215 -214
models.py CHANGED
@@ -1,30 +1,24 @@
  """
- Models.py - Following examplewithface.py EXACTLY
- NO MultiControlNetModel wrapper!
- Using diffusers LoRA system (examplewithface.py lines 266-267)
  """
  import torch
- torch.jit.script = lambda f: f  # Critical: Disable JIT for compatibility
  import time
  import os
  from diffusers import (
-     ControlNetModel,
-     AutoencoderKL,
-     DPMSolverMultistepScheduler,
-     LCMScheduler,
-     UNet2DConditionModel
  )
  from insightface.app import FaceAnalysis
  from controlnet_aux import ZoeDetector
- from huggingface_hub import hf_hub_download, snapshot_download
- from safetensors.torch import load_file
  from compel import Compel, ReturnedEmbeddingsType

- from pipeline_stable_diffusion_xl_instantid_img2img import (
-     StableDiffusionXLInstantIDImg2ImgPipeline,
-     draw_kps
- )
- from cog_sdxl_dataset_and_utils import TokenEmbeddingsHandler

  from config import (
      device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
@@ -33,206 +27,237 @@ from config import (
  )


  def download_model_with_retry(repo_id, filename, max_retries=None):
      if max_retries is None:
          max_retries = DOWNLOAD_CONFIG['max_retries']

      for attempt in range(max_retries):
          try:
              kwargs = {"repo_type": "model"}
              if HUGGINGFACE_TOKEN:
                  kwargs["token"] = HUGGINGFACE_TOKEN

-             path = hf_hub_download(repo_id=repo_id, filename=filename, **kwargs)
              return path
          except Exception as e:
              if attempt < max_retries - 1:
                  time.sleep(DOWNLOAD_CONFIG['retry_delay'])
              else:
                  raise
      return None


  def load_face_analysis():
-     """examplewithface.py line 113"""
-     print("Loading face analysis...")
      try:
-         # Download antelopev2 model
-         snapshot_download(
-             repo_id="DIAMONIK7777/antelopev2",
-             local_dir="/data/models/antelopev2"
          )
-
-         # examplewithface.py line 113 pattern
-         app = FaceAnalysis(name='antelopev2', root='/data', providers=['CPUExecutionProvider'])
-         app.prepare(ctx_id=0, det_size=(640, 640))
-
-         print(" [OK] Face analysis loaded")
-         return app, True
      except Exception as e:
-         print(f" [ERROR] Face analysis failed: {e}")
-         import traceback
-         traceback.print_exc()
          return None, False


  def load_depth_detector():
-     """examplewithface.py line 151-155"""
-     print("Loading Zoe Depth...")
      try:
-         zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
-         zoe.to(device)  # examplewithface.py line 155
-         print(" [OK] Zoe Depth loaded")
-         return zoe, True
      except Exception as e:
-         print(f" [WARNING] Zoe unavailable: {e}")
          return None, False


  def load_controlnets():
-     """examplewithface.py lines 122-126"""
-     print("Loading ControlNets...")
-
-     # Load but don't move to device yet - pipe.to(device) will handle it
-     identitynet = ControlNetModel.from_pretrained(
-         "InstantX/InstantID",
-         subfolder="ControlNetModel",
-         torch_dtype=dtype
-     )
-     print(" [OK] InstantID ControlNet")
-
-     zoedepthnet = ControlNetModel.from_pretrained(
          "diffusers/controlnet-zoe-depth-sdxl-1.0",
          torch_dtype=dtype
-     )
-     print(" [OK] Zoe Depth ControlNet")

-     return identitynet, zoedepthnet


  def load_sdxl_pipeline(controlnets):
      """
-     examplewithface.py lines 128-145
-     CRITICAL: Pass controlnets as LIST - NO MultiControlNetModel!
      """
-     print("Loading pipeline...")
-
-     model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
-
-     pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
-         model_path,
-         controlnet=controlnets,
-         torch_dtype=dtype,
-         use_safetensors=True
-     )
-
-     print(" [OK] Pipeline created with direct controlnet list")
-
-     # LCM scheduler
-     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-     print(" [OK] LCM scheduler")

-     # IP-Adapter (line 139)
-     ip_adapter_path = download_model_with_retry("InstantX/InstantID", "ip-adapter.bin")
-     pipe.load_ip_adapter_instantid(ip_adapter_path)
-     pipe.set_ip_adapter_scale(0.8)
-     print(" [OK] IP-Adapter loaded")
-
-     # DEBUG: Check UNet configuration
-     print(f" [DEBUG] UNet cross_attention_dim: {pipe.unet.config.cross_attention_dim}")
-     if hasattr(pipe, 'image_proj_model'):
-         print(f" [DEBUG] Resampler output_dim: {pipe.image_proj_model.proj_out.out_features}")
-     else:
-         print(f" [DEBUG] WARNING: No image_proj_model found!")

-     pipe = pipe.to(device)

-     # DEBUG: Check text_encoder type
-     print(f" [DEBUG] type(pipe.text_encoder): {type(pipe.text_encoder)}")
-     print(f" [DEBUG] isinstance(pipe.text_encoder, list): {isinstance(pipe.text_encoder, list)}")
-     if hasattr(pipe, 'text_encoder_2'):
-         print(f" [DEBUG] type(pipe.text_encoder_2): {type(pipe.text_encoder_2)}")

-     print(" [OK] Pipeline ready (following examplewithface.py EXACTLY)")
-     return pipe, True
-
-
- # Global LoRA state
- lora_path_cached = None


  def load_lora(pipe):
-     """Download and store LoRA path - actual loading will be done by Kohya loader"""
-     print("Downloading LoRA...")
-     global lora_path_cached
-
      try:
          lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
-         lora_path_cached = lora_path
-
-         print(" [OK] LoRA path stored (will be loaded with Kohya loader during generation)")
          return True
      except Exception as e:
-         print(f" [WARNING] LoRA download failed: {e}")
-         import traceback
-         traceback.print_exc()
          return False


- def fuse_lora_with_scale(pipe, lora_scale):
      """
-     Following examplewithface.py lines 266-267 EXACTLY:
-         pipe.load_lora_weights(loaded_state_dict)
-         pipe.fuse_lora(lora_scale)
-
-     Uses DIFFUSERS built-in LoRA (NOT Kohya lora.py!)
      """
-     global lora_path_cached
-
-     if lora_path_cached is None:
-         return False
-
      try:
-         # Unfuse previous LoRA (example line 259)
-         try:
-             pipe.unfuse_lora()
-         except:
-             pass
-
-         # Unload previous LoRA (example line 260)
-         try:
-             pipe.unload_lora_weights()
-         except:
-             pass

-         print(" [LORA] Loading state dict from file...")
-         # Load state dict like example (lines 75-78)
-         if lora_path_cached.endswith('.safetensors'):
-             from safetensors.torch import load_file
-             state_dict = load_file(lora_path_cached)
-         else:
-             state_dict = torch.load(lora_path_cached, map_location="cpu")

-         print(" [LORA] Loading weights into pipeline...")
-         # examplewithface.py line 266
-         pipe.load_lora_weights(state_dict)

-         # examplewithface.py line 267
-         print(f" [LORA] Fusing with scale {lora_scale}...")
-         pipe.fuse_lora(lora_scale)

-         print(" [OK] LoRA fused into model (diffusers method)")
          return True

      except Exception as e:
-         print(f" [ERROR] LoRA fusion failed: {e}")
          import traceback
          traceback.print_exc()
          return False


  def setup_compel(pipe):
-     """examplewithface.py line 145"""
-     print("Setting up Compel...")
      try:
          compel = Compel(
              tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
@@ -240,99 +265,75 @@ def setup_compel(pipe):
              returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
              requires_pooled=[False, True]
          )
-         print(" [OK] Compel ready")
          return compel, True
      except Exception as e:
-         print(f" [WARNING] Compel unavailable: {e}")
          return None, False


  def setup_scheduler(pipe):
-     pass


  def optimize_pipeline(pipe):
      if device == "cuda":
          try:
              pipe.enable_xformers_memory_efficient_attention()
              print(" [OK] xformers enabled")
-         except:
-             pass
-
-     if hasattr(pipe, 'enable_vae_slicing'):
-         pipe.enable_vae_slicing()
-     if hasattr(pipe, 'enable_vae_tiling'):
-         pipe.enable_vae_tiling()


  def load_caption_model():
      print("Loading caption model...")
      try:
          from transformers import AutoProcessor, AutoModelForCausalLM
-         processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
-         model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco", torch_dtype=dtype).to("cpu")
-         print(" [OK] GIT-Large")
-         return processor, model, True, 'git'
-     except:
          try:
              from transformers import BlipProcessor, BlipForConditionalGeneration
-             processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-             model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=dtype).to("cpu")
-             print(" [OK] BLIP")
-             return processor, model, True, 'blip'
-         except:
              return None, None, False, 'none'


  def set_clip_skip(pipe):
      if hasattr(pipe, 'text_encoder'):
-         print(f" [OK] CLIP skip {CLIP_SKIP}")
-
-
- def load_image_encoder():
-     """Load CLIP Image Encoder for IP-Adapter."""
-     print("Loading CLIP Image Encoder for IP-Adapter...")
-     try:
-         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-             "h94/IP-Adapter",
-             subfolder="models/image_encoder",
-             torch_dtype=dtype
-         ).to(device)
-         print(" [OK] CLIP Image Encoder loaded successfully")
-         return image_encoder
-     except Exception as e:
-         print(f" [ERROR] Could not load image encoder: {e}")
-         return None
-
- def setup_ip_adapter(pipe):
-     """
-     Setup IP-Adapter for InstantID - SIMPLIFIED VERSION.
-     Uses the pipeline's built-in method like exampleapp.py.
-     """
-     print("Setting up IP-Adapter for InstantID face embeddings...")
-     try:
-         # Download InstantID weights
-         face_adapter_path = download_model_with_retry(
-             "InstantX/InstantID",
-             "ip-adapter.bin"
-         )
-
-         # Use the pipeline's built-in method (like exampleapp.py line 139)
-         pipe.load_ip_adapter_instantid(face_adapter_path)
-
-         # Set initial scale (like exampleapp.py line 140)
-         pipe.set_ip_adapter_scale(0.8)
-
-         print(" [OK] IP-Adapter loaded successfully with built-in method")
-         return True
-
-     except Exception as e:
-         print(f" [ERROR] Could not setup IP-Adapter: {e}")
-         import traceback
-         traceback.print_exc()
-         return False
-

- __all__ = ['draw_kps', 'fuse_lora_with_scale', 'load_image_encoder', 'setup_ip_adapter']

- print("[OK] models.py ready - NO MultiControlNetModel, following examplewithface.py")
 
  """
+ Model loading and initialization for Pixagram AI Pixel Art Generator
+ HYBRID VERSION - Supports both local files and HuggingFace repos
  """
  import torch
  import time
  import os
  from diffusers import (
+     ControlNetModel,
+     AutoencoderKL,
+     LCMScheduler
  )
+ from diffusers.models.attention_processor import AttnProcessor2_0
+ from transformers import CLIPVisionModelWithProjection
  from insightface.app import FaceAnalysis
  from controlnet_aux import ZoeDetector
+ from huggingface_hub import hf_hub_download
  from compel import Compel, ReturnedEmbeddingsType

+ # Import the custom pipeline that has the load_ip_adapter_instantid method
+ from pipeline_stable_diffusion_xl_instantid_img2img import StableDiffusionXLInstantIDImg2ImgPipeline

  from config import (
      device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,


  def download_model_with_retry(repo_id, filename, max_retries=None):
+     """Download model with retry logic and proper token handling."""
      if max_retries is None:
          max_retries = DOWNLOAD_CONFIG['max_retries']

      for attempt in range(max_retries):
          try:
+             print(f" Attempting to download {filename} (attempt {attempt + 1}/{max_retries})...")
+
              kwargs = {"repo_type": "model"}
              if HUGGINGFACE_TOKEN:
                  kwargs["token"] = HUGGINGFACE_TOKEN

+             path = hf_hub_download(
+                 repo_id=repo_id,
+                 filename=filename,
+                 **kwargs
+             )
+             print(f" [OK] Downloaded: {filename}")
              return path
+
          except Exception as e:
+             print(f" [WARNING] Download attempt {attempt + 1} failed: {e}")
+
              if attempt < max_retries - 1:
+                 print(f" Retrying in {DOWNLOAD_CONFIG['retry_delay']} seconds...")
                  time.sleep(DOWNLOAD_CONFIG['retry_delay'])
              else:
+                 print(f" [ERROR] Failed to download {filename} after {max_retries} attempts")
                  raise
+
      return None
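Note: `download_model_with_retry` depends on `DOWNLOAD_CONFIG` from `config.py`, which is outside this diff. A minimal sketch of the shape it must have, inferred from the lookups above (the values are illustrative assumptions, not from this commit):

```python
# config.py (sketch) - field names inferred from the calls above; values illustrative
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

DOWNLOAD_CONFIG = {
    "max_retries": 3,   # attempts made by download_model_with_retry()
    "retry_delay": 5,   # seconds slept between failed attempts
}
```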


  def load_face_analysis():
+     """Load face analysis model with proper error handling."""
+     print("Loading face analysis model...")
      try:
+         face_app = FaceAnalysis(
+             name=FACE_DETECTION_CONFIG['model_name'],
+             root='./models/insightface',
+             providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
          )
+         face_app.prepare(
+             ctx_id=FACE_DETECTION_CONFIG['ctx_id'],
+             det_size=FACE_DETECTION_CONFIG['det_size']
+         )
+         print(" [OK] Face analysis model loaded successfully")
+         return face_app, True
      except Exception as e:
+         print(f" [WARNING] Face detection not available: {e}")
          return None, False
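Note: callers get back a `(face_app, ok)` pair. A sketch of how the app is typically consumed for InstantID-style pipelines (standard insightface usage; the file name and variable names are illustrative):

```python
import cv2

face_app, ok = load_face_analysis()
if ok:
    img = cv2.imread("portrait.jpg")  # BGR numpy array, as insightface expects
    faces = face_app.get(img)
    if faces:
        # keep the largest detected face
        face = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
        face_emb = face.embedding  # identity embedding fed to the IP-Adapter
        face_kps = face.kps        # 5-point landmarks used to draw the keypoint image
```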


  def load_depth_detector():
+     """Load Zoe Depth detector."""
+     print("Loading Zoe Depth detector...")
      try:
+         zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
+         zoe_depth.to(device)
+         print(" [OK] Zoe Depth loaded successfully")
+         return zoe_depth, True
      except Exception as e:
+         print(f" [WARNING] Zoe Depth not available: {e}")
          return None, False


  def load_controlnets():
+     """Load ControlNet models."""
+     print("Loading ControlNet Zoe Depth model...")
+     controlnet_depth = ControlNetModel.from_pretrained(
          "diffusers/controlnet-zoe-depth-sdxl-1.0",
          torch_dtype=dtype
+     ).to(device)
+     print(" [OK] ControlNet Depth loaded")

+     print("Loading InstantID ControlNet...")
+     try:
+         controlnet_instantid = ControlNetModel.from_pretrained(
+             "InstantX/InstantID",
+             subfolder="ControlNetModel",
+             torch_dtype=dtype
+         ).to(device)
+         print(" [OK] InstantID ControlNet loaded successfully")
+         return controlnet_depth, controlnet_instantid, True
+     except Exception as e:
+         print(f" [WARNING] InstantID ControlNet not available: {e}")
+         return controlnet_depth, None, False
+
+
+ def load_image_encoder():
+     """Load CLIP Image Encoder for IP-Adapter."""
+     print("Loading CLIP Image Encoder for IP-Adapter...")
+     try:
+         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+             "h94/IP-Adapter",
+             subfolder="models/image_encoder",
+             torch_dtype=dtype
+         ).to(device)
+         print(" [OK] CLIP Image Encoder loaded successfully")
+         return image_encoder
+     except Exception as e:
+         print(f" [ERROR] Could not load image encoder: {e}")
+         return None
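Note: `load_controlnets()` now returns a `(controlnet_depth, controlnet_instantid, ok)` 3-tuple instead of the old 2-tuple, so existing callers need updating. A sketch of one way to wire the result into `load_sdxl_pipeline()` (the list order is an assumption and must match the order of control images and conditioning scales used at generation time):

```python
controlnet_depth, controlnet_instantid, has_instantid = load_controlnets()

if has_instantid:
    # identity net first, depth second - must match the control images later
    controlnets = [controlnet_instantid, controlnet_depth]
else:
    controlnets = [controlnet_depth]  # degrade gracefully without InstantID

pipe, loaded_configured = load_sdxl_pipeline(controlnets)
```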


  def load_sdxl_pipeline(controlnets):
      """
+     Load SDXL checkpoint - HYBRID APPROACH.
+     Tries in order:
+         1. Local file via from_single_file (like examplemodels.py)
+         2. HuggingFace repo via from_pretrained (like exampleapp.py)
+         3. Fallback to a known working checkpoint
+         4. Last resort: SDXL base
      """
+     print("Loading SDXL checkpoint (hybrid approach)...")

+     # ATTEMPT 1: Try loading from a local file using from_single_file
+     # This is the examplemodels.py approach
+     if MODEL_FILES.get('checkpoint'):
+         try:
+             print(" [Attempt 1] Loading from local file via from_single_file...")
+             model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
+
+             # Check that the file exists and is a safetensors file
+             if model_path and os.path.exists(model_path) and model_path.endswith('.safetensors'):
+                 pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
+                     model_path,
+                     controlnet=controlnets,
+                     torch_dtype=dtype,
+                     use_safetensors=True
+                 ).to(device)
+                 print(f" [OK] Checkpoint loaded from local file: {model_path}")
+                 return pipe, True
+             else:
+                 print(" [INFO] Local file not found or invalid, trying next method...")
+         except Exception as e:
+             print(f" [WARNING] from_single_file failed: {e}")
+             print(" [INFO] Trying from_pretrained approach...")

+     # ATTEMPT 2: Try loading from a HuggingFace repo using from_pretrained
+     # This is the exampleapp.py approach
+     try:
+         print(" [Attempt 2] Loading from HuggingFace repo via from_pretrained...")
+         pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
+             MODEL_REPO,
+             controlnet=controlnets,
+             torch_dtype=dtype,
+             use_safetensors=True
+         ).to(device)
+         print(f" [OK] Checkpoint loaded from HuggingFace repo: {MODEL_REPO}")
+         return pipe, True
+     except Exception as e:
+         print(f" [WARNING] from_pretrained failed: {e}")
+         print(" [INFO] Trying fallback checkpoint...")

+     # ATTEMPT 3: Fallback to a known working checkpoint
+     try:
+         print(" [Attempt 3] Loading fallback: frankjoshua/albedobaseXL_v21...")
+         pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
+             "frankjoshua/albedobaseXL_v21",
+             controlnet=controlnets,
+             torch_dtype=dtype,
+             use_safetensors=True
+         ).to(device)
+         print(" [OK] Fallback checkpoint loaded successfully")
+         return pipe, False
+     except Exception as e:
+         print(f" [WARNING] Fallback also failed: {e}")
+         print(" [INFO] Trying SDXL base model...")

+     # ATTEMPT 4: Last resort - SDXL base
+     print(" [Attempt 4] Loading base SDXL model...")
+     pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
+         "stabilityai/stable-diffusion-xl-base-1.0",
+         controlnet=controlnets,
+         torch_dtype=dtype,
+         use_safetensors=True
+     ).to(device)
+     print(" [OK] Base SDXL model loaded")
+     return pipe, False
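Note: the second element of the return value is `True` only when the configured checkpoint loaded, and `False` for every fallback, so callers can react to a substituted base model. A sketch of how a caller outside this diff might use that flag (illustrative wiring, not part of the commit):

```python
pipe, using_configured_checkpoint = load_sdxl_pipeline(controlnets)
setup_scheduler(pipe)    # swap in LCMScheduler (defined further down)
optimize_pipeline(pipe)  # xformers, if available
if using_configured_checkpoint:
    load_lora(pipe)      # only apply the style LoRA on the checkpoint it targets
```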


  def load_lora(pipe):
+     """Load LoRA from HuggingFace Hub."""
+     print("Loading LoRA (retroart) from HuggingFace Hub...")
      try:
          lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
+         pipe.load_lora_weights(lora_path, adapter_name="retroart")
+         print(" [OK] LoRA loaded successfully")
          return True
      except Exception as e:
+         print(f" [WARNING] Could not load LoRA: {e}")
          return False
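Note: loading with `adapter_name="retroart"` moves the code from the removed fuse-based helper to diffusers' named-adapter LoRA API, so the strength is applied at runtime rather than baked in up front. A sketch (standard diffusers calls; the scale value is illustrative):

```python
lora_scale = 0.8  # illustrative strength

# activate the named adapter at the desired strength
pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])

# optional: bake the weights in for slightly faster inference
# pipe.fuse_lora(lora_scale=lora_scale)   # pipe.unfuse_lora() restores the base weights
```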


+ def setup_ip_adapter(pipe):
      """
+     Setup IP-Adapter for InstantID - SIMPLIFIED VERSION.
+     Uses the pipeline's built-in method (like exampleapp.py lines 139-140).
+     This is much simpler and more reliable than manual Resampler setup.
      """
+     print("Setting up IP-Adapter for InstantID face embeddings...")
      try:
+         # Download InstantID IP-Adapter weights
+         face_adapter_path = download_model_with_retry(
+             "InstantX/InstantID",
+             "ip-adapter.bin"
+         )

+         # Use the pipeline's built-in method
+         # This handles all the complex Resampler setup automatically
+         pipe.load_ip_adapter_instantid(face_adapter_path)

+         # Set initial scale (can be adjusted later during generation)
+         pipe.set_ip_adapter_scale(0.8)

+         print(" [OK] IP-Adapter loaded successfully with built-in method")
+         print(" - Pipeline handles Resampler and attention processors automatically")
+         print(" - Face embeddings will be properly integrated during generation")

          return True

      except Exception as e:
+         print(f" [ERROR] Could not setup IP-Adapter: {e}")
          import traceback
          traceback.print_exc()
          return False
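Note: the 0.8 set here is only an initial value; `set_ip_adapter_scale` can be called again before each generation to trade identity strength against stylization (illustrative values):

```python
pipe.set_ip_adapter_scale(0.5)  # weaker identity, more freedom for the pixel-art style
pipe.set_ip_adapter_scale(1.0)  # stronger likeness to the reference face
```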


  def setup_compel(pipe):
+     """Setup Compel for better SDXL prompt handling."""
+     print("Setting up Compel for enhanced prompt processing...")
      try:
          compel = Compel(
              tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
              returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
              requires_pooled=[False, True]
          )
+         print(" [OK] Compel loaded successfully")
          return compel, True
      except Exception as e:
+         print(f" [WARNING] Compel not available: {e}")
          return None, False
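Note: with `requires_pooled=[False, True]`, calling the Compel object returns both the token embeddings and the pooled embeddings that SDXL needs. A sketch of standard Compel usage with this setup (the prompt and surrounding call are illustrative):

```python
compel, ok = setup_compel(pipe)
if ok:
    # "++" up-weights a term in Compel's prompt-weighting syntax
    conditioning, pooled = compel("pixel art++ portrait, retro game style")
    result = pipe(
        prompt_embeds=conditioning,
        pooled_prompt_embeds=pooled,
        # ... plus the usual image / controlnet arguments ...
    )
```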


  def setup_scheduler(pipe):
+     """Setup LCM scheduler."""
+     print("Setting up LCM scheduler...")
+     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+     print(" [OK] LCM scheduler configured")


  def optimize_pipeline(pipe):
+     """Apply optimizations to pipeline."""
+     # Try to enable xformers
      if device == "cuda":
          try:
              pipe.enable_xformers_memory_efficient_attention()
              print(" [OK] xformers enabled")
+         except Exception as e:
+             print(f" [INFO] xformers not available: {e}")


  def load_caption_model():
+     """
+     Load caption model with proper error handling.
+     Tries multiple models in order of quality.
+     """
      print("Loading caption model...")
+
+     # Try GIT-Large first (good balance of quality and compatibility)
      try:
          from transformers import AutoProcessor, AutoModelForCausalLM
+
+         print(" Attempting GIT-Large (recommended)...")
+         caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+         caption_model = AutoModelForCausalLM.from_pretrained(
+             "microsoft/git-large-coco",
+             torch_dtype=dtype
+         ).to(device)
+         print(" [OK] GIT-Large model loaded (produces detailed captions)")
+         return caption_processor, caption_model, True, 'git'
+     except Exception as e1:
+         print(f" [INFO] GIT-Large not available: {e1}")
+
+     # Try BLIP base as fallback
      try:
          from transformers import BlipProcessor, BlipForConditionalGeneration
+
+         print(" Attempting BLIP base (fallback)...")
+         caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+         caption_model = BlipForConditionalGeneration.from_pretrained(
+             "Salesforce/blip-image-captioning-base",
+             torch_dtype=dtype
+         ).to(device)
+         print(" [OK] BLIP base model loaded (standard captions)")
+         return caption_processor, caption_model, True, 'blip'
+     except Exception as e2:
+         print(f" [WARNING] Caption models not available: {e2}")
+         print(" Caption generation will be disabled")
      return None, None, False, 'none'
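Note: both caption backends share the same processor/generate/decode flow in transformers. A sketch of consuming the returned tuple (the file name and generation length are illustrative):

```python
from PIL import Image

processor, model, ok, kind = load_caption_model()
if ok:
    image = Image.open("input.png").convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device, dtype)  # match the model's device/dtype
    ids = model.generate(pixel_values=pixel_values, max_length=50)
    caption = processor.batch_decode(ids, skip_special_tokens=True)[0]
```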


  def set_clip_skip(pipe):
+     """Set CLIP skip value."""
      if hasattr(pipe, 'text_encoder'):
+         print(f" [OK] CLIP skip set to {CLIP_SKIP}")


+ print("[OK] Model loading functions ready (HYBRID VERSION)")