Upload 7 files
- app.py +27 -30
- config.py +15 -6
- generator.py +63 -16
- model.py +124 -19
- requirements.txt +4 -1
- utils.py +36 -1
app.py
CHANGED
@@ -1,24 +1,20 @@
 import gradio as gr
+import spaces
 import torch
 from model import ModelHandler
 from generator import Generator
+# --- IMPORT CONFIG ---
 from config import Config
 
-# Handle spaces module for HF Spaces ZeroGPU (optional)
-try:
-    import spaces
-    SPACES_AVAILABLE = True
-except ImportError:
-    SPACES_AVAILABLE = False
-    print("Running without HF Spaces ZeroGPU support")
-
 # 1. Initialize Models Globally (in RAM)
+# ZeroGPU will move them to VRAM inside the @spaces.GPU function
 print("Initializing Application...")
 handler = ModelHandler()
 handler.load_models()
 gen = Generator(handler)
 
-# 2. Define Inference Function
+# 2. Define GPU-enabled Inference Function
+@spaces.GPU(duration=20)  # <-- MODIFIED
 def process_img(
     image,
     prompt,
@@ -26,9 +22,10 @@ def process_img(
     cfg_scale,
     steps,
     img_strength,
+    face_strength,
     depth_strength,
     edge_strength,
-    tile_strength,
+    # tile_strength,  # <-- REMOVED
     seed
 ):
     if image is None:
@@ -44,9 +41,10 @@ def process_img(
             guidance_scale=cfg_scale,
             num_inference_steps=steps,
             img2img_strength=img_strength,
+            face_strength=face_strength,
             depth_strength=depth_strength,
             lineart_strength=edge_strength,
-            tile_strength=tile_strength,
+            # tile_strength=tile_strength,  # <-- REMOVED
             seed=seed
         )
         print("--- Generation Complete ---")
@@ -56,16 +54,12 @@ def process_img(
         print(f"Error during generation: {e}")
         raise gr.Error(f"An error occurred: {str(e)}")
 
-# Apply spaces.GPU decorator only if available
-if SPACES_AVAILABLE:
-    process_img = spaces.GPU(duration=20)(process_img)
-
 # 3. Build Gradio Interface
-with gr.Blocks(title="Image To Pixel Art", theme=gr.themes.Soft()) as demo:
+with gr.Blocks(title="Face To Pixel Art", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 🎮 Image To Pixel Art
-        Upload any image
+        # 🎮 Face to Pixel Art
+        Upload any image. If there is a face, we'll keep the identity. If not, we'll pixelate the scene!
         """
     )
 
@@ -81,7 +75,7 @@ with gr.Blocks(title="Image To Pixel Art", theme=gr.themes.Soft()) as demo:
            negative_prompt = gr.Textbox(
                label="Negative Prompt (Optional)",
                placeholder="e.g., blurry, text, watermark, bad art...",
-                value=Config.DEFAULT_NEGATIVE_PROMPT
+                value=Config.DEFAULT_NEGATIVE_PROMPT  # <-- MODIFIED
            )
 
            with gr.Accordion("Advanced Settings", open=False):
@@ -116,6 +110,14 @@ with gr.Blocks(title="Image To Pixel Art", theme=gr.themes.Soft()) as demo:
                    value=Config.IMG_STRENGTH,
                    label="Image Strength (Img2Img)"
                )
+                face_strength = gr.Slider(
+                    elem_id="face_strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=Config.FACE_STRENGTH,
+                    label="Face Strength"
+                )
                depth_strength = gr.Slider(
                    elem_id="depth_strength",
                    minimum=0.0,
@@ -132,14 +134,8 @@ with gr.Blocks(title="Image To Pixel Art", theme=gr.themes.Soft()) as demo:
                    value=Config.EDGE_STRENGTH,
                    label="EdgeMap Strength (LineArt)"
                )
-
-                lora_strength = gr.Slider(
-                    minimum=0.0,
-                    maximum=2.0,
-                    step=0.05,
-                    value=Config.LORA_STRENGTH,
-                    label="LoRA Strength (Pixel Art Style)"
-                )
+                # --- MODIFIED: Renamed slider ---
+                # tile_strength = gr.Slider(...)  # <-- REMOVED
 
            run_btn = gr.Button("Generate Pixel Art", variant="primary")
 
@@ -154,9 +150,10 @@ with gr.Blocks(title="Image To Pixel Art", theme=gr.themes.Soft()) as demo:
        cfg_scale,
        steps,
        img_strength,
+        face_strength,
        depth_strength,
        edge_strength,
-        tile_strength,
+        # tile_strength,  # <-- REMOVED
        seed
    ]
 
@@ -173,5 +170,5 @@ if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
-        show_api=True
-    )
+        show_api=True  # share=True is not needed on Spaces
+    )
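Note on the ZeroGPU change above: the previous revision guarded the `spaces` import with try/except so the app could also run outside HF Spaces, while this revision imports it unconditionally, so a local run without the `spaces` package now fails at import time. A minimal sketch that keeps the new decorator syntax while restoring the local fallback (the no-op stand-in below is an assumption, not part of this commit):

    # Sketch: degrade gracefully when the ZeroGPU helper is absent (local runs).
    try:
        import spaces
    except ImportError:
        class _NoOpGPU:
            @staticmethod
            def GPU(duration=60):
                def wrap(fn):
                    return fn  # no ZeroGPU: run on whatever device is available
                return wrap
        spaces = _NoOpGPU()

    @spaces.GPU(duration=20)  # on Spaces: request a GPU slice for up to 20 s
    def process_img(image, prompt, *args):
        ...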
config.py
CHANGED
@@ -7,16 +7,19 @@ class Config:
 
     # Base Model & LoRA (from primerz/pixagram)
     REPO_ID = "primerz/pixagram"
-    CHECKPOINT_FILENAME = "
+    CHECKPOINT_FILENAME = "horizon.safetensors"
     LORA_FILENAME = "retroart.safetensors"
-    LORA_STRENGTH = 1.0
+    LORA_STRENGTH = 1.0  # Fixed strength for fusion
 
     # Trigger Words for the LoRA
-    STYLE_TRIGGER = "
+    STYLE_TRIGGER = "HD pixel art artwork and high quality illustration in retroart style of "
 
     # Default Negative Prompt
     DEFAULT_NEGATIVE_PROMPT = "Ugly, artifacts, blurry, disformed, photo-realistic, photo, photography, realistic, low-quality, text."
 
+    # InstantID Assets
+    INSTANTID_REPO = "InstantX/InstantID"
+
     # ControlNet Repos
     CN_ZOE_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"
     CN_LINEART_REPO = "ShermanG/ControlNet-Standard-Lineart-for-SDXL"
@@ -27,10 +30,16 @@ class Config:
     # Captioning Model
     CAPTIONER_REPO = "Salesforce/blip-image-captioning-base"
 
+    # InsightFace Model (HF Hub mirror)
+    ANTELOPEV2_REPO = "DIAMONIK7777/antelopev2"
+    ANTELOPEV2_ROOT = "."  # Parent folder
+    ANTELOPEV2_NAME = "antelopev2"
+
     # Gradio Parameters
     CGF_SCALE = 1.2
     STEPS_NUMBER = 10
     IMG_STRENGTH = 0.65
-
-
-
+    FACE_STRENGTH = 0.75
+    DEPTH_STRENGTH = 0.75
+    EDGE_STRENGTH = 0.75
+    CLIP_SKIP = 2
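The trailing space in STYLE_TRIGGER lets a caption be concatenated directly onto it. A sketch of how the trigger presumably combines with the BLIP caption in generator.py (the exact concatenation is outside the shown hunks, so treat this as an assumption):

    from config import Config

    caption = "a portrait of a woman with red hair"  # e.g. returned by get_caption()
    final_prompt = Config.STYLE_TRIGGER + caption
    # -> "HD pixel art artwork and high quality illustration in retroart style of
    #     a portrait of a woman with red hair"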
generator.py
CHANGED
@@ -1,6 +1,6 @@
 import torch
 from config import Config
-from utils import resize_image_to_1mp, get_caption
+from utils import resize_image_to_1mp, get_caption, draw_kps
 from PIL import Image
 
 class Generator:
@@ -34,9 +34,9 @@
         guidance_scale=1.5,
         num_inference_steps=6,
         img2img_strength=0.3,
-
-
-
+        face_strength=0.3,
+        depth_strength=0.3,
+        lineart_strength=0.3,
         seed=-1
     ):
         # 1. Pre-process Inputs
@@ -44,8 +44,8 @@
         processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
 
-        # 2.
-        self.mh.
+        # 2. Get Face Info (replaces get_face_embedding)
+        face_info = self.mh.get_face_info(processed_image)
 
         # 3. Generate Prompt
         if not user_prompt.strip():
@@ -61,40 +61,87 @@
         print(f"Prompt: {final_prompt}")
         print(f"Negative Prompt: {negative_prompt}")
 
-        # 4. Generate Control Maps (
+        # 4. Generate OTHER Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
 
-        # 5.
-        # ControlNet order: [Zoe_Depth, LineArt]
-        controlnet_conditioning_scale = [depth_strength, lineart_strength]
-
-
-
+        # 5. Logic for Face vs No-Face (NOW INCLUDES KPS)
+        # ControlNet order: [InstantID_KPS, Zoe_Depth, LineArt]
+
+        if face_info is not None:
+            print("Face detected: Applying InstantID with keypoints.")
+
+            # We use face_info['embedding'] (raw) instead of normed_embedding.
+            # Raw embedding has higher magnitude (~20-30) required for the adapter.
+            face_emb = torch.tensor(
+                face_info['embedding'],
+                dtype=Config.DTYPE,
+                device=Config.DEVICE
+            ).unsqueeze(0)
+
+            # Create keypoint image
+            face_kps = draw_kps(processed_image, face_info['kps'])
+
+            # Set strengths
+            controlnet_conditioning_scale = [face_strength, depth_strength, lineart_strength]
+
+            # --- UPDATED: Reduced IP Adapter Scale ---
+            # Lowered from 0.8 to 0.7 to allow LoRA style (pixel art) to
+            # override realistic skin textures while keeping identity.
+            self.mh.pipeline.set_ip_adapter_scale(0.7)
+        else:
+            print("No face detected: Disabling InstantID.")
+            # Create dummy embedding
+            face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
+            # Create dummy keypoint image (black)
+            face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
+
+            # Set strengths
+            controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
+            self.mh.pipeline.set_ip_adapter_scale(0.0)
+
+        # --- UPDATED: Control Guidance End Strategy ---
+        # We cap the Face ControlNet duration.
+        # Even if strength is 1.0, we stop it at 0.6 (60%) of the steps.
+        # This leaves the final 40% of steps pure for the Pixel Art LoRA
+        # to "pixelize" the face without the ControlNet trying to fix it back to a photo.
+
+        face_end_step = min(0.6, face_strength)
+
+        control_guidance_end = [
+            face_end_step,      # InstantID: Stop early for style
+            depth_strength,     # Depth: Keep structure longer
+            lineart_strength    # Lineart: Keep outlines longer
+        ]
 
         # --- Seed/Generator Logic ---
         if seed == -1 or seed is None:
             seed = torch.Generator().seed()
         generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
         print(f"Using seed: {seed}")
+        # --- END ---
 
         # 6. Run Inference
         print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
-            image=processed_image,
-            control_image=[depth_map, lineart_map],
+            image=processed_image,  # Base img2img image
+            control_image=[face_kps, depth_map, lineart_map],
+            image_embeds=face_emb,  # Face identity embedding
             generator=generator,
 
-            # Parameters from UI
+            # --- Parameters from UI ---
             strength=img2img_strength,
             num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
+            # --- End Parameters from UI ---
 
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
 
+            clip_skip=Config.CLIP_SKIP,
+
         ).images[0]
 
-        return result
+        return result
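A worked example of the conditioning logic above, using the config.py defaults (FACE_STRENGTH = DEPTH_STRENGTH = EDGE_STRENGTH = 0.75, STEPS_NUMBER = 10):

    face_strength = depth_strength = lineart_strength = 0.75

    # Face detected: all three ControlNets active.
    controlnet_conditioning_scale = [0.75, 0.75, 0.75]  # [InstantID, Zoe, LineArt]
    face_end_step = min(0.6, 0.75)                      # -> 0.6
    control_guidance_end = [0.6, 0.75, 0.75]
    # With 10 steps, InstantID guidance stops after roughly step 6,
    # while depth and lineart keep steering until roughly step 7 or 8.

    # No face: the InstantID channel is zeroed but still present, so the
    # 3-ControlNet pipeline signature stays unchanged.
    controlnet_conditioning_scale = [0.0, 0.75, 0.75]

Note that control_guidance_end reuses the depth and edge strength sliders as stop fractions, so a strength of 0.75 also stops that ControlNet at 75% of the steps.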
model.py
CHANGED
@@ -1,38 +1,93 @@
 import torch
 import os
+import cv2
+import numpy as np
 from config import Config
 
 from diffusers import (
     ControlNetModel,
     LCMScheduler,
-    AutoencoderKL,
+    # AutoencoderKL  # Removed as requested
 )
 from diffusers.models.controlnets.multicontrolnet import MultiControlNetModel
 
-from 
+# Import the custom pipeline from your local file
+from pipeline_stable_diffusion_xl_instantid_img2img import StableDiffusionXLInstantIDImg2ImgPipeline
+
+from huggingface_hub import snapshot_download, hf_hub_download
+from insightface.app import FaceAnalysis
 from controlnet_aux import LeresDetector, LineartAnimeDetector
 
 class ModelHandler:
     def __init__(self):
         self.pipeline = None
+        self.app = None  # InsightFace
         self.leres_detector = None
         self.lineart_anime_detector = None
-        self.
-
+        self.face_analysis_loaded = False
+
+    def load_face_analysis(self):
+        """
+        Load face analysis model.
+        Downloads from HF Hub to the path insightface expects.
+        """
+        print("Loading face analysis model...")
+
+        model_path = os.path.join(Config.ANTELOPEV2_ROOT, "models", Config.ANTELOPEV2_NAME)
+
+        if not os.path.exists(os.path.join(model_path, "scrfd_10g_bnkps.onnx")):
+            print(f"Downloading AntelopeV2 models from {Config.ANTELOPEV2_REPO} to {model_path}...")
+            try:
+                snapshot_download(
+                    repo_id=Config.ANTELOPEV2_REPO,
+                    local_dir=model_path,  # Download to the correct expected path
+                )
+            except Exception as e:
+                print(f" [ERROR] Failed to download AntelopeV2 models: {e}")
+                return False
+
+        try:
+            self.app = FaceAnalysis(
+                name=Config.ANTELOPEV2_NAME,
+                root=Config.ANTELOPEV2_ROOT,
+                providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
+            )
+            self.app.prepare(ctx_id=0, det_size=(640, 640))
+            print(" [OK] Face analysis model loaded successfully.")
+            return True
+
+        except Exception as e:
+            print(f" [WARNING] Face detection system failed to initialize: {e}")
+            return False
 
     def load_models(self):
-        # 1. Load
-
+        # 1. Load Face Analysis
+        self.face_analysis_loaded = self.load_face_analysis()
+
+        # 2. Load ControlNets
+        print("Loading ControlNets (InstantID, Zoe, LineArt)...")
+
+        # Load the InstantID ControlNet from the correct subfolder
+        print("Loading InstantID ControlNet from subfolder 'ControlNetModel'...")
+        cn_instantid = ControlNetModel.from_pretrained(
+            Config.INSTANTID_REPO,
+            subfolder="ControlNetModel",
+            torch_dtype=Config.DTYPE
+        )
+        print(" [OK] Loaded InstantID ControlNet.")
 
+        # Load other ControlNets normally
+        print("Loading Zoe and LineArt ControlNets...")
         cn_zoe = ControlNetModel.from_pretrained(Config.CN_ZOE_REPO, torch_dtype=Config.DTYPE)
         cn_lineart = ControlNetModel.from_pretrained(Config.CN_LINEART_REPO, torch_dtype=Config.DTYPE)
 
-        #
+        # --- Manually wrap the list of models in a MultiControlNetModel ---
         print("Wrapping ControlNets in MultiControlNetModel...")
-        controlnet_list = [cn_zoe, cn_lineart]
+        controlnet_list = [cn_instantid, cn_zoe, cn_lineart]
         controlnet = MultiControlNetModel(controlnet_list)
+        # --- End wrapping ---
 
-        #
+        # 3. Load SDXL Pipeline
         print(f"Loading SDXL Pipeline ({Config.CHECKPOINT_FILENAME})...")
 
         checkpoint_local_path = os.path.join("./models", Config.CHECKPOINT_FILENAME)
@@ -46,7 +101,7 @@ class ModelHandler:
         )
 
         print(f"Loading pipeline from local file: {checkpoint_local_path}")
-        self.pipeline = 
+        self.pipeline = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
             checkpoint_local_path,
             controlnet=controlnet,
             torch_dtype=Config.DTYPE,
@@ -65,6 +120,7 @@ class ModelHandler:
         scheduler_config = self.pipeline.scheduler.config
         scheduler_config['clip_sample'] = False
 
+        # --- MODIFIED: optimize for sharp pixel art style ---
         self.pipeline.scheduler = LCMScheduler.from_config(
             scheduler_config,
             timestep_spacing="trailing",
@@ -72,25 +128,74 @@ class ModelHandler:
         )
         print(" [OK] LCMScheduler loaded (clip_sample=False, trailing spacing).")
 
-        #
+        # 5. Load Adapters (IP-Adapter & LoRA)
+        print("Loading Adapters (IP-Adapter & LoRA)...")
+
+        ip_adapter_filename = "ip-adapter.bin"
+        ip_adapter_local_path = os.path.join("./models", ip_adapter_filename)
+
+        if not os.path.exists(ip_adapter_local_path):
+            print(f"Downloading IP-Adapter to {ip_adapter_local_path}...")
+            hf_hub_download(
+                repo_id=Config.INSTANTID_REPO,
+                filename=ip_adapter_filename,
+                local_dir="./models",
+                local_dir_use_symlinks=False
+            )
+
+        print(f"Loading IP-Adapter from local file: {ip_adapter_local_path}")
+        # Load InstantID adapter first
+        self.pipeline.load_ip_adapter_instantid(ip_adapter_local_path)
+
         print("Loading LCM LoRA weights...")
+        # KEY CHANGE 1: Assign an adapter_name so Diffusers distinguishes it from InstantID
         self.pipeline.load_lora_weights(
             Config.REPO_ID,
             weight_name=Config.LORA_FILENAME,
             adapter_name="lcm_lora"
         )
-        print(" [OK] LoRA weights loaded (unfused for dynamic scaling).")
 
-        #
+        # KEY CHANGE 2: Hardcode scale to 1.0 for LCM to remove trigger word dependency
+        # (Or ensure Config.LORA_STRENGTH is set to 1.0)
+        fuse_scale = 1.0
+
+        print(f"Fusing LoRA 'lcm_lora' with scale {fuse_scale}...")
+
+        # KEY CHANGE 3: Fuse ONLY the named adapter
+        self.pipeline.fuse_lora(
+            adapter_names=["lcm_lora"],
+            lora_scale=fuse_scale
+        )
+
+        # KEY CHANGE 4: Unload the side-car weights to free VRAM (since they are now inside the UNet)
+        self.pipeline.unload_lora_weights()
+
+        print(" [OK] LoRA fused and cleaned up.")
+
+        # 6. Load Preprocessors
         print("Loading Preprocessors (LeReS, LineArtAnime)...")
         self.leres_detector = LeresDetector.from_pretrained(Config.ANNOTATOR_REPO)
         self.lineart_anime_detector = LineartAnimeDetector.from_pretrained(Config.ANNOTATOR_REPO)
 
         print("--- All models loaded successfully ---")
 
-    def get_face_embedding(self, image):
-        """
-        if self.
-
-
-
+    def get_face_info(self, image):
+        """Extracts the largest face, returns insightface result object."""
+        if not self.face_analysis_loaded:
+            return None
+
+        try:
+            cv2_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+            faces = self.app.get(cv2_img)
+
+            if len(faces) == 0:
+                return None
+
+            # Sort by size (width * height) to find the main character
+            faces = sorted(faces, key=lambda x: (x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]), reverse=True)
+
+            # Return the largest face info
+            return faces[0]
+        except Exception as e:
+            print(f"Face embedding extraction failed: {e}")
+            return None
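The fuse-then-unload sequence is the standard Diffusers pattern for baking a LoRA into the UNet at a fixed scale; once fused, the adapter can no longer be rescaled per request, which is why the LoRA strength slider disappeared from app.py. A minimal standalone sketch of the same pattern (the LoRA repo and filename below are placeholders, not this Space's assets):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    # Placeholder LoRA repo/filename for illustration only.
    pipe.load_lora_weights("some-user/some-style-lora",
                           weight_name="style.safetensors", adapter_name="style")
    pipe.fuse_lora(adapter_names=["style"], lora_scale=1.0)  # bake into UNet weights
    pipe.unload_lora_weights()  # drop the side-car tensors; fused weights remain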
requirements.txt
CHANGED
@@ -5,7 +5,10 @@ peft
 torch
 opencv-python-headless
 Pillow
+insightface
+onnxruntime
 gradio>=4.0.0
 controlnet_aux
 huggingface_hub
-
+mediapipe
+timm
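Two pairings worth knowing here: insightface runs its detection/recognition models through onnxruntime, and the plain `onnxruntime` wheel is CPU-only, so the `CUDAExecutionProvider` requested in model.py will silently fall back to CPU unless `onnxruntime-gpu` is installed instead (an assumption to verify against the Space's base image). `mediapipe` and `timm` are presumably pulled in as runtime dependencies of the controlnet_aux annotators.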
utils.py
CHANGED
@@ -2,6 +2,9 @@ from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
 from config import Config
+import cv2
+import numpy as np
+import math
 
 # Simple global caching for the captioner
 captioner_processor = None
@@ -18,7 +21,7 @@ def resize_image_to_1mp(image):
     new_h = int((target_pixels / aspect_ratio) ** 0.5)
     new_w = int(new_h * aspect_ratio)
 
-    # Ensure divisibility by
+    # Ensure divisibility by 64 for efficiency
     new_w = (new_w // 64) * 64
     new_h = (new_h // 64) * 64
 
@@ -40,3 +43,35 @@ def get_caption(image):
     out = captioner_model.generate(**inputs)
     caption = captioner_processor.decode(out[0], skip_special_tokens=True)
     return caption
+
+# --- ADDED: Function from your provided file ---
+def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
+    stickwidth = 4
+    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
+    kps = np.array(kps)
+
+    w, h = image_pil.size
+    out_img = np.zeros([h, w, 3])
+
+    for i in range(len(limbSeq)):
+        index = limbSeq[i]
+        color = color_list[index[0]]
+
+        x = kps[index][:, 0]
+        y = kps[index][:, 1]
+        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
+        polygon = cv2.ellipse2Poly(
+            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
+        )
+        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
+    out_img = (out_img * 0.6).astype(np.uint8)
+
+    for idx_kp, kp in enumerate(kps):
+        color = color_list[idx_kp]
+        x, y = kp
+        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
+
+    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
+    return out_img_pil
+# --- END ADDED ---