Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed about 1 month ago

Commit

3e3e641

verified ·

1 Parent(s): 6d5987b

Update generator.py

Browse files

Files changed (1) hide show

generator.py +30 -24

generator.py CHANGED Viewed

@@ -1,18 +1,23 @@
 import torch
 from config import Config
-from utils import resize_image_to_1mp, get_caption, prepare_control_images
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
         # 2. Get Face Embedding (Robust Mode)
-        # Now returns None instead of crashing if no face is found
         face_emb = self.mh.get_face_embedding(processed_image)
         # 3. Generate Prompt
@@ -25,49 +30,50 @@ class Generator:
         print(f"Prompt: {final_prompt}")
         # 4. Generate Control Maps (Structure)
-        depth_map, lineart_map = prepare_control_images(processed_image, self.mh.zoe_detector, self.mh.lineart_detector)
         # 5. Logic for Face vs No-Face
         if face_emb is not None:
             print("Face detected: Applying InstantID.")
-            # [InstantID, Zoe, LineArt]
-            # Stop InstantID at 50% to allow pixelation
             controlnet_conditioning_scale = [0.6, 0.4, 0.4]
             control_guidance_end = [0.5, 0.8, 0.8]
             ip_adapter_scale = 0.9
-            # InstantID requires the face embedding usually via IP-adapter input
-            # We pass the processed image to ip_adapter_image (library handles crop internally usually,
-            # or we rely on the embedding we extracted if using custom pipeline.
-            # Standard diffusers IP adapter uses the image).
-            ip_image = processed_image
         else:
             print("No face detected: Disabling InstantID, using only Structure+Style.")
             # Disable InstantID (Weight 0.0)
             controlnet_conditioning_scale = [0.0, 0.4, 0.4]
             control_guidance_end = [0.0, 0.8, 0.8]
             ip_adapter_scale = 0.0
-            # Pass generic image to satisfy input requirement, but scale is 0 so it's ignored
-            ip_image = processed_image
-        # Set IP Adapter Scale
-        self.mh.pipeline.set_ip_adapter_scale(ip_adapter_scale)
         # 6. Run Inference
         result = self.mh.pipeline(
             prompt=final_prompt,
-            # We pass the image list corresponding to [InstantID, Zoe, LineArt]
-            # Even if InstantID weight is 0, we must pass an image to keep list length correct.
-            image=[processed_image, depth_map, lineart_map],
-            # IP Adapter input
-            ip_adapter_image=[ip_image],
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
-            num_inference_steps=8, # LCM is fast
-            guidance_scale=1.5,    # LCM needs low CFG
-            cross_attention_kwargs={"scale": 1.0}
         ).images[0]
         return result

 import torch
 from config import Config
+from utils import resize_image_to_1mp, get_caption
 class Generator:
     def __init__(self, model_handler):
         # Keep a reference to the model handler; the other methods reach the
         # detectors, the face-embedding helper and the pipeline through self.mh.
         self.mh = model_handler
+    def prepare_control_images(self, image):
+        """Generates the conditioning maps from the input image."""
+        # Each detector on the model handler is called once with the same image.
+        depth_map = self.mh.zoe_detector(image)
+        lineart_map = self.mh.lineart_detector(image)
+        # Order matters: the caller unpacks the result as (depth_map, lineart_map).
+        return depth_map, lineart_map
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
         # 2. Get Face Embedding (Robust Mode)
         face_emb = self.mh.get_face_embedding(processed_image)
         # 3. Generate Prompt
         print(f"Prompt: {final_prompt}")
         # 4. Generate Control Maps (Structure)
+        print("Generating Control Maps (Depth, LineArt)...")
+        depth_map, lineart_map = self.prepare_control_images(processed_image)
         # 5. Logic for Face vs No-Face
         if face_emb is not None:
             print("Face detected: Applying InstantID.")
+            # [InstantID, Zoe, LineArt] (Must match load order in model.py)
+            # SCALE: InstantID Medium (0.6), Zoe Low (0.4), LineArt Low (0.4)
             controlnet_conditioning_scale = [0.6, 0.4, 0.4]
+            # STOP: InstantID stops EARLY (50%) to allow pixelation
             control_guidance_end = [0.5, 0.8, 0.8]
+            # IP Adapter Scale (Likeness): Keep High
             ip_adapter_scale = 0.9
+            # We must pass the face embedding and the image for the IP-Adapter
+            ip_adapter_image = processed_image
+            prompt_embeds, _ = self.mh.pipeline.ip_adapter.get_prompt_embeds(ip_adapter_image, face_emb, None)
         else:
             print("No face detected: Disabling InstantID, using only Structure+Style.")
             # Disable InstantID (Weight 0.0)
             controlnet_conditioning_scale = [0.0, 0.4, 0.4]
             control_guidance_end = [0.0, 0.8, 0.8]
             ip_adapter_scale = 0.0
+            prompt_embeds = None # No face embedding
         # 6. Run Inference
+        print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
+            prompt_embeds=prompt_embeds,
+            # ControlNet inputs
+            image=[processed_image, depth_map, lineart_map], # List for [ID, Zoe, LineArt]
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
+            # LCM settings
+            num_inference_steps=8,
+            guidance_scale=1.5,
         ).images[0]
         return result