Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed on Nov 17, 2025

Commit

6977800

verified ·

1 Parent(s): fa327ca

Update generator.py

Browse files

Files changed (1) hide show

generator.py +56 -21

generator.py CHANGED Viewed

@@ -1,29 +1,67 @@
 import torch
 from config import Config
-from utils import resize_image_to_1mp, get_caption, draw_kps
 from PIL import Image
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
     def prepare_control_images(self, image, width, height):
         """
         Generates conditioning maps, ensuring they are resized
         to the exact target dimensions (width, height).
         """
         print(f"Generating control maps for {width}x{height}...")
-        # Generate depth map
         depth_map_raw = self.mh.leres_detector(image)
-        # Generate lineart map
         lineart_map_raw = self.mh.lineart_anime_detector(image)
-        # Manually resize maps to match the exact output resolution
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
     def predict(
@@ -31,16 +69,18 @@ class Generator:
         input_image,
         user_prompt="",
         negative_prompt="",
-        guidance_scale=0.0, # TCD Default 0.0
-        num_inference_steps=6,
-        img2img_strength=0.3,
         depth_strength=0.3,
         lineart_strength=0.3,
         seed=-1
     ):
-        # 1. Pre-process Inputs
         print("Processing Input...")
-        processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
         # 2. Get Face Info
@@ -53,7 +93,7 @@ class Generator:
                 final_prompt = f"{Config.STYLE_TRIGGER}, {generated_caption}"
             except Exception as e:
                 print(f"Captioning failed: {e}, using default prompt.")
-                final_prompt = f"{Config.STYLE_TRIGGER}, a beautiful pixel art image"
         else:
             final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"
@@ -67,23 +107,18 @@ class Generator:
         # 5. Logic for Face vs No-Face
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
-            # Use Raw Embedding
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
             face_kps = draw_kps(processed_image, face_info['kps'])
             controlnet_conditioning_scale = [0.8, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.8)
         else:
             print("No face detected: Disabling InstantID.")
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
             controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
@@ -105,7 +140,7 @@ class Generator:
             generator=generator,
             strength=img2img_strength,
-            guidance_scale=guidance_scale,
             num_inference_steps=num_inference_steps,
             controlnet_conditioning_scale=controlnet_conditioning_scale,
@@ -113,7 +148,7 @@ class Generator:
             clip_skip=2,
             # --- TCD Specific Parameter ---
-            eta=0.3,
             # ------------------------------
         ).images[0]

 import torch
 from config import Config
+from utils import get_caption, draw_kps # Removed resize_image_to_1mp
 from PIL import Image
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
+    def smart_crop_and_resize(self, image):
+        """
+        Analyzes aspect ratio and snaps to the best SDXL resolution bucket.
+        Performs a center crop to match the target ratio, then resizes.
+        """
+        w, h = image.size
+        aspect_ratio = w / h
+        # 1. Determine Target Resolution (Horizon SDXL Buckets)
+        if 0.85 <= aspect_ratio <= 1.15:
+            target_w, target_h = 1024, 1024
+            print(f"Snap to Bucket: Square (1024x1024)")
+        elif aspect_ratio < 0.85:
+            if aspect_ratio < 0.72:
+                target_w, target_h = 832, 1216 # Tall Portrait
+                print(f"Snap to Bucket: Tall Portrait (832x1216)")
+            else:
+                target_w, target_h = 896, 1152 # Standard Portrait
+                print(f"Snap to Bucket: Portrait (896x1152)")
+        else: # aspect_ratio > 1.15
+            if aspect_ratio > 1.35:
+                target_w, target_h = 1216, 832 # Wide Landscape
+                print(f"Snap to Bucket: Wide Landscape (1216x832)")
+            else:
+                target_w, target_h = 1152, 896 # Standard Landscape
+                print(f"Snap to Bucket: Landscape (1152x896)")
+        # 2. Center Crop to Target Aspect Ratio
+        target_ar = target_w / target_h
+        if aspect_ratio > target_ar:
+            new_w = int(h * target_ar)
+            offset = (w - new_w) // 2
+            crop_box = (offset, 0, offset + new_w, h)
+        else:
+            new_h = int(w / target_ar)
+            offset = (h - new_h) // 2
+            crop_box = (0, offset, w, offset + new_h)
+        cropped_img = image.crop(crop_box)
+        # 3. Resize to Exact Target Resolution
+        final_img = cropped_img.resize((target_w, target_h), Image.LANCZOS)
+        return final_img
     def prepare_control_images(self, image, width, height):
         """
         Run the depth and line-art detectors on `image` and scale both
         outputs to exactly (width, height).

         Returns a (depth_map, lineart_map) tuple.
         """
         print(f"Generating control maps for {width}x{height}...")
         output_size = (width, height)
         # Both detectors run before any resizing, depth first.
         raw_depth = self.mh.leres_detector(image)
         raw_lineart = self.mh.lineart_anime_detector(image)
         # Each detector output is resized explicitly to the requested size.
         scaled_depth = raw_depth.resize(output_size, Image.LANCZOS)
         scaled_lineart = raw_lineart.resize(output_size, Image.LANCZOS)
         return scaled_depth, scaled_lineart
     def predict(
         input_image,
         user_prompt="",
         negative_prompt="",
+        # --- TCD Optimized Defaults ---
+        guidance_scale=0.0,
+        num_inference_steps=8, # TCD works well at 8 steps
+        img2img_strength=0.9, # Needs to be high for img2img
+        # ----------------------------
         depth_strength=0.3,
         lineart_strength=0.3,
         seed=-1
     ):
+        # 1. Pre-process Inputs (Using Smart Crop)
         print("Processing Input...")
+        processed_image = self.smart_crop_and_resize(input_image)
         target_width, target_height = processed_image.size
         # 2. Get Face Info
                 final_prompt = f"{Config.STYLE_TRIGGER}, {generated_caption}"
             except Exception as e:
                 print(f"Captioning failed: {e}, using default prompt.")
+                final_prompt = f"{Config.STYLE_TRIGGER}, a beautiful image"
         else:
             final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"
         # 5. Logic for Face vs No-Face
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
             face_kps = draw_kps(processed_image, face_info['kps'])
             controlnet_conditioning_scale = [0.8, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.8)
         else:
             print("No face detected: Disabling InstantID.")
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
             controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
             generator=generator,
             strength=img2img_strength,
+            guidance_scale=guidance_scale, # Will be 0.0 from default
             num_inference_steps=num_inference_steps,
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             clip_skip=2,
             # --- TCD Specific Parameter ---
+            eta=0.3, # Gamma/Stochasticity
             # ------------------------------
         ).images[0]