Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed on Nov 17, 2025

Commit

5cf276c

verified ·

1 Parent(s): f5bcb07

Update generator.py

Browse files

Files changed (1) hide show

generator.py +21 -101

generator.py CHANGED Viewed

@@ -1,85 +1,29 @@
 import torch
 from config import Config
-from utils import get_caption, draw_kps
 from PIL import Image
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
-    def solve_bezier(self, t, p0, p1, p2, p3):
-        """
-        Calculates a point on a cubic Bezier curve for a given t (0 to 1).
-        """
-        t = max(0.0, min(1.0, t))
-        term0 = (1 - t)**3 * p0
-        term1 = 3 * (1 - t)**2 * t * p1
-        term2 = 3 * (1 - t) * t**2 * p2
-        term3 = t**3 * p3
-        return term0 + term1 + term2 + term3
-    def smart_crop_and_resize(self, image):
         """
-        Analyzes aspect ratio and snaps to the best SDXL resolution bucket.
-        Performs a center crop to match the target ratio, then resizes.
         """
-        w, h = image.size
-        aspect_ratio = w / h
-        # 1. Determine Target Resolution (Horizon SDXL Buckets)
-        if 0.85 <= aspect_ratio <= 1.15:
-            # Square-ish -> 1024x1024
-            target_w, target_h = 1024, 1024
-            print(f"Snap to Bucket: Square (1024x1024)")
-        elif aspect_ratio < 0.85:
-            # Portrait
-            # Decide between 896x1152 (AR ~0.77) and 832x1216 (AR ~0.68)
-            if aspect_ratio < 0.72:
-                target_w, target_h = 832, 1216 # Tall Portrait
-                print(f"Snap to Bucket: Tall Portrait (832x1216)")
-            else:
-                target_w, target_h = 896, 1152 # Standard Portrait
-                print(f"Snap to Bucket: Portrait (896x1152)")
-        else: # aspect_ratio > 1.15
-            # Landscape
-            # Decide between 1152x896 (AR ~1.28) and 1216x832 (AR ~1.46)
-            if aspect_ratio > 1.35:
-                target_w, target_h = 1216, 832 # Wide Landscape
-                print(f"Snap to Bucket: Wide Landscape (1216x832)")
-            else:
-                target_w, target_h = 1152, 896 # Standard Landscape
-                print(f"Snap to Bucket: Landscape (1152x896)")
-        # 2. Center Crop to Target Aspect Ratio
-        target_ar = target_w / target_h
-        if aspect_ratio > target_ar:
-            # Image is wider than target -> Crop width (cut sides)
-            new_w = int(h * target_ar)
-            offset = (w - new_w) // 2
-            crop_box = (offset, 0, offset + new_w, h)
-        else:
-            # Image is taller than target -> Crop height (cut top/bottom)
-            new_h = int(w / target_ar)
-            offset = (h - new_h) // 2
-            crop_box = (0, offset, w, offset + new_h)
-        cropped_img = image.crop(crop_box)
-        # 3. Resize to Exact Target Resolution
-        final_img = cropped_img.resize((target_w, target_h), Image.LANCZOS)
-        return final_img
-    def prepare_control_images(self, image, width, height):
         print(f"Generating control maps for {width}x{height}...")
         depth_map_raw = self.mh.leres_detector(image)
         lineart_map_raw = self.mh.lineart_anime_detector(image)
-        # Maps are resized to match the exact bucket resolution
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
     def predict(
@@ -94,39 +38,15 @@ class Generator:
         lineart_strength=0.3,
         seed=-1
     ):
-        # 1. Pre-process Inputs (New Smart Crop)
         print("Processing Input...")
-        processed_image = self.smart_crop_and_resize(input_image)
         target_width, target_height = processed_image.size
         # 2. Get Face Info
         face_info = self.mh.get_face_info(processed_image)
-        # --- CUBIC BEZIER ADAPTIVE LOGIC ---
-        adaptive_cfg = guidance_scale
-        adaptive_strength = img2img_strength
-        if face_info is not None:
-            # 1. Calculate Face Coverage (t)
-            bbox = face_info['bbox']
-            face_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
-            total_area = target_width * target_height
-            coverage_ratio = face_area / total_area
-            print(f"Face Coverage: {coverage_ratio:.3f} ({int(coverage_ratio * 12)}/12)")
-            # 2. Define Control Points (Half Less Aggressive)
-            cfg_mult = self.solve_bezier(coverage_ratio, 0.825, 0.85, 0.95, 1.0)
-            str_mult = self.solve_bezier(coverage_ratio, 0.9375, 0.95, 0.99, 1.0)
-            # 3. Apply Multipliers
-            adaptive_cfg = guidance_scale * cfg_mult
-            adaptive_strength = img2img_strength * str_mult
-            print(f"-> CFG Multiplier: {cfg_mult:.3f} | New CFG: {adaptive_cfg:.2f}")
-            print(f"-> Str Multiplier: {str_mult:.3f} | New Strength: {adaptive_strength:.2f}")
-        # --- END ADAPTIVE LOGIC ---
         # 3. Generate Prompt
         if not user_prompt.strip():
             try:
@@ -141,10 +61,10 @@ class Generator:
         print(f"Prompt: {final_prompt}")
         # 4. Generate Control Maps
-        print("Generating Control Maps...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
-        # 5. Face vs No-Face Setup
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
@@ -184,18 +104,18 @@ class Generator:
             image_embeds=face_emb,
             generator=generator,
-            # --- Using Adaptive Values ---
-            strength=adaptive_strength,
-            guidance_scale=adaptive_cfg,
-            num_inference_steps=num_inference_steps,
-            # ---------------------------
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             clip_skip=2,
             # --- TCD Specific Parameter ---
-            eta=0.3, # Controls stochasticity (gamma) for TCD
             # ------------------------------
         ).images[0]

 import torch
 from config import Config
+from utils import resize_image_to_1mp, get_caption, draw_kps
 from PIL import Image
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
+    def prepare_control_images(self, image, width, height):
         """
+        Generates conditioning maps, ensuring they are resized
+        to the exact target dimensions (width, height).
         """
         print(f"Generating control maps for {width}x{height}...")
+        # Generate depth map
         depth_map_raw = self.mh.leres_detector(image)
+        # Generate lineart map
         lineart_map_raw = self.mh.lineart_anime_detector(image)
+        # Manually resize maps to match the exact output resolution
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
     def predict(
         lineart_strength=0.3,
         seed=-1
     ):
+        # 1. Pre-process Inputs
         print("Processing Input...")
+        # Reverted to original aspect-ratio preserving resize
+        processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
         # 2. Get Face Info
         face_info = self.mh.get_face_info(processed_image)
         # 3. Generate Prompt
         if not user_prompt.strip():
             try:
         print(f"Prompt: {final_prompt}")
         # 4. Generate Control Maps
+        print("Generating Control Maps (Depth, LineArt)...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
+        # 5. Logic for Face vs No-Face
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
             image_embeds=face_emb,
             generator=generator,
+            # --- Static Values (Adaptive Logic Removed) ---
+            strength=img2img_strength,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            # --------------------------------------------
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             clip_skip=2,
             # --- TCD Specific Parameter ---
+            eta=0.3,
             # ------------------------------
         ).images[0]