Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Community

primerz committed 27 days ago

Commit

ff014fd

verified ·

1 Parent(s): 2799929

Update generator.py

Browse files

Files changed (1) hide show

generator.py +39 -139

generator.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
 from config import Config
-from utils import get_caption, draw_kps
 from PIL import Image
 class Generator:
@@ -15,23 +15,23 @@ class Generator:
         w, h = image.size
         aspect_ratio = w / h
-        # 1. Determine Target Resolution (SDXL Buckets)
         if 0.85 <= aspect_ratio <= 1.15:
             target_w, target_h = 1024, 1024
             print(f"Snap to Bucket: Square (1024x1024)")
         elif aspect_ratio < 0.85:
             if aspect_ratio < 0.72:
-                target_w, target_h = 832, 1216  # Tall Portrait
                 print(f"Snap to Bucket: Tall Portrait (832x1216)")
             else:
-                target_w, target_h = 896, 1152  # Standard Portrait
                 print(f"Snap to Bucket: Portrait (896x1152)")
-        else:  # aspect_ratio > 1.15
             if aspect_ratio > 1.35:
-                target_w, target_h = 1216, 832  # Wide Landscape
                 print(f"Snap to Bucket: Wide Landscape (1216x832)")
             else:
-                target_w, target_h = 1152, 896  # Standard Landscape
                 print(f"Snap to Bucket: Landscape (1152x896)")
         # 2. Center Crop to Target Aspect Ratio
@@ -52,93 +52,33 @@ class Generator:
         final_img = cropped_img.resize((target_w, target_h), Image.LANCZOS)
         return final_img
-    def prepare_control_images(
-        self,
-        image,
-        width,
-        height,
-        edge_type=None,
-        canny_low=100,
-        canny_high=200
-    ):
         """
-        Generates conditioning maps based on edge_type.
-        Returns:
-            tuple: (depth_map, edge_maps_list) where edge_maps_list matches the ControlNet setup
         """
-        if edge_type is None:
-            edge_type = self.mh.edge_type
-        print(f"Generating control maps ({edge_type}) for {width}x{height}...")
-        # Always generate depth
-        depth_map_raw = self.mh.extract_depth(image)
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
-        edge_maps = []
-        if edge_type == "canny":
-            canny_map_raw = self.mh.extract_canny(image, canny_low, canny_high)
-            canny_map = canny_map_raw.resize((width, height), Image.LANCZOS)
-            edge_maps.append(canny_map)
-            print(f"  ✓ Canny edges generated")
-        elif edge_type == "lineart":
-            lineart_map_raw = self.mh.extract_lineart(image)
-            lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
-            edge_maps.append(lineart_map)
-            print(f"  ✓ LineArt edges generated")
-        elif edge_type == "both":
-            canny_map_raw = self.mh.extract_canny(image, canny_low, canny_high)
-            canny_map = canny_map_raw.resize((width, height), Image.LANCZOS)
-            edge_maps.append(canny_map)
-            lineart_map_raw = self.mh.extract_lineart(image)
-            lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
-            edge_maps.append(lineart_map)
-            print(f"  ✓ Both Canny and LineArt generated")
-        return depth_map, edge_maps
     def predict(
         self,
         input_image,
         user_prompt="",
         negative_prompt="",
-        guidance_scale=4.0,
-        num_inference_steps=8,
-        img2img_strength=0.9,
-        depth_strength=0.3,
-        edge_strength=0.3,
-        instantid_strength=0.8,
-        canny_low_threshold=100,
-        canny_high_threshold=200,
-        eta=0.45,
-        seed=-1,
-        return_control_images=False
     ):
-        """
-        Enhanced prediction with more control options.
-        Args:
-            input_image: PIL Image
-            user_prompt: Text prompt (optional, will auto-caption if empty)
-            negative_prompt: Negative prompt
-            guidance_scale: CFG scale (4.0 recommended for TCD + LoRA)
-            num_inference_steps: Number of steps (4-12 for TCD)
-            img2img_strength: Denoising strength
-            depth_strength: Depth ControlNet strength
-            edge_strength: Edge ControlNet strength (canny/lineart)
-            instantid_strength: Face preservation strength
-            canny_low_threshold: Canny low threshold (if using canny)
-            canny_high_threshold: Canny high threshold (if using canny)
-            eta: TCD stochasticity parameter
-            seed: Random seed (-1 for random)
-            return_control_images: Return control images for debugging
-        """
-        # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = self.smart_crop_and_resize(input_image)
         target_width, target_height = processed_image.size
@@ -161,92 +101,52 @@ class Generator:
         print(f"Negative Prompt: {negative_prompt}")
         # 4. Generate Control Maps
-        print("Generating Control Maps...")
-        depth_map, edge_maps = self.prepare_control_images(
-            processed_image,
-            target_width,
-            target_height,
-            canny_low=canny_low_threshold,
-            canny_high=canny_high_threshold
-        )
-        # 5. Setup conditioning based on face detection
-        control_images = []
-        conditioning_scales = []
-        control_guidance_end = []
         if face_info is not None:
-            print(f"✓ Face detected: Applying InstantID (strength: {instantid_strength})")
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
             face_kps = draw_kps(processed_image, face_info['kps'])
-            # Add face keypoints
-            control_images.append(face_kps)
-            conditioning_scales.append(instantid_strength)
-            control_guidance_end.append(0.3)
-            # Set IP-Adapter scale for face
-            self.mh.pipeline.set_ip_adapter_scale(instantid_strength)
         else:
-            print("✗ No face detected: Disabling InstantID")
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
-            # Add placeholder face keypoints
-            control_images.append(face_kps)
-            conditioning_scales.append(0.0)
-            control_guidance_end.append(0.6)
             self.mh.pipeline.set_ip_adapter_scale(0.0)
-        # Add depth map
-        control_images.append(depth_map)
-        conditioning_scales.append(depth_strength)
-        control_guidance_end.append(0.6)
-        # Add edge map(s)
-        for edge_map in edge_maps:
-            control_images.append(edge_map)
-            conditioning_scales.append(edge_strength)
-            control_guidance_end.append(0.6)
-        # 6. Setup seed
         if seed == -1 or seed is None:
             seed = torch.Generator().seed()
         generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
         print(f"Using seed: {seed}")
-        # 7. Run Inference
-        print(f"Running pipeline (steps: {num_inference_steps}, cfg: {guidance_scale}, eta: {eta})...")
         result = self.mh.pipeline(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
             image=processed_image,
-            control_image=control_images,
             image_embeds=face_emb,
             generator=generator,
             strength=img2img_strength,
             guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            controlnet_conditioning_scale=conditioning_scales,
             control_guidance_end=control_guidance_end,
             clip_skip=0,
-            eta=eta,
         ).images[0]
-        if return_control_images:
-            return result, {
-                'depth': depth_map,
-                'edges': edge_maps,
-                'face_kps': face_kps if face_info else None,
-                'processed_input': processed_image
-            }
         return result

 import torch
 from config import Config
+from utils import get_caption, draw_kps # Removed resize_image_to_1mp
 from PIL import Image
 class Generator:
         w, h = image.size
         aspect_ratio = w / h
+        # 1. Determine Target Resolution (Horizon SDXL Buckets)
         if 0.85 <= aspect_ratio <= 1.15:
             target_w, target_h = 1024, 1024
             print(f"Snap to Bucket: Square (1024x1024)")
         elif aspect_ratio < 0.85:
             if aspect_ratio < 0.72:
+                target_w, target_h = 832, 1216 # Tall Portrait
                 print(f"Snap to Bucket: Tall Portrait (832x1216)")
             else:
+                target_w, target_h = 896, 1152 # Standard Portrait
                 print(f"Snap to Bucket: Portrait (896x1152)")
+        else: # aspect_ratio > 1.15
             if aspect_ratio > 1.35:
+                target_w, target_h = 1216, 832 # Wide Landscape
                 print(f"Snap to Bucket: Wide Landscape (1216x832)")
             else:
+                target_w, target_h = 1152, 896 # Standard Landscape
                 print(f"Snap to Bucket: Landscape (1152x896)")
         # 2. Center Crop to Target Aspect Ratio
         final_img = cropped_img.resize((target_w, target_h), Image.LANCZOS)
         return final_img
+    def prepare_control_images(self, image, width, height):
         """
+        Generates conditioning maps, ensuring they are resized
+        to the exact target dimensions (width, height).
         """
+        print(f"Generating control maps for {width}x{height}...")
+        depth_map_raw = self.mh.leres_detector(image)
+        lineart_map_raw = self.mh.lineart_anime_detector(image)
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
+        lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
+        return depth_map, lineart_map
     def predict(
         self,
         input_image,
         user_prompt="",
         negative_prompt="",
+        # --- DPMSolver++ Optimized Defaults ---
+        guidance_scale=7.0,
+        num_inference_steps=20,
+        img2img_strength=0.85,
+        # ----------------------------
+        depth_strength=0.8,
+        lineart_strength=0.8,
+        seed=-1
     ):
+        # 1. Pre-process Inputs (Using Smart Crop)
         print("Processing Input...")
         processed_image = self.smart_crop_and_resize(input_image)
         target_width, target_height = processed_image.size
         print(f"Negative Prompt: {negative_prompt}")
         # 4. Generate Control Maps
+        print("Generating Control Maps (Depth, LineArt)...")
+        depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
+        # 5. Logic for Face vs No-Face
         if face_info is not None:
+            print("Face detected: Applying InstantID with keypoints.")
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
             face_kps = draw_kps(processed_image, face_info['kps'])
+            controlnet_conditioning_scale = [0.8, depth_strength, lineart_strength]
+            self.mh.pipeline.set_ip_adapter_scale(0.8)
         else:
+            print("No face detected: Disabling InstantID.")
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
+            controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
+        control_guidance_end = [0.3, 0.6, 0.6]
         if seed == -1 or seed is None:
             seed = torch.Generator().seed()
         generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
         print(f"Using seed: {seed}")
+        # 6. Run Inference
+        print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
             image=processed_image,
+            control_image=[face_kps, depth_map, lineart_map],
             image_embeds=face_emb,
             generator=generator,
             strength=img2img_strength,
             guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             clip_skip=0,
         ).images[0]
         return result