Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed on Nov 17

Commit

8a3467f

verified ·

1 Parent(s): 051c96e

Update generator.py

Browse files

Files changed (1) hide show

generator.py +46 -27

generator.py CHANGED Viewed

@@ -21,7 +21,6 @@ class Generator:
         lineart_map_raw = self.mh.lineart_anime_detector(image)
         # Manually resize maps to match the exact output resolution
-        # This ensures the aspect ratio is preserved from the processed_image
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
@@ -39,16 +38,50 @@ class Generator:
         lineart_strength=0.3,
         seed=-1
     ):
-        # 1. Pre-process Inputs (Maintains Aspect Ratio)
         print("Processing Input...")
-        # Keeps original aspect ratio logic
         processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
-        # 2. Get Face Info
-        # (Note: Your model.py already handles the "Max Face" sorting logic)
         face_info = self.mh.get_face_info(processed_image)
         # 3. Generate Prompt
         if not user_prompt.strip():
             try:
@@ -61,46 +94,34 @@ class Generator:
             final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"
         print(f"Prompt: {final_prompt}")
-        print(f"Negative Prompt: {negative_prompt}")
-        # 4. Generate OTHER Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
         # 5. Logic for Face vs No-Face
-        # ControlNet order: [InstantID_KPS, Zoe_Depth, LineArt]
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
-            # --- FIX APPLIED HERE ---
-            # Changed from face_info.normed_embedding to face_info['embedding']
-            # This fixes the "generic/Chinese face" issue by using the raw embedding magnitude.
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
-            # --- END FIX ---
-            # Create keypoint image
             face_kps = draw_kps(processed_image, face_info['kps'])
-            # Set strengths
-            # Note: 0.8 is the standard effective strength for InstantID
             controlnet_conditioning_scale = [0.8, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.8)
         else:
             print("No face detected: Disabling InstantID.")
-            # Create dummy embedding
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
-            # Create dummy keypoint image (black)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
-            # Set strengths
             controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
-        # We keep the guidance_end for pose low (Standard InstantID practice)
         control_guidance_end = [0.3, 0.6, 0.6]
         # --- Seed/Generator Logic ---
@@ -108,27 +129,25 @@ class Generator:
             seed = torch.Generator().seed()
         generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
         print(f"Using seed: {seed}")
-        # --- END ---
         # 6. Run Inference
         print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
-            image=processed_image,  # Base img2img image
             control_image=[face_kps, depth_map, lineart_map],
-            image_embeds=face_emb,  # Face identity embedding
             generator=generator,
-            # --- Parameters from UI ---
-            strength=img2img_strength,
             num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            # --- End Parameters from UI ---
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             clip_skip=2,
         ).images[0]

         lineart_map_raw = self.mh.lineart_anime_detector(image)
         # Manually resize maps to match the exact output resolution
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         lineart_strength=0.3,
         seed=-1
     ):
+        # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
+        # 2. Get Face Info
         face_info = self.mh.get_face_info(processed_image)
+        # --- START ADAPTIVE PARAMETER LOGIC ---
+        adaptive_cfg = guidance_scale
+        adaptive_strength = img2img_strength
+        if face_info is not None:
+            # Calculate Face Coverage Ratio
+            bbox = face_info['bbox']
+            face_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+            total_area = target_width * target_height
+            coverage_ratio = face_area / total_area
+            print(f"Face Coverage: {coverage_ratio:.2f} ({int(coverage_ratio * 12)}/12)")
+            # Apply variance logic based on your requested thresholds
+            if coverage_ratio >= (8/12): # > 0.66 (High Coverage)
+                # Lower CFG by 5-15% (avg 10%), keep strength same
+                adaptive_cfg = guidance_scale * 0.90
+                adaptive_strength = img2img_strength * 1.0
+                print("-> High Coverage: Applying slight CFG reduction (-10%)")
+            elif coverage_ratio >= (4/12): # 0.33 to 0.66 (Medium Coverage)
+                # CFG lower 20-30% (avg 25%), strength lower 5-10% (avg 7.5%)
+                adaptive_cfg = guidance_scale * 0.75
+                adaptive_strength = img2img_strength * 0.925
+                print("-> Medium Coverage: Lowering CFG (-25%) and Strength (-7.5%)")
+            else: # < 0.33 (Low Coverage)
+                # CFG lower 30-40% (avg 35%), strength lower 10-15% (avg 12.5%)
+                adaptive_cfg = guidance_scale * 0.65
+                adaptive_strength = img2img_strength * 0.875
+                print("-> Low Coverage: Significantly lowering CFG (-35%) and Strength (-12.5%)")
+        print(f"Adaptive CFG: {guidance_scale} -> {adaptive_cfg:.2f}")
+        print(f"Adaptive Strength: {img2img_strength} -> {adaptive_strength:.2f}")
+        # --- END ADAPTIVE PARAMETER LOGIC ---
         # 3. Generate Prompt
         if not user_prompt.strip():
             try:
             final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"
         print(f"Prompt: {final_prompt}")
+        # 4. Generate OTHER Control Maps
         print("Generating Control Maps (Depth, LineArt)...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
         # 5. Logic for Face vs No-Face
         if face_info is not None:
             print("Face detected: Applying InstantID with keypoints.")
+            # Corrected Raw Embedding Usage
             face_emb = torch.tensor(
                 face_info['embedding'],
                 dtype=Config.DTYPE,
                 device=Config.DEVICE
             ).unsqueeze(0)
             face_kps = draw_kps(processed_image, face_info['kps'])
             controlnet_conditioning_scale = [0.8, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.8)
         else:
             print("No face detected: Disabling InstantID.")
             face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
             face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))
             controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
         control_guidance_end = [0.3, 0.6, 0.6]
         # --- Seed/Generator Logic ---
             seed = torch.Generator().seed()
         generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
         print(f"Using seed: {seed}")
         # 6. Run Inference
         print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
+            image=processed_image,
             control_image=[face_kps, depth_map, lineart_map],
+            image_embeds=face_emb,
             generator=generator,
+            # --- UPDATED: Use Adaptive Parameters ---
+            strength=adaptive_strength,       # <-- Uses calculated strength
+            guidance_scale=adaptive_cfg,      # <-- Uses calculated CFG
             num_inference_steps=num_inference_steps,
+            # --------------------------------------
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             clip_skip=2,
         ).images[0]