pixagram-dev

Runtime error

App · Files · Community

primerz committed on Oct 30

Commit

1daefa9

verified ·

1 Parent(s): bdae0e1

Upload 2 files

Browse files

Files changed (2) hide show

config.py +1 -1
generator.py +50 -17

config.py CHANGED Viewed

@@ -24,7 +24,7 @@ TRIGGER_WORD = "p1x3l4rt, pixel art"
 # Face detection configuration
 FACE_DETECTION_CONFIG = {
-    "model_name": "antelopev2",
     "det_size": (640, 640),
     "ctx_id": 0
 }

 # Face detection configuration
 FACE_DETECTION_CONFIG = {
+    "model_name": "buffalo_l",  # Most accurate, auto-downloads
     "det_size": (640, 640),
     "ctx_id": 0
 }

generator.py CHANGED Viewed

@@ -153,17 +153,15 @@ class RetroArtConverter:
                 if image.mode != 'RGB':
                     image = image.convert('RGB')
-                # Get original dimensions
-                orig_width, orig_height = image.size
-                # ZoeDetector handles sizing internally - just pass the image
-                depth_image = self.zoe_depth(image)
-                # Ensure output matches original size
-                if depth_image.size != (orig_width, orig_height):
-                    depth_image = depth_image.resize((orig_width, orig_height), Image.LANCZOS)
-                print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
                 return depth_image
             except Exception as e:
@@ -601,22 +599,57 @@ class RetroArtConverter:
             try:
                 print("Encoding prompts with Compel...")
-                # Encode prompts with Compel
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)
                 # Extract embeddings
-                pipe_kwargs["prompt_embeds"] = conditioning[0]
-                pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
-                pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
-                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
                 compel_success = True
-                print("[OK] Using Compel-encoded prompts")
             except Exception as e:
-                print(f"[COMPEL] Encoding failed: {e}")
-                print(f"[COMPEL] Using standard prompt encoding instead")
                 compel_success = False
         # Use standard prompts if Compel failed or not available

                 if image.mode != 'RGB':
                     image = image.convert('RGB')
+                # CRITICAL FIX: Convert to numpy and back to ensure clean PIL Image
+                # This removes any numpy int64 contamination in image.size
+                image_array = np.array(image)
+                clean_image = Image.fromarray(image_array)
+                # Now ZoeDetector receives clean PIL Image with Python int dimensions
+                depth_image = self.zoe_depth(clean_image)
+                print(f"[DEPTH] Zoe depth map generated: {clean_image.size[0]}x{clean_image.size[1]}")
                 return depth_image
             except Exception as e:
             try:
                 print("Encoding prompts with Compel...")
+                # Encode prompts separately (Compel handles dual text encoders internally)
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)
                 # Extract embeddings
+                prompt_embeds = conditioning[0]
+                pooled_prompt_embeds = conditioning[1]
+                negative_prompt_embeds = negative_conditioning[0]
+                negative_pooled_prompt_embeds = negative_conditioning[1]
+                # Force to 77 tokens (SDXL standard) to ensure compatibility
+                target_length = 77
+                # Truncate or pad positive embeddings
+                if prompt_embeds.shape[1] > target_length:
+                    prompt_embeds = prompt_embeds[:, :target_length, :]
+                elif prompt_embeds.shape[1] < target_length:
+                    padding = torch.zeros(
+                        prompt_embeds.shape[0],
+                        target_length - prompt_embeds.shape[1],
+                        prompt_embeds.shape[2],
+                        dtype=prompt_embeds.dtype,
+                        device=prompt_embeds.device
+                    )
+                    prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
+                # Truncate or pad negative embeddings
+                if negative_prompt_embeds.shape[1] > target_length:
+                    negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
+                elif negative_prompt_embeds.shape[1] < target_length:
+                    padding = torch.zeros(
+                        negative_prompt_embeds.shape[0],
+                        target_length - negative_prompt_embeds.shape[1],
+                        negative_prompt_embeds.shape[2],
+                        dtype=negative_prompt_embeds.dtype,
+                        device=negative_prompt_embeds.device
+                    )
+                    negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
+                pipe_kwargs["prompt_embeds"] = prompt_embeds
+                pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
+                pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
+                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
                 compel_success = True
+                print(f"[OK] Compel encoded: pos={prompt_embeds.shape}, neg={negative_prompt_embeds.shape}")
             except Exception as e:
+                # Compel encoding failed - fall back to standard encoding
+                print(f"[COMPEL] Failed: {e}")
+                print("[COMPEL] Falling back to standard encoding")
                 compel_success = False
         # Use standard prompts if Compel failed or not available