Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- config.py +1 -1
- generator.py +50 -17
config.py
CHANGED
|
@@ -24,7 +24,7 @@ TRIGGER_WORD = "p1x3l4rt, pixel art"
|
|
| 24 |
|
| 25 |
# Face detection configuration
|
| 26 |
FACE_DETECTION_CONFIG = {
|
| 27 |
-
"model_name": "
|
| 28 |
"det_size": (640, 640),
|
| 29 |
"ctx_id": 0
|
| 30 |
}
|
|
|
|
| 24 |
|
| 25 |
# Face detection configuration
|
| 26 |
FACE_DETECTION_CONFIG = {
|
| 27 |
+
"model_name": "buffalo_l", # Most accurate, auto-downloads
|
| 28 |
"det_size": (640, 640),
|
| 29 |
"ctx_id": 0
|
| 30 |
}
|
generator.py
CHANGED
|
@@ -153,17 +153,15 @@ class RetroArtConverter:
|
|
| 153 |
if image.mode != 'RGB':
|
| 154 |
image = image.convert('RGB')
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
|
|
|
|
|
|
|
| 158 |
|
| 159 |
-
# ZoeDetector
|
| 160 |
-
depth_image = self.zoe_depth(
|
| 161 |
|
| 162 |
-
|
| 163 |
-
if depth_image.size != (orig_width, orig_height):
|
| 164 |
-
depth_image = depth_image.resize((orig_width, orig_height), Image.LANCZOS)
|
| 165 |
-
|
| 166 |
-
print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
|
| 167 |
return depth_image
|
| 168 |
|
| 169 |
except Exception as e:
|
|
@@ -601,22 +599,57 @@ class RetroArtConverter:
|
|
| 601 |
try:
|
| 602 |
print("Encoding prompts with Compel...")
|
| 603 |
|
| 604 |
-
# Encode prompts
|
| 605 |
conditioning = self.compel(prompt)
|
| 606 |
negative_conditioning = self.compel(negative_prompt)
|
| 607 |
|
| 608 |
# Extract embeddings
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
compel_success = True
|
| 615 |
-
print("[OK]
|
| 616 |
|
| 617 |
except Exception as e:
|
| 618 |
-
|
| 619 |
-
print(f"[COMPEL]
|
|
|
|
| 620 |
compel_success = False
|
| 621 |
|
| 622 |
# Use standard prompts if Compel failed or not available
|
|
|
|
| 153 |
if image.mode != 'RGB':
|
| 154 |
image = image.convert('RGB')
|
| 155 |
|
| 156 |
+
# CRITICAL FIX: Convert to numpy and back to ensure clean PIL Image
|
| 157 |
+
# This removes any numpy int64 contamination in image.size
|
| 158 |
+
image_array = np.array(image)
|
| 159 |
+
clean_image = Image.fromarray(image_array)
|
| 160 |
|
| 161 |
+
# Now ZoeDetector receives clean PIL Image with Python int dimensions
|
| 162 |
+
depth_image = self.zoe_depth(clean_image)
|
| 163 |
|
| 164 |
+
print(f"[DEPTH] Zoe depth map generated: {clean_image.size[0]}x{clean_image.size[1]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
return depth_image
|
| 166 |
|
| 167 |
except Exception as e:
|
|
|
|
| 599 |
try:
|
| 600 |
print("Encoding prompts with Compel...")
|
| 601 |
|
| 602 |
+
# Encode prompts separately (Compel handles dual text encoders internally)
|
| 603 |
conditioning = self.compel(prompt)
|
| 604 |
negative_conditioning = self.compel(negative_prompt)
|
| 605 |
|
| 606 |
# Extract embeddings
|
| 607 |
+
prompt_embeds = conditioning[0]
|
| 608 |
+
pooled_prompt_embeds = conditioning[1]
|
| 609 |
+
negative_prompt_embeds = negative_conditioning[0]
|
| 610 |
+
negative_pooled_prompt_embeds = negative_conditioning[1]
|
| 611 |
+
|
| 612 |
+
# Force to 77 tokens (SDXL standard) to ensure compatibility
|
| 613 |
+
target_length = 77
|
| 614 |
+
|
| 615 |
+
# Truncate or pad positive embeddings
|
| 616 |
+
if prompt_embeds.shape[1] > target_length:
|
| 617 |
+
prompt_embeds = prompt_embeds[:, :target_length, :]
|
| 618 |
+
elif prompt_embeds.shape[1] < target_length:
|
| 619 |
+
padding = torch.zeros(
|
| 620 |
+
prompt_embeds.shape[0],
|
| 621 |
+
target_length - prompt_embeds.shape[1],
|
| 622 |
+
prompt_embeds.shape[2],
|
| 623 |
+
dtype=prompt_embeds.dtype,
|
| 624 |
+
device=prompt_embeds.device
|
| 625 |
+
)
|
| 626 |
+
prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
|
| 627 |
+
|
| 628 |
+
# Truncate or pad negative embeddings
|
| 629 |
+
if negative_prompt_embeds.shape[1] > target_length:
|
| 630 |
+
negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
|
| 631 |
+
elif negative_prompt_embeds.shape[1] < target_length:
|
| 632 |
+
padding = torch.zeros(
|
| 633 |
+
negative_prompt_embeds.shape[0],
|
| 634 |
+
target_length - negative_prompt_embeds.shape[1],
|
| 635 |
+
negative_prompt_embeds.shape[2],
|
| 636 |
+
dtype=negative_prompt_embeds.dtype,
|
| 637 |
+
device=negative_prompt_embeds.device
|
| 638 |
+
)
|
| 639 |
+
negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
|
| 640 |
+
|
| 641 |
+
pipe_kwargs["prompt_embeds"] = prompt_embeds
|
| 642 |
+
pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
|
| 643 |
+
pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
|
| 644 |
+
pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
|
| 645 |
|
| 646 |
compel_success = True
|
| 647 |
+
print(f"[OK] Compel encoded: pos={prompt_embeds.shape}, neg={negative_prompt_embeds.shape}")
|
| 648 |
|
| 649 |
except Exception as e:
|
| 650 |
+
# Compel encoding failed - fall back to standard encoding
|
| 651 |
+
print(f"[COMPEL] Failed: {e}")
|
| 652 |
+
print("[COMPEL] Falling back to standard encoding")
|
| 653 |
compel_success = False
|
| 654 |
|
| 655 |
# Use standard prompts if Compel failed or not available
|