Spaces:
Runtime error
Runtime error
Update generator.py
Browse files
- generator.py +11 -41
generator.py
CHANGED
|
@@ -54,7 +54,7 @@ class RetroArtConverter:
|
|
| 54 |
self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
|
| 55 |
self.models_loaded['mediapipe_face'] = mediapipe_success
|
| 56 |
|
| 57 |
-
# Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
|
| 58 |
self.depth_detector, self.depth_type, depth_success = load_depth_detector()
|
| 59 |
self.models_loaded['depth_detector'] = depth_success
|
| 60 |
self.models_loaded['depth_type'] = self.depth_type
|
|
@@ -627,7 +627,7 @@ class RetroArtConverter:
|
|
| 627 |
if len(faces) > 0:
|
| 628 |
insightface_success = True
|
| 629 |
has_detected_faces = True
|
| 630 |
-
print(f"
|
| 631 |
|
| 632 |
# Get largest face
|
| 633 |
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
|
@@ -688,7 +688,7 @@ class RetroArtConverter:
|
|
| 688 |
print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
|
| 689 |
print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
|
| 690 |
else:
|
| 691 |
-
print("
|
| 692 |
|
| 693 |
except Exception as e:
|
| 694 |
print(f"[ERROR] InsightFace detection failed: {e}")
|
|
@@ -710,14 +710,14 @@ class RetroArtConverter:
|
|
| 710 |
if mediapipe_array.sum() > 1000: # If image has significant content
|
| 711 |
has_detected_faces = True
|
| 712 |
face_kps_image = mediapipe_result
|
| 713 |
-
print(f"
|
| 714 |
print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
|
| 715 |
|
| 716 |
# Note: MediapipeFace doesn't provide embeddings or detailed info
|
| 717 |
# So face_embeddings, face_crop_enhanced remain None
|
| 718 |
# InstantID will work with keypoints only (reduced quality)
|
| 719 |
else:
|
| 720 |
-
print("
|
| 721 |
except Exception as e:
|
| 722 |
print(f"[ERROR] MediapipeFace detection failed: {e}")
|
| 723 |
traceback.print_exc()
|
|
@@ -777,8 +777,7 @@ class RetroArtConverter:
|
|
| 777 |
|
| 778 |
pipe_kwargs["generator"] = generator
|
| 779 |
|
| 780 |
-
# --- START FIX: Use our new Cappella module ---
|
| 781 |
-
cappella_success = False
|
| 782 |
if self.use_cappella and self.cappella is not None:
|
| 783 |
try:
|
| 784 |
print("Encoding prompts with Cappella...")
|
|
@@ -793,44 +792,15 @@ class RetroArtConverter:
|
|
| 793 |
pipe_kwargs["negative_pooled_prompt_embeds"] = conditioning.negative_pooled_embeds
|
| 794 |
|
| 795 |
print(f"[OK] Cappella encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
|
| 796 |
-
cappella_success = True
|
| 797 |
except Exception as e:
|
| 798 |
-
print(f"
|
| 799 |
traceback.print_exc()
|
| 800 |
-
|
| 801 |
-
# Fallback: Manual encoding if Cappella not available or failed
|
| 802 |
-
if not cappella_success:
|
| 803 |
-
print("[FALLBACK] Manually encoding prompts for face embedding compatibility...")
|
| 804 |
-
try:
|
| 805 |
-
# Use pipeline's encode_prompt method
|
| 806 |
-
(
|
| 807 |
-
prompt_embeds,
|
| 808 |
-
negative_prompt_embeds,
|
| 809 |
-
pooled_prompt_embeds,
|
| 810 |
-
negative_pooled_prompt_embeds,
|
| 811 |
-
) = self.pipe.encode_prompt(
|
| 812 |
-
prompt=prompt,
|
| 813 |
-
prompt_2=None,
|
| 814 |
-
device=self.device,
|
| 815 |
-
num_images_per_prompt=1,
|
| 816 |
-
do_classifier_free_guidance=True,
|
| 817 |
-
negative_prompt=negative_prompt,
|
| 818 |
-
negative_prompt_2=None,
|
| 819 |
-
)
|
| 820 |
-
|
| 821 |
-
pipe_kwargs["prompt_embeds"] = prompt_embeds
|
| 822 |
-
pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
|
| 823 |
-
pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
|
| 824 |
-
pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
|
| 825 |
-
|
| 826 |
-
print(f"[OK] Manual encoding - Prompt: {prompt_embeds.shape}, Negative: {negative_prompt_embeds.shape}")
|
| 827 |
-
except Exception as e:
|
| 828 |
-
print(f"[ERROR] Manual encoding also failed: {e}")
|
| 829 |
-
traceback.print_exc()
|
| 830 |
-
# Last resort: use raw text (face embeddings won't work)
|
| 831 |
pipe_kwargs["prompt"] = prompt
|
| 832 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 833 |
-
|
|
|
|
|
|
|
|
|
|
| 834 |
# --- END FIX ---
|
| 835 |
|
| 836 |
# Add CLIP skip
|
|
|
|
| 54 |
self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
|
| 55 |
self.models_loaded['mediapipe_face'] = mediapipe_success
|
| 56 |
|
| 57 |
+
# Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
|
| 58 |
self.depth_detector, self.depth_type, depth_success = load_depth_detector()
|
| 59 |
self.models_loaded['depth_detector'] = depth_success
|
| 60 |
self.models_loaded['depth_type'] = self.depth_type
|
|
|
|
| 627 |
if len(faces) > 0:
|
| 628 |
insightface_success = True
|
| 629 |
has_detected_faces = True
|
| 630 |
+
print(f"✓ InsightFace detected {len(faces)} face(s)")
|
| 631 |
|
| 632 |
# Get largest face
|
| 633 |
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
|
|
|
| 688 |
print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
|
| 689 |
print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
|
| 690 |
else:
|
| 691 |
+
print("✗ InsightFace found no faces")
|
| 692 |
|
| 693 |
except Exception as e:
|
| 694 |
print(f"[ERROR] InsightFace detection failed: {e}")
|
|
|
|
| 710 |
if mediapipe_array.sum() > 1000: # If image has significant content
|
| 711 |
has_detected_faces = True
|
| 712 |
face_kps_image = mediapipe_result
|
| 713 |
+
print(f"✓ MediapipeFace detected face(s)")
|
| 714 |
print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
|
| 715 |
|
| 716 |
# Note: MediapipeFace doesn't provide embeddings or detailed info
|
| 717 |
# So face_embeddings, face_crop_enhanced remain None
|
| 718 |
# InstantID will work with keypoints only (reduced quality)
|
| 719 |
else:
|
| 720 |
+
print("✗ MediapipeFace found no faces")
|
| 721 |
except Exception as e:
|
| 722 |
print(f"[ERROR] MediapipeFace detection failed: {e}")
|
| 723 |
traceback.print_exc()
|
|
|
|
| 777 |
|
| 778 |
pipe_kwargs["generator"] = generator
|
| 779 |
|
| 780 |
+
# --- START FIX: Use our new Cappella module ---
|
|
|
|
| 781 |
if self.use_cappella and self.cappella is not None:
|
| 782 |
try:
|
| 783 |
print("Encoding prompts with Cappella...")
|
|
|
|
| 792 |
pipe_kwargs["negative_pooled_prompt_embeds"] = conditioning.negative_pooled_embeds
|
| 793 |
|
| 794 |
print(f"[OK] Cappella encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
|
|
|
|
| 795 |
except Exception as e:
|
| 796 |
+
print(f"Cappella encoding failed, using standard prompts: {e}")
|
| 797 |
traceback.print_exc()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 798 |
pipe_kwargs["prompt"] = prompt
|
| 799 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 800 |
+
else:
|
| 801 |
+
print("[WARNING] Cappella not found, using standard prompt encoding.")
|
| 802 |
+
pipe_kwargs["prompt"] = prompt
|
| 803 |
+
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 804 |
# --- END FIX ---
|
| 805 |
|
| 806 |
# Add CLIP skip
|