primerz committed on
Commit
04fa627
·
verified ·
1 Parent(s): b305aed

Update generator.py

Browse files
Files changed (1) hide show
  1. generator.py +11 -41
generator.py CHANGED
@@ -54,7 +54,7 @@ class RetroArtConverter:
54
  self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
55
  self.models_loaded['mediapipe_face'] = mediapipe_success
56
 
57
- # Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
58
  self.depth_detector, self.depth_type, depth_success = load_depth_detector()
59
  self.models_loaded['depth_detector'] = depth_success
60
  self.models_loaded['depth_type'] = self.depth_type
@@ -627,7 +627,7 @@ class RetroArtConverter:
627
  if len(faces) > 0:
628
  insightface_success = True
629
  has_detected_faces = True
630
- print(f"✓ InsightFace detected {len(faces)} face(s)")
631
 
632
  # Get largest face
633
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
@@ -688,7 +688,7 @@ class RetroArtConverter:
688
  print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
689
  print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
690
  else:
691
- print("✗ InsightFace found no faces")
692
 
693
  except Exception as e:
694
  print(f"[ERROR] InsightFace detection failed: {e}")
@@ -710,14 +710,14 @@ class RetroArtConverter:
710
  if mediapipe_array.sum() > 1000: # If image has significant content
711
  has_detected_faces = True
712
  face_kps_image = mediapipe_result
713
- print(f"✓ MediapipeFace detected face(s)")
714
  print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
715
 
716
  # Note: MediapipeFace doesn't provide embeddings or detailed info
717
  # So face_embeddings, face_crop_enhanced remain None
718
  # InstantID will work with keypoints only (reduced quality)
719
  else:
720
- print("✗ MediapipeFace found no faces")
721
  except Exception as e:
722
  print(f"[ERROR] MediapipeFace detection failed: {e}")
723
  traceback.print_exc()
@@ -777,8 +777,7 @@ class RetroArtConverter:
777
 
778
  pipe_kwargs["generator"] = generator
779
 
780
- # --- START FIX: Use our new Cappella module with proper fallback ---
781
- cappella_success = False
782
  if self.use_cappella and self.cappella is not None:
783
  try:
784
  print("Encoding prompts with Cappella...")
@@ -793,44 +792,15 @@ class RetroArtConverter:
793
  pipe_kwargs["negative_pooled_prompt_embeds"] = conditioning.negative_pooled_embeds
794
 
795
  print(f"[OK] Cappella encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
796
- cappella_success = True
797
  except Exception as e:
798
- print(f"[WARNING] Cappella encoding failed: {e}")
799
  traceback.print_exc()
800
-
801
- # Fallback: Manual encoding if Cappella not available or failed
802
- if not cappella_success:
803
- print("[FALLBACK] Manually encoding prompts for face embedding compatibility...")
804
- try:
805
- # Use pipeline's encode_prompt method
806
- (
807
- prompt_embeds,
808
- negative_prompt_embeds,
809
- pooled_prompt_embeds,
810
- negative_pooled_prompt_embeds,
811
- ) = self.pipe.encode_prompt(
812
- prompt=prompt,
813
- prompt_2=None,
814
- device=self.device,
815
- num_images_per_prompt=1,
816
- do_classifier_free_guidance=True,
817
- negative_prompt=negative_prompt,
818
- negative_prompt_2=None,
819
- )
820
-
821
- pipe_kwargs["prompt_embeds"] = prompt_embeds
822
- pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
823
- pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
824
- pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
825
-
826
- print(f"[OK] Manual encoding - Prompt: {prompt_embeds.shape}, Negative: {negative_prompt_embeds.shape}")
827
- except Exception as e:
828
- print(f"[ERROR] Manual encoding also failed: {e}")
829
- traceback.print_exc()
830
- # Last resort: use raw text (face embeddings won't work)
831
  pipe_kwargs["prompt"] = prompt
832
  pipe_kwargs["negative_prompt"] = negative_prompt
833
- print("[WARNING] Using raw text prompts - face embedding concatenation will be skipped")
 
 
 
834
  # --- END FIX ---
835
 
836
  # Add CLIP skip
 
54
  self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
55
  self.models_loaded['mediapipe_face'] = mediapipe_success
56
 
57
+ # Load Depth detector with fallback hierarchy (Leres Zoe Midas)
58
  self.depth_detector, self.depth_type, depth_success = load_depth_detector()
59
  self.models_loaded['depth_detector'] = depth_success
60
  self.models_loaded['depth_type'] = self.depth_type
 
627
  if len(faces) > 0:
628
  insightface_success = True
629
  has_detected_faces = True
630
+ print(f" InsightFace detected {len(faces)} face(s)")
631
 
632
  # Get largest face
633
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
 
688
  print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
689
  print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
690
  else:
691
+ print(" InsightFace found no faces")
692
 
693
  except Exception as e:
694
  print(f"[ERROR] InsightFace detection failed: {e}")
 
710
  if mediapipe_array.sum() > 1000: # If image has significant content
711
  has_detected_faces = True
712
  face_kps_image = mediapipe_result
713
+ print(f" MediapipeFace detected face(s)")
714
  print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
715
 
716
  # Note: MediapipeFace doesn't provide embeddings or detailed info
717
  # So face_embeddings, face_crop_enhanced remain None
718
  # InstantID will work with keypoints only (reduced quality)
719
  else:
720
+ print(" MediapipeFace found no faces")
721
  except Exception as e:
722
  print(f"[ERROR] MediapipeFace detection failed: {e}")
723
  traceback.print_exc()
 
777
 
778
  pipe_kwargs["generator"] = generator
779
 
780
+ # --- START FIX: Use our new Cappella module ---
 
781
  if self.use_cappella and self.cappella is not None:
782
  try:
783
  print("Encoding prompts with Cappella...")
 
792
  pipe_kwargs["negative_pooled_prompt_embeds"] = conditioning.negative_pooled_embeds
793
 
794
  print(f"[OK] Cappella encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
 
795
  except Exception as e:
796
+ print(f"Cappella encoding failed, using standard prompts: {e}")
797
  traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
  pipe_kwargs["prompt"] = prompt
799
  pipe_kwargs["negative_prompt"] = negative_prompt
800
+ else:
801
+ print("[WARNING] Cappella not found, using standard prompt encoding.")
802
+ pipe_kwargs["prompt"] = prompt
803
+ pipe_kwargs["negative_prompt"] = negative_prompt
804
  # --- END FIX ---
805
 
806
  # Add CLIP skip