primerz committed on
Commit
f4b692c
·
verified ·
1 Parent(s): ec1cd29

Update generator.py

Browse files
Files changed (1) hide show
  1. generator.py +185 -92
generator.py CHANGED
@@ -18,9 +18,9 @@ from utils import (
18
  )
19
  from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
- load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
- load_openpose_detector
24
  )
25
 
26
 
@@ -34,17 +34,25 @@ class RetroArtConverter:
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
- 'zoe_depth': False,
 
38
  'ip_adapter': False,
39
- 'openpose': False
 
40
  }
 
41
 
42
- # Initialize face analysis
43
  self.face_app, self.face_detection_enabled = load_face_analysis()
44
 
45
- # Load Zoe Depth detector
46
- self.zoe_depth, zoe_success = load_depth_detector()
47
- self.models_loaded['zoe_depth'] = zoe_success
 
 
 
 
 
48
 
49
  # --- NEW: Load OpenPose detector ---
50
  self.openpose_detector, openpose_success = load_openpose_detector()
@@ -104,8 +112,8 @@ class RetroArtConverter:
104
 
105
  self.models_loaded['custom_checkpoint'] = checkpoint_success
106
 
107
- # Load LORA
108
- lora_success = load_lora(self.pipe)
109
  self.models_loaded['lora'] = lora_success
110
 
111
  # Setup IP-Adapter
@@ -155,8 +163,15 @@ class RetroArtConverter:
155
  """Print model loading status"""
156
  print("\n=== MODEL STATUS ===")
157
  for model, loaded in self.models_loaded.items():
158
- status = "[OK] LOADED" if loaded else "[FALLBACK/DISABLED]"
159
- print(f"{model}: {status}")
 
 
 
 
 
 
 
160
  print("===================\n")
161
 
162
  print("=== UPGRADE VERIFICATION ===")
@@ -182,8 +197,11 @@ class RetroArtConverter:
182
  print("============================\n")
183
 
184
  def get_depth_map(self, image):
185
- """Generate depth map using Zoe Depth"""
186
- if self.zoe_depth is not None:
 
 
 
187
  try:
188
  if image.mode != 'RGB':
189
  image = image.convert('RGB')
@@ -203,25 +221,27 @@ class RetroArtConverter:
203
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
204
 
205
  if target_width != orig_width or target_height != orig_height:
206
- print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
207
 
208
  # FIXED: Add torch.no_grad() wrapper
209
  with torch.no_grad():
210
- depth_image = self.zoe_depth(image_for_depth)
211
 
212
  depth_width, depth_height = depth_image.size
213
  if depth_width != orig_width or depth_height != orig_height:
214
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
215
 
216
- print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
217
  return depth_image
218
 
219
  except Exception as e:
220
- print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
221
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
222
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
223
  return Image.fromarray(depth_colored)
224
  else:
 
 
225
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
226
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
227
  return Image.fromarray(depth_colored)
@@ -482,6 +502,7 @@ class RetroArtConverter:
482
  depth_control_scale=0.8,
483
  identity_control_scale=0.85,
484
  expression_control_scale=0.6,
 
485
  lora_scale=1.0,
486
  identity_preservation=0.8,
487
  strength=0.75,
@@ -552,81 +573,153 @@ class RetroArtConverter:
552
  has_detected_faces = False
553
  face_bbox_original = None
554
 
555
- if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
556
- print("Detecting faces and extracting keypoints...")
557
- img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
558
- faces = self.face_app.get(img_array)
559
 
560
- if len(faces) > 0:
561
- has_detected_faces = True
562
- print(f"Detected {len(faces)} face(s)")
563
-
564
- # Get largest face
565
- face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
566
-
567
- # ADAPTIVE PARAMETERS
568
- adaptive_params = self.detect_face_quality(face)
569
- if adaptive_params is not None:
570
- print(f"[ADAPTIVE] {adaptive_params['reason']}")
571
- identity_preservation = adaptive_params['identity_preservation']
572
- identity_control_scale = adaptive_params['identity_control_scale']
573
- guidance_scale = adaptive_params['guidance_scale']
574
- lora_scale = adaptive_params['lora_scale']
575
-
576
- # Extract face embeddings
577
- face_embeddings_base = face.normed_embedding
578
-
579
- # Extract face crop
580
- bbox = face.bbox.astype(int)
581
- x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
582
- face_bbox_original = [x1, y1, x2, y2]
583
-
584
- # Add padding
585
- face_width = x2 - x1
586
- face_height = y2 - y1
587
- padding_x = int(face_width * 0.3)
588
- padding_y = int(face_height * 0.3)
589
- x1 = max(0, x1 - padding_x)
590
- y1 = max(0, y1 - padding_y)
591
- x2 = min(resized_image.width, x2 + padding_x)
592
- y2 = min(resized_image.height, y2 + padding_y)
593
-
594
- # Crop face region
595
- face_crop = resized_image.crop((x1, y1, x2, y2))
596
-
597
- # MULTI-SCALE PROCESSING
598
- face_embeddings = self.extract_multi_scale_face(face_crop, face)
599
-
600
- # Enhance face crop
601
- face_crop_enhanced = enhance_face_crop(face_crop)
602
-
603
- # Draw keypoints
604
- face_kps = face.kps
605
- face_kps_image = draw_kps(resized_image, face_kps)
606
-
607
- # ENHANCED: Extract comprehensive facial attributes
608
- from utils import get_facial_attributes, build_enhanced_prompt
609
- facial_attrs = get_facial_attributes(face)
610
 
611
- # Update prompt with detected attributes
612
- prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD)
613
-
614
- # Legacy output for compatibility
615
- age = facial_attrs['age']
616
- gender_code = facial_attrs['gender']
617
- det_score = facial_attrs['quality']
618
-
619
- gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
620
- print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
621
- print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
- # Set LORA scale
624
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
625
- try:
626
- self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
627
- print(f"LORA scale: {lora_scale}")
628
- except Exception as e:
629
- print(f"Could not set LORA scale: {e}")
 
 
 
 
 
 
 
 
 
 
 
630
 
631
  # Prepare generation kwargs
632
  pipe_kwargs = {
@@ -715,11 +808,11 @@ class RetroArtConverter:
715
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
716
 
717
  else:
718
- # No face, must add a blank image to keep list order
719
- print("Using blank map for InstantID (no face/disabled)")
720
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
721
  conditioning_scales.append(0.0) # Set scale to 0
722
- scale_debug_str.append("Identity: 0.00")
723
 
724
  # 2. Depth
725
  if self.depth_active:
 
18
  )
19
  from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
+ load_sdxl_pipeline, load_loras, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
+ load_openpose_detector, load_mediapipe_face_detector
24
  )
25
 
26
 
 
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
+ 'depth_detector': False,
38
+ 'depth_type': None,
39
  'ip_adapter': False,
40
+ 'openpose': False,
41
+ 'mediapipe_face': False
42
  }
43
+ self.loaded_loras = {} # Store status of each LORA
44
 
45
+ # Initialize face analysis (InsightFace)
46
  self.face_app, self.face_detection_enabled = load_face_analysis()
47
 
48
+ # Load MediapipeFaceDetector (alternative face detection)
49
+ self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
50
+ self.models_loaded['mediapipe_face'] = mediapipe_success
51
+
52
+ # Load Depth detector with fallback hierarchy (Leres → Zoe → Midas)
53
+ self.depth_detector, self.depth_type, depth_success = load_depth_detector()
54
+ self.models_loaded['depth_detector'] = depth_success
55
+ self.models_loaded['depth_type'] = self.depth_type
56
 
57
  # --- NEW: Load OpenPose detector ---
58
  self.openpose_detector, openpose_success = load_openpose_detector()
 
112
 
113
  self.models_loaded['custom_checkpoint'] = checkpoint_success
114
 
115
+ # Load LORAs
116
+ self.loaded_loras, lora_success = load_loras(self.pipe)
117
  self.models_loaded['lora'] = lora_success
118
 
119
  # Setup IP-Adapter
 
163
  """Print model loading status"""
164
  print("\n=== MODEL STATUS ===")
165
  for model, loaded in self.models_loaded.items():
166
+ if model == 'lora':
167
+ lora_status = 'DISABLED'
168
+ if loaded:
169
+ loaded_count = sum(1 for status in self.loaded_loras.values() if status)
170
+ lora_status = f"[OK] LOADED ({loaded_count}/3)"
171
+ print(f"loras: {lora_status}")
172
+ else:
173
+ status = "[OK] LOADED" if loaded else "[FALLBACK/DISABLED]"
174
+ print(f"{model}: {status}")
175
  print("===================\n")
176
 
177
  print("=== UPGRADE VERIFICATION ===")
 
197
  print("============================\n")
198
 
199
  def get_depth_map(self, image):
200
+ """
201
+ Generate depth map using available depth detector.
202
+ Supports: LeresDetector, ZoeDetector, or MidasDetector.
203
+ """
204
+ if self.depth_detector is not None:
205
  try:
206
  if image.mode != 'RGB':
207
  image = image.convert('RGB')
 
221
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
222
 
223
  if target_width != orig_width or target_height != orig_height:
224
+ print(f"[DEPTH] Resized for {self.depth_type.upper()}Detector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
225
 
226
  # FIXED: Add torch.no_grad() wrapper
227
  with torch.no_grad():
228
+ depth_image = self.depth_detector(image_for_depth)
229
 
230
  depth_width, depth_height = depth_image.size
231
  if depth_width != orig_width or depth_height != orig_height:
232
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
233
 
234
+ print(f"[DEPTH] {self.depth_type.upper()} depth map generated: {orig_width}x{orig_height}")
235
  return depth_image
236
 
237
  except Exception as e:
238
+ print(f"[DEPTH] {self.depth_type.upper()}Detector failed ({e}), falling back to grayscale depth")
239
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
240
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
241
  return Image.fromarray(depth_colored)
242
  else:
243
+ # No depth detector available, use grayscale fallback
244
+ print("[DEPTH] No depth detector available, using grayscale fallback")
245
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
246
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
247
  return Image.fromarray(depth_colored)
 
502
  depth_control_scale=0.8,
503
  identity_control_scale=0.85,
504
  expression_control_scale=0.6,
505
+ lora_choice="RetroArt",
506
  lora_scale=1.0,
507
  identity_preservation=0.8,
508
  strength=0.75,
 
573
  has_detected_faces = False
574
  face_bbox_original = None
575
 
576
+ if self.instantid_active:
577
+ # Try InsightFace first (if available)
578
+ insightface_tried = False
579
+ insightface_success = False
580
 
581
+ if self.face_app is not None:
582
+ print("Detecting faces with InsightFace...")
583
+ insightface_tried = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
 
585
+ try:
586
+ img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
587
+ faces = self.face_app.get(img_array)
588
+
589
+ if len(faces) > 0:
590
+ insightface_success = True
591
+ has_detected_faces = True
592
+ print(f"✓ InsightFace detected {len(faces)} face(s)")
593
+
594
+ # Get largest face
595
+ face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
596
+
597
+ # ADAPTIVE PARAMETERS
598
+ adaptive_params = self.detect_face_quality(face)
599
+ if adaptive_params is not None:
600
+ print(f"[ADAPTIVE] {adaptive_params['reason']}")
601
+ identity_preservation = adaptive_params['identity_preservation']
602
+ identity_control_scale = adaptive_params['identity_control_scale']
603
+ guidance_scale = adaptive_params['guidance_scale']
604
+ lora_scale = adaptive_params['lora_scale']
605
+
606
+ # Extract face embeddings
607
+ face_embeddings_base = face.normed_embedding
608
+
609
+ # Extract face crop
610
+ bbox = face.bbox.astype(int)
611
+ x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
612
+ face_bbox_original = [x1, y1, x2, y2]
613
+
614
+ # Add padding
615
+ face_width = x2 - x1
616
+ face_height = y2 - y1
617
+ padding_x = int(face_width * 0.3)
618
+ padding_y = int(face_height * 0.3)
619
+ x1 = max(0, x1 - padding_x)
620
+ y1 = max(0, y1 - padding_y)
621
+ x2 = min(resized_image.width, x2 + padding_x)
622
+ y2 = min(resized_image.height, y2 + padding_y)
623
+
624
+ # Crop face region
625
+ face_crop = resized_image.crop((x1, y1, x2, y2))
626
+
627
+ # MULTI-SCALE PROCESSING
628
+ face_embeddings = self.extract_multi_scale_face(face_crop, face)
629
+
630
+ # Enhance face crop
631
+ face_crop_enhanced = enhance_face_crop(face_crop)
632
+
633
+ # Draw keypoints
634
+ face_kps = face.kps
635
+ face_kps_image = draw_kps(resized_image, face_kps)
636
+
637
+ # ENHANCED: Extract comprehensive facial attributes
638
+ from utils import get_facial_attributes, build_enhanced_prompt
639
+ facial_attrs = get_facial_attributes(face)
640
+
641
+ # Update prompt with detected attributes
642
+ prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD)
643
+
644
+ # Legacy output for compatibility
645
+ age = facial_attrs['age']
646
+ gender_code = facial_attrs['gender']
647
+ det_score = facial_attrs['quality']
648
+
649
+ gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
650
+ print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
651
+ print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
652
+ else:
653
+ print("✗ InsightFace found no faces")
654
+
655
+ except Exception as e:
656
+ print(f"[ERROR] InsightFace detection failed: {e}")
657
+ import traceback
658
+ traceback.print_exc()
659
+ else:
660
+ print("[INFO] InsightFace not available (face_app is None)")
661
+
662
+ # If InsightFace didn't succeed, try MediapipeFace
663
+ if not insightface_success:
664
+ if self.mediapipe_face is not None:
665
+ print("Trying MediapipeFaceDetector as fallback...")
666
+
667
+ try:
668
+ # MediapipeFace returns an annotated image with keypoints
669
+ mediapipe_result = self.mediapipe_face(resized_image)
670
+
671
+ # Check if face was detected (result is not blank/black)
672
+ mediapipe_array = np.array(mediapipe_result)
673
+ if mediapipe_array.sum() > 1000: # If image has significant content
674
+ has_detected_faces = True
675
+ face_kps_image = mediapipe_result
676
+ print(f"✓ MediapipeFace detected face(s)")
677
+ print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
678
+
679
+ # Note: MediapipeFace doesn't provide embeddings or detailed info
680
+ # So face_embeddings, face_crop_enhanced remain None
681
+ # InstantID will work with keypoints only (reduced quality)
682
+ else:
683
+ print("✗ MediapipeFace found no faces")
684
+ except Exception as e:
685
+ print(f"[ERROR] MediapipeFace detection failed: {e}")
686
+ import traceback
687
+ traceback.print_exc()
688
+ else:
689
+ print("[INFO] MediapipeFaceDetector not available")
690
+
691
+ # Final summary
692
+ if not has_detected_faces:
693
+ print("\n[SUMMARY] No faces detected by any detector")
694
+ if insightface_tried:
695
+ print(" - InsightFace: tried, found nothing")
696
+ else:
697
+ print(" - InsightFace: not available")
698
+
699
+ if self.mediapipe_face is not None:
700
+ print(" - MediapipeFace: tried, found nothing")
701
+ else:
702
+ print(" - MediapipeFace: not available")
703
+ print()
704
 
705
+ # Set LORA
706
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
707
+ adapter_name = lora_choice.lower() # "retroart", "vga", "lucasart", or "none"
708
+
709
+ if adapter_name != "none" and self.loaded_loras.get(adapter_name, False):
710
+ try:
711
+ self.pipe.set_adapters([adapter_name], adapter_weights=[lora_scale])
712
+ print(f"LORA: Set adapter '{adapter_name}' with scale: {lora_scale}")
713
+ except Exception as e:
714
+ print(f"Could not set LORA adapter '{adapter_name}': {e}")
715
+ self.pipe.set_adapters([]) # Disable LORAs if setting failed
716
+ else:
717
+ if adapter_name == "none":
718
+ print("LORAs disabled by user choice.")
719
+ else:
720
+ print(f"LORA '{adapter_name}' not loaded or available, disabling LORAs.")
721
+ self.pipe.set_adapters([]) # Disable all LORAs
722
+
723
 
724
  # Prepare generation kwargs
725
  pipe_kwargs = {
 
808
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
809
 
810
  else:
811
+ # No face detected - blank map needed to maintain ControlNet list order
812
+ print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
813
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
814
  conditioning_scales.append(0.0) # Set scale to 0
815
+ scale_debug_str.append("Identity: 0.00 (no face)")
816
 
817
  # 2. Depth
818
  if self.depth_active: