primerz committed
Commit bddeb26 · verified · 1 Parent(s): 0b87c27

Update app.py

Files changed (1)
  1. app.py +138 -50
app.py CHANGED
@@ -30,7 +30,7 @@ class RetroArtConverter:
         self.device = device
         self.dtype = dtype
 
-        # Initialize face analysis for InstantID (optional)
+        # Initialize face analysis for InstantID
         print("Loading face analysis model...")
         try:
             self.face_app = FaceAnalysis(
@@ -54,6 +54,22 @@ class RetroArtConverter:
             torch_dtype=self.dtype
         ).to(self.device)
 
+        # Load InstantID ControlNet for identity preservation
+        print("Loading InstantID ControlNet...")
+        try:
+            self.controlnet_instantid = ControlNetModel.from_pretrained(
+                "InstantX/InstantID",
+                subfolder="ControlNetModel",
+                torch_dtype=self.dtype
+            ).to(self.device)
+            print("✓ InstantID ControlNet loaded successfully")
+            self.instantid_enabled = True
+        except Exception as e:
+            print(f"⚠️ InstantID ControlNet not available: {e}")
+            print("Running without InstantID (identity may not be preserved)")
+            self.controlnet_instantid = None
+            self.instantid_enabled = False
+
         # Load custom VAE from HuggingFace Hub
         print("Loading custom VAE (pixelate) from HuggingFace Hub...")
         try:
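
The block added above loads the ControlNet weights from the ControlNetModel subfolder of the InstantX/InstantID Hub repo. A minimal prefetch sketch, assuming that repo's published file layout, for warming the cache before the pipeline starts:

    from huggingface_hub import hf_hub_download

    # Filenames assumed from the InstantX/InstantID repo layout
    hf_hub_download("InstantX/InstantID", "ControlNetModel/config.json")
    hf_hub_download("InstantX/InstantID", "ControlNetModel/diffusion_pytorch_model.safetensors")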
@@ -83,6 +99,11 @@ class RetroArtConverter:
             device=self.device if self.device == "cuda" else -1
         )
 
+        # Determine which controlnets to use
+        controlnets = [self.controlnet_depth]
+        if self.instantid_enabled and self.controlnet_instantid is not None:
+            controlnets.append(self.controlnet_instantid)
+
         # Load SDXL checkpoint from HuggingFace Hub
         print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
         try:
@@ -93,7 +114,7 @@ class RetroArtConverter:
             )
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
-                controlnet=self.controlnet_depth,
+                controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
@@ -104,7 +125,7 @@ class RetroArtConverter:
             print("Using default SDXL")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
-                controlnet=self.controlnet_depth,
+                controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
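
Both constructors accept either a single ControlNetModel or a list of them; diffusers wraps a list in a MultiControlNetModel, and the call site must then pass one control image and one conditioning scale per ControlNet, in the same order. A sketch of that call shape, with variable names taken from this diff:

    # One control image and one scale per ControlNet, in matching order
    result = pipe(
        prompt="retro pixel art",
        image=[depth_image, face_control_image],
        controlnet_conditioning_scale=[0.8, 0.2],  # depth strength, InstantID strength
        num_inference_steps=30,
    )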
@@ -129,15 +150,9 @@ class RetroArtConverter:
             self.pipe.scheduler.config
         )
 
-        # For ZeroGPU, don't use model_cpu_offload
-        # self.pipe.enable_model_cpu_offload()
-
         self.pipe.enable_vae_slicing()
-
-        # Enable attention slicing for memory efficiency
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
 
-        # Try to enable xformers if available (only works on GPU)
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
@@ -152,30 +167,44 @@ class RetroArtConverter:
         depth = self.depth_estimator(image)
         depth_image = depth['depth']
 
-        # Convert to numpy array
         depth_array = np.array(depth_image)
-
-        # Normalize to 0-255
         depth_normalized = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min()) * 255
         depth_normalized = depth_normalized.astype(np.uint8)
-
-        # Convert to 3-channel image
        depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
 
         return Image.fromarray(depth_colored)
 
-    def detect_faces(self, image):
-        """Detect faces in the image using antelopev2"""
+    def extract_face_embeddings(self, image):
+        """Extract face embeddings using InsightFace"""
         if not self.face_detection_enabled or self.face_app is None:
-            return []
+            return None
 
         try:
             img_array = np.array(image)
             faces = self.face_app.get(img_array)
-            return faces
+
+            if len(faces) == 0:
+                return None
+
+            # Use the largest face
+            face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
+            return torch.from_numpy(face.normed_embedding).unsqueeze(0)
         except Exception as e:
-            print(f"Face detection error: {e}")
-            return []
+            print(f"Face embedding extraction error: {e}")
+            return None
+
+    def prepare_face_image(self, image, face_bbox):
+        """Prepare face image for InstantID ControlNet"""
+        x1, y1, x2, y2 = map(int, face_bbox)
+        # Add some padding
+        padding = 20
+        x1 = max(0, x1 - padding)
+        y1 = max(0, y1 - padding)
+        x2 = min(image.width, x2 + padding)
+        y2 = min(image.height, y2 + padding)
+
+        face_image = image.crop((x1, y1, x2, y2))
+        return face_image
 
     def calculate_target_size(self, original_width, original_height, max_dimension=1024):
         """Calculate target size maintaining aspect ratio"""
@@ -188,7 +217,7 @@ class RetroArtConverter:
             new_height = min(original_height, max_dimension)
             new_width = int(new_height * aspect_ratio)
 
-        # Round to nearest multiple of 8 (required for diffusion models)
+        # Round to nearest multiple of 8
         new_width = (new_width // 8) * 8
         new_height = (new_height // 8) * 8
 
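
A worked example of the branch shown above: an 800×1200 portrait with max_dimension=1024 scales to 682×1024, and the multiple-of-8 rounding brings it to 680×1024:

    aspect_ratio = 800 / 1200                   # width / height
    new_height = min(1200, 1024)                # 1024
    new_width = int(new_height * aspect_ratio)  # 682
    new_width = (new_width // 8) * 8            # 680
    new_height = (new_height // 8) * 8          # 1024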
@@ -202,7 +231,9 @@ class RetroArtConverter:
         num_inference_steps=30,
         guidance_scale=7.5,
         controlnet_conditioning_scale=0.8,
-        lora_scale=0.85
+        lora_scale=0.85,
+        identity_preservation=0.8,  # NEW PARAMETER
+        image_scale=0.2  # NEW PARAMETER for InstantID strength
     ):
         """Main generation function"""
 
@@ -214,36 +245,70 @@ class RetroArtConverter:
 
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
 
-        # Detect faces
-        faces = self.detect_faces(resized_image)
-        has_faces = len(faces) > 0
-
-        if has_faces:
-            print(f"Detected {len(faces)} face(s)")
-            # Enhance prompt for face preservation
-            prompt = f"portrait, detailed face, {prompt}"
-
         # Generate depth map
         print("Generating depth map...")
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
 
+        # Extract face embeddings if InstantID is enabled
+        face_embeddings = None
+        control_images = [depth_image]
+        conditioning_scales = [controlnet_conditioning_scale]
+
+        if self.instantid_enabled and self.controlnet_instantid is not None:
+            print("Extracting face embeddings...")
+            img_array = np.array(resized_image)
+            faces = self.face_app.get(img_array) if self.face_app is not None else []
+
+            if len(faces) > 0:
+                print(f"Detected {len(faces)} face(s), using for identity preservation")
+                # Get the largest face
+                face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
+                face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
+
+                # Prepare face image for InstantID ControlNet
+                face_control_image = resized_image.resize((target_width, target_height), Image.LANCZOS)
+                control_images.append(face_control_image)
+                conditioning_scales.append(image_scale)
+
+                # Enhance prompt for face preservation
+                prompt = f"portrait, detailed face, facial features, {prompt}"
+
         # Set LORA scale
-        self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
+        if hasattr(self.pipe, 'set_adapters'):
+            try:
+                self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
+            except:
+                print("Could not set LORA adapters, continuing without")
+
+        # Prepare pipeline kwargs
+        pipe_kwargs = {
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "num_inference_steps": num_inference_steps,
+            "guidance_scale": guidance_scale,
+            "width": target_width,
+            "height": target_height,
+            "generator": torch.Generator(device=self.device).manual_seed(42)
+        }
+
+        # Add control images and scales
+        if len(control_images) > 1:
+            # Multiple ControlNets
+            pipe_kwargs["image"] = control_images
+            pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
+        else:
+            # Single ControlNet (depth only)
+            pipe_kwargs["image"] = depth_image
+            pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
+
+        # Add face embeddings if available (for InstantID IP-Adapter)
+        if face_embeddings is not None:
+            pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
 
         # Generate image
         print("Generating retro art...")
-        result = self.pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            image=depth_image,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            controlnet_conditioning_scale=controlnet_conditioning_scale,
-            width=target_width,
-            height=target_height,
-            generator=torch.Generator(device=self.device).manual_seed(42)
-        )
+        result = self.pipe(**pipe_kwargs)
 
         return result.images[0]
 
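
A minimal usage sketch of the updated generate() (file names hypothetical). Note that identity_preservation is accepted and wired through from the UI but is not referenced in the body of generate() in this commit; only image_scale sets the InstantID ControlNet strength:

    converter = RetroArtConverter()
    out = converter.generate(
        Image.open("photo.jpg").convert("RGB"),
        prompt="retro pixel art, 16-bit game character",
        negative_prompt="blurry, modern",
        controlnet_conditioning_scale=0.8,
        lora_scale=0.85,
        image_scale=0.2,  # InstantID ControlNet weight when a face is detected
    )
    out.save("retro.png")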
@@ -260,7 +325,9 @@ def process_image(
     steps,
     guidance_scale,
     controlnet_scale,
-    lora_scale
+    lora_scale,
+    identity_preservation,  # NEW
+    image_scale  # NEW
 ):
     if image is None:
         return None
@@ -273,11 +340,15 @@
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
-            lora_scale=lora_scale
+            lora_scale=lora_scale,
+            identity_preservation=identity_preservation,  # NEW
+            image_scale=image_scale  # NEW
         )
         return result
     except Exception as e:
         print(f"Error: {e}")
+        import traceback
+        traceback.print_exc()
         raise gr.Error(f"Generation failed: {str(e)}")
 
 # Create Gradio interface
@@ -291,7 +362,7 @@ with gr.Blocks(title="RetroArt Converter") as demo:
     - Custom SDXL checkpoint (Horizon)
     - Pixelate VAE for authentic retro look
     - RetroArt LORA for style enhancement
-    - Face preservation with InstantID
+    - Face preservation with InstantID (if available)
     - Depth-aware generation with ControlNet
     """)
 
@@ -343,6 +414,23 @@ with gr.Blocks(title="RetroArt Converter") as demo:
                 step=0.05,
                 label="RetroArt LORA Scale"
             )
+
+            # NEW PARAMETERS
+            identity_preservation = gr.Slider(
+                minimum=0,
+                maximum=1.5,
+                value=0.8,
+                step=0.1,
+                label="Identity Preservation (InstantID strength)"
+            )
+
+            image_scale = gr.Slider(
+                minimum=0,
+                maximum=1.0,
+                value=0.2,
+                step=0.05,
+                label="InstantID Image Scale"
+            )
 
             generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary")
 
@@ -351,9 +439,9 @@ with gr.Blocks(title="RetroArt Converter") as demo:
 
     gr.Examples(
         examples=[
-            ["example_portrait.jpg", "retro pixel art portrait, 16-bit game character", "blurry, modern", 30, 7.5, 0.8, 0.85],
+            ["example_portrait.jpg", "retro pixel art portrait, 16-bit game character", "blurry, modern", 30, 7.5, 0.8, 0.85, 0.8, 0.2],
         ],
-        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
+        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale, identity_preservation, image_scale],
         outputs=[output_image],
         fn=process_image,
         cache_examples=False
361
 
362
  generate_btn.click(
363
  fn=process_image,
364
- inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
365
  outputs=[output_image]
366
  )
367
 
@@ -372,5 +460,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         share=False,
-        show_api=True  # Enable API
+        show_api=True
     )
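
With show_api=True the endpoint stays callable programmatically. A sketch with gradio_client, where the Space id and api_name are assumptions and the positional arguments mirror the inputs list above:

    from gradio_client import Client, handle_file

    client = Client("primerz/retroart-converter")  # hypothetical Space id
    result = client.predict(
        handle_file("photo.jpg"),      # input image
        "retro pixel art portrait",    # prompt
        "blurry, modern",              # negative prompt
        30, 7.5, 0.8, 0.85, 0.8, 0.2,  # steps, guidance, controlnet, lora, identity, image scale
        api_name="/predict",
    )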