Upload 14 files
Browse files- BackgroundEngine.py +205 -3
- app.py +3 -1
- mask_generator.py +185 -2
- requirements.txt +4 -0
- style_transfer.py +708 -0
- ui_manager.py +557 -13
BackgroundEngine.py
CHANGED
|
@@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
|
|
| 10 |
import warnings
|
| 11 |
warnings.filterwarnings("ignore")
|
| 12 |
|
| 13 |
-
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
|
| 14 |
import open_clip
|
| 15 |
from mask_generator import MaskGenerator
|
| 16 |
from image_blender import ImageBlender
|
|
@@ -39,10 +39,12 @@ class BackgroundEngine:
|
|
| 39 |
self.clip_pretrained = "openai"
|
| 40 |
|
| 41 |
self.pipeline = None
|
|
|
|
| 42 |
self.clip_model = None
|
| 43 |
self.clip_preprocess = None
|
| 44 |
self.clip_tokenizer = None
|
| 45 |
self.is_initialized = False
|
|
|
|
| 46 |
|
| 47 |
self.max_image_size = 1024
|
| 48 |
self.default_steps = 25
|
|
@@ -336,13 +338,15 @@ class BackgroundEngine:
|
|
| 336 |
guidance_scale: float = 7.5,
|
| 337 |
progress_callback: Optional[Callable] = None,
|
| 338 |
enable_prompt_enhancement: bool = True,
|
| 339 |
-
feather_radius: int = 0
|
|
|
|
| 340 |
) -> Dict[str, Any]:
|
| 341 |
"""
|
| 342 |
Generate background and combine with foreground.
|
| 343 |
|
| 344 |
Args:
|
| 345 |
feather_radius: Gaussian blur radius for mask edge softening (0-20, default 0)
|
|
|
|
| 346 |
|
| 347 |
Returns dict with: combined_image, generated_scene, original_image, mask, success
|
| 348 |
"""
|
|
@@ -391,7 +395,8 @@ class BackgroundEngine:
|
|
| 391 |
combination_mask = self.mask_generator.create_gradient_based_mask(
|
| 392 |
processed_original,
|
| 393 |
combination_mode,
|
| 394 |
-
focus_mode
|
|
|
|
| 395 |
)
|
| 396 |
|
| 397 |
if progress_callback:
|
|
@@ -430,3 +435,200 @@ class BackgroundEngine:
|
|
| 430 |
"success": False,
|
| 431 |
"error": str(e)
|
| 432 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import warnings
|
| 11 |
warnings.filterwarnings("ignore")
|
| 12 |
|
| 13 |
+
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLInpaintPipeline, DPMSolverMultistepScheduler
|
| 14 |
import open_clip
|
| 15 |
from mask_generator import MaskGenerator
|
| 16 |
from image_blender import ImageBlender
|
|
|
|
| 39 |
self.clip_pretrained = "openai"
|
| 40 |
|
| 41 |
self.pipeline = None
|
| 42 |
+
self.inpaint_pipeline = None
|
| 43 |
self.clip_model = None
|
| 44 |
self.clip_preprocess = None
|
| 45 |
self.clip_tokenizer = None
|
| 46 |
self.is_initialized = False
|
| 47 |
+
self.inpaint_initialized = False
|
| 48 |
|
| 49 |
self.max_image_size = 1024
|
| 50 |
self.default_steps = 25
|
|
|
|
| 338 |
guidance_scale: float = 7.5,
|
| 339 |
progress_callback: Optional[Callable] = None,
|
| 340 |
enable_prompt_enhancement: bool = True,
|
| 341 |
+
feather_radius: int = 0,
|
| 342 |
+
enhance_dark_edges: bool = False
|
| 343 |
) -> Dict[str, Any]:
|
| 344 |
"""
|
| 345 |
Generate background and combine with foreground.
|
| 346 |
|
| 347 |
Args:
|
| 348 |
feather_radius: Gaussian blur radius for mask edge softening (0-20, default 0)
|
| 349 |
+
enhance_dark_edges: Enhance mask edges for dark background images (default False)
|
| 350 |
|
| 351 |
Returns dict with: combined_image, generated_scene, original_image, mask, success
|
| 352 |
"""
|
|
|
|
| 395 |
combination_mask = self.mask_generator.create_gradient_based_mask(
|
| 396 |
processed_original,
|
| 397 |
combination_mode,
|
| 398 |
+
focus_mode,
|
| 399 |
+
enhance_dark_edges=enhance_dark_edges
|
| 400 |
)
|
| 401 |
|
| 402 |
if progress_callback:
|
|
|
|
| 435 |
"success": False,
|
| 436 |
"error": str(e)
|
| 437 |
}
|
| 438 |
+
|
| 439 |
+
def _load_inpaint_pipeline(self) -> bool:
|
| 440 |
+
"""Lazy load SDXL inpainting pipeline"""
|
| 441 |
+
if self.inpaint_initialized:
|
| 442 |
+
return True
|
| 443 |
+
|
| 444 |
+
try:
|
| 445 |
+
logger.info("Loading SDXL inpainting pipeline...")
|
| 446 |
+
actual_device = "cuda" if torch.cuda.is_available() else self.device
|
| 447 |
+
|
| 448 |
+
self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained(
|
| 449 |
+
"diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
|
| 450 |
+
torch_dtype=torch.float16 if actual_device == "cuda" else torch.float32,
|
| 451 |
+
variant="fp16" if actual_device == "cuda" else None,
|
| 452 |
+
use_safetensors=True
|
| 453 |
+
)
|
| 454 |
+
self.inpaint_pipeline.to(actual_device)
|
| 455 |
+
|
| 456 |
+
# Use fast scheduler
|
| 457 |
+
self.inpaint_pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 458 |
+
self.inpaint_pipeline.scheduler.config
|
| 459 |
+
)
|
| 460 |
+
|
| 461 |
+
# Memory optimization
|
| 462 |
+
if actual_device == "cuda":
|
| 463 |
+
try:
|
| 464 |
+
self.inpaint_pipeline.enable_xformers_memory_efficient_attention()
|
| 465 |
+
except Exception:
|
| 466 |
+
pass
|
| 467 |
+
|
| 468 |
+
self.inpaint_initialized = True
|
| 469 |
+
logger.info("β SDXL inpainting pipeline loaded")
|
| 470 |
+
return True
|
| 471 |
+
|
| 472 |
+
except Exception as e:
|
| 473 |
+
logger.error(f"Failed to load inpainting pipeline: {e}")
|
| 474 |
+
self.inpaint_initialized = False
|
| 475 |
+
return False
|
| 476 |
+
|
| 477 |
+
def inpaint_region(
|
| 478 |
+
self,
|
| 479 |
+
image: Image.Image,
|
| 480 |
+
mask: Image.Image,
|
| 481 |
+
prompt: str,
|
| 482 |
+
negative_prompt: str = "blurry, low quality, artifacts, seams",
|
| 483 |
+
num_inference_steps: int = 20,
|
| 484 |
+
guidance_scale: float = 7.5,
|
| 485 |
+
strength: float = 0.99
|
| 486 |
+
) -> Dict[str, Any]:
|
| 487 |
+
"""
|
| 488 |
+
Inpaint marked regions with background content.
|
| 489 |
+
|
| 490 |
+
Args:
|
| 491 |
+
image: The combined image with artifacts to fix
|
| 492 |
+
mask: Binary mask where white = areas to inpaint
|
| 493 |
+
prompt: Background description for inpainting
|
| 494 |
+
negative_prompt: What to avoid
|
| 495 |
+
num_inference_steps: Denoising steps (20 is usually enough)
|
| 496 |
+
guidance_scale: How closely to follow prompt
|
| 497 |
+
strength: How much to change masked area (0.99 = almost complete replacement)
|
| 498 |
+
|
| 499 |
+
Returns:
|
| 500 |
+
Dict with inpainted_image, success, error
|
| 501 |
+
"""
|
| 502 |
+
try:
|
| 503 |
+
# Load inpainting pipeline if not already loaded
|
| 504 |
+
if not self._load_inpaint_pipeline():
|
| 505 |
+
# Fallback to OpenCV inpainting
|
| 506 |
+
return self._opencv_inpaint_fallback(image, mask)
|
| 507 |
+
|
| 508 |
+
logger.info("Starting region inpainting...")
|
| 509 |
+
|
| 510 |
+
# Prepare images
|
| 511 |
+
image = self._prepare_image(image)
|
| 512 |
+
mask = mask.resize(image.size, Image.LANCZOS).convert('L')
|
| 513 |
+
|
| 514 |
+
# Ensure mask is properly binarized
|
| 515 |
+
mask_array = np.array(mask)
|
| 516 |
+
mask_array = (mask_array > 127).astype(np.uint8) * 255
|
| 517 |
+
mask = Image.fromarray(mask_array, mode='L')
|
| 518 |
+
|
| 519 |
+
# Dilate mask slightly for better blending
|
| 520 |
+
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
|
| 521 |
+
mask_dilated = cv2.dilate(mask_array, kernel, iterations=1)
|
| 522 |
+
mask = Image.fromarray(mask_dilated, mode='L')
|
| 523 |
+
|
| 524 |
+
actual_device = "cuda" if torch.cuda.is_available() else self.device
|
| 525 |
+
|
| 526 |
+
with torch.inference_mode():
|
| 527 |
+
result = self.inpaint_pipeline(
|
| 528 |
+
prompt=prompt,
|
| 529 |
+
negative_prompt=negative_prompt,
|
| 530 |
+
image=image,
|
| 531 |
+
mask_image=mask,
|
| 532 |
+
width=image.size[0],
|
| 533 |
+
height=image.size[1],
|
| 534 |
+
num_inference_steps=num_inference_steps,
|
| 535 |
+
guidance_scale=guidance_scale,
|
| 536 |
+
strength=strength,
|
| 537 |
+
generator=torch.Generator(device=actual_device).manual_seed(42)
|
| 538 |
+
)
|
| 539 |
+
|
| 540 |
+
inpainted = result.images[0]
|
| 541 |
+
|
| 542 |
+
# Blend edges for smoother transition
|
| 543 |
+
inpainted = self._blend_inpaint_edges(image, inpainted, mask)
|
| 544 |
+
|
| 545 |
+
self._memory_cleanup()
|
| 546 |
+
|
| 547 |
+
logger.info("β Region inpainting completed")
|
| 548 |
+
return {
|
| 549 |
+
"inpainted_image": inpainted,
|
| 550 |
+
"success": True
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
except Exception as e:
|
| 554 |
+
logger.error(f"Inpainting failed: {e}")
|
| 555 |
+
self._memory_cleanup()
|
| 556 |
+
return {
|
| 557 |
+
"success": False,
|
| 558 |
+
"error": str(e)
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
def _opencv_inpaint_fallback(
|
| 562 |
+
self,
|
| 563 |
+
image: Image.Image,
|
| 564 |
+
mask: Image.Image
|
| 565 |
+
) -> Dict[str, Any]:
|
| 566 |
+
"""Fallback to OpenCV inpainting for small areas or when SDXL unavailable"""
|
| 567 |
+
try:
|
| 568 |
+
logger.info("Using OpenCV inpainting fallback...")
|
| 569 |
+
|
| 570 |
+
img_array = np.array(image.convert('RGB'))
|
| 571 |
+
mask_array = np.array(mask.convert('L'))
|
| 572 |
+
|
| 573 |
+
# Binarize mask
|
| 574 |
+
mask_binary = (mask_array > 127).astype(np.uint8) * 255
|
| 575 |
+
|
| 576 |
+
# Use Telea algorithm for natural results
|
| 577 |
+
inpainted = cv2.inpaint(
|
| 578 |
+
img_array,
|
| 579 |
+
mask_binary,
|
| 580 |
+
inpaintRadius=5,
|
| 581 |
+
flags=cv2.INPAINT_TELEA
|
| 582 |
+
)
|
| 583 |
+
|
| 584 |
+
result = Image.fromarray(inpainted)
|
| 585 |
+
|
| 586 |
+
logger.info("β OpenCV inpainting completed")
|
| 587 |
+
return {
|
| 588 |
+
"inpainted_image": result,
|
| 589 |
+
"success": True
|
| 590 |
+
}
|
| 591 |
+
|
| 592 |
+
except Exception as e:
|
| 593 |
+
logger.error(f"OpenCV inpainting failed: {e}")
|
| 594 |
+
return {
|
| 595 |
+
"success": False,
|
| 596 |
+
"error": str(e)
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
def _blend_inpaint_edges(
|
| 600 |
+
self,
|
| 601 |
+
original: Image.Image,
|
| 602 |
+
inpainted: Image.Image,
|
| 603 |
+
mask: Image.Image,
|
| 604 |
+
feather_pixels: int = 8
|
| 605 |
+
) -> Image.Image:
|
| 606 |
+
"""Blend inpainted region edges for seamless transition"""
|
| 607 |
+
try:
|
| 608 |
+
orig_array = np.array(original).astype(np.float32)
|
| 609 |
+
inpaint_array = np.array(inpainted).astype(np.float32)
|
| 610 |
+
mask_array = np.array(mask.convert('L')).astype(np.float32) / 255.0
|
| 611 |
+
|
| 612 |
+
# Create feathered mask for smooth blending
|
| 613 |
+
if feather_pixels > 0:
|
| 614 |
+
kernel_size = feather_pixels * 2 + 1
|
| 615 |
+
mask_feathered = cv2.GaussianBlur(
|
| 616 |
+
mask_array,
|
| 617 |
+
(kernel_size, kernel_size),
|
| 618 |
+
feather_pixels / 2
|
| 619 |
+
)
|
| 620 |
+
else:
|
| 621 |
+
mask_feathered = mask_array
|
| 622 |
+
|
| 623 |
+
# Expand mask to 3 channels
|
| 624 |
+
mask_3d = mask_feathered[:, :, np.newaxis]
|
| 625 |
+
|
| 626 |
+
# Blend: inpainted in masked area, original elsewhere
|
| 627 |
+
blended = inpaint_array * mask_3d + orig_array * (1 - mask_3d)
|
| 628 |
+
blended = np.clip(blended, 0, 255).astype(np.uint8)
|
| 629 |
+
|
| 630 |
+
return Image.fromarray(blended)
|
| 631 |
+
|
| 632 |
+
except Exception as e:
|
| 633 |
+
logger.warning(f"Edge blending failed: {e}, returning inpainted directly")
|
| 634 |
+
return inpainted
|
app.py
CHANGED
|
@@ -16,6 +16,7 @@ import sentencepiece
|
|
| 16 |
|
| 17 |
from FlowFacade import FlowFacade
|
| 18 |
from BackgroundEngine import BackgroundEngine
|
|
|
|
| 19 |
from ui_manager import UIManager
|
| 20 |
|
| 21 |
|
|
@@ -126,7 +127,8 @@ def main():
|
|
| 126 |
try:
|
| 127 |
facade = FlowFacade()
|
| 128 |
background_engine = BackgroundEngine()
|
| 129 |
-
|
|
|
|
| 130 |
interface = ui_manager.create_interface()
|
| 131 |
is_colab = 'google.colab' in sys.modules
|
| 132 |
|
|
|
|
| 16 |
|
| 17 |
from FlowFacade import FlowFacade
|
| 18 |
from BackgroundEngine import BackgroundEngine
|
| 19 |
+
from style_transfer import StyleTransferEngine
|
| 20 |
from ui_manager import UIManager
|
| 21 |
|
| 22 |
|
|
|
|
| 127 |
try:
|
| 128 |
facade = FlowFacade()
|
| 129 |
background_engine = BackgroundEngine()
|
| 130 |
+
style_engine = StyleTransferEngine()
|
| 131 |
+
ui_manager = UIManager(facade, background_engine, style_engine)
|
| 132 |
interface = ui_manager.create_interface()
|
| 133 |
is_colab = 'google.colab' in sys.modules
|
| 134 |
|
mask_generator.py
CHANGED
|
@@ -15,6 +15,13 @@ from rembg import remove, new_session
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
logger.setLevel(logging.INFO)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
class MaskGenerator:
|
| 19 |
"""
|
| 20 |
Intelligent mask generation using deep learning models with traditional fallback.
|
|
@@ -92,6 +99,146 @@ class MaskGenerator:
|
|
| 92 |
gc.collect()
|
| 93 |
logger.info("π§Ή BiRefNet model unloaded")
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def apply_guided_filter(
|
| 96 |
self,
|
| 97 |
mask: np.ndarray,
|
|
@@ -481,13 +628,25 @@ class MaskGenerator:
|
|
| 481 |
logger.error(f"β Scene focus adjustment failed: {e}")
|
| 482 |
return mask
|
| 483 |
|
| 484 |
-
def create_gradient_based_mask(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
"""
|
| 486 |
Intelligent foreground extraction: prioritize deep learning models, fallback to traditional methods
|
| 487 |
Focus mode: 'person' for tight crop around person, 'scene' for including nearby objects
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
"""
|
| 489 |
width, height = original_image.size
|
| 490 |
-
logger.info(f"π― Creating mask for {width}x{height} image, mode: {mode}, focus: {focus_mode}")
|
| 491 |
|
| 492 |
if mode == "center":
|
| 493 |
# Try using deep learning models for intelligent foreground extraction
|
|
@@ -495,9 +654,33 @@ class MaskGenerator:
|
|
| 495 |
dl_mask = self.try_deep_learning_mask(original_image)
|
| 496 |
if dl_mask is not None:
|
| 497 |
logger.info("β
Using deep learning generated mask")
|
|
|
|
| 498 |
# Apply focus mode adjustments to deep learning mask
|
| 499 |
if focus_mode == "scene":
|
| 500 |
dl_mask = self._adjust_mask_for_scene_focus(dl_mask, original_image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
return dl_mask
|
| 502 |
|
| 503 |
# Fallback to traditional method
|
|
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
logger.setLevel(logging.INFO)
|
| 17 |
|
| 18 |
+
# Dark background detection thresholds
|
| 19 |
+
DARK_BG_LUMINANCE_THRESHOLD = 50 # Average luminance below this = dark background
|
| 20 |
+
DARK_BG_EDGE_SAMPLE_WIDTH = 20 # Pixels from edge to sample for background detection
|
| 21 |
+
DARK_BG_DILATION_PIXELS = 5 # Default dilation for dark backgrounds
|
| 22 |
+
DARK_BG_ENHANCED_DILATION = 8 # Enhanced dilation when user enables option
|
| 23 |
+
|
| 24 |
+
|
| 25 |
class MaskGenerator:
|
| 26 |
"""
|
| 27 |
Intelligent mask generation using deep learning models with traditional fallback.
|
|
|
|
| 99 |
gc.collect()
|
| 100 |
logger.info("π§Ή BiRefNet model unloaded")
|
| 101 |
|
| 102 |
+
def detect_dark_background(self, image: Image.Image, mask: Optional[np.ndarray] = None) -> Tuple[bool, float]:
|
| 103 |
+
"""
|
| 104 |
+
Detect if the image has a dark background.
|
| 105 |
+
|
| 106 |
+
Analyzes the edge regions of the image (where background is likely) to determine
|
| 107 |
+
if the background is predominantly dark, which can cause mask detection issues.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
image: Input PIL Image
|
| 111 |
+
mask: Optional existing mask to exclude foreground from analysis
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
Tuple of (is_dark_background: bool, avg_luminance: float)
|
| 115 |
+
"""
|
| 116 |
+
try:
|
| 117 |
+
img_array = np.array(image.convert('RGB'))
|
| 118 |
+
height, width = img_array.shape[:2]
|
| 119 |
+
|
| 120 |
+
# Convert to grayscale for luminance analysis
|
| 121 |
+
gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
|
| 122 |
+
|
| 123 |
+
# Sample from edge regions (likely background)
|
| 124 |
+
edge_width = min(DARK_BG_EDGE_SAMPLE_WIDTH, width // 10, height // 10)
|
| 125 |
+
|
| 126 |
+
# Create edge sampling mask
|
| 127 |
+
edge_sample_mask = np.zeros((height, width), dtype=bool)
|
| 128 |
+
edge_sample_mask[:edge_width, :] = True # Top
|
| 129 |
+
edge_sample_mask[-edge_width:, :] = True # Bottom
|
| 130 |
+
edge_sample_mask[:, :edge_width] = True # Left
|
| 131 |
+
edge_sample_mask[:, -edge_width:] = True # Right
|
| 132 |
+
|
| 133 |
+
# Exclude foreground if mask is provided
|
| 134 |
+
if mask is not None:
|
| 135 |
+
foreground_mask = mask > 127
|
| 136 |
+
edge_sample_mask = edge_sample_mask & (~foreground_mask)
|
| 137 |
+
|
| 138 |
+
if not np.any(edge_sample_mask):
|
| 139 |
+
# Fallback: use corners only
|
| 140 |
+
corner_pixels = np.array([
|
| 141 |
+
gray[0, 0], gray[0, -1],
|
| 142 |
+
gray[-1, 0], gray[-1, -1]
|
| 143 |
+
])
|
| 144 |
+
avg_luminance = np.mean(corner_pixels)
|
| 145 |
+
else:
|
| 146 |
+
avg_luminance = np.mean(gray[edge_sample_mask])
|
| 147 |
+
|
| 148 |
+
is_dark = avg_luminance < DARK_BG_LUMINANCE_THRESHOLD
|
| 149 |
+
|
| 150 |
+
logger.info(f"π Background analysis - Avg luminance: {avg_luminance:.1f}, Dark: {is_dark}")
|
| 151 |
+
|
| 152 |
+
return is_dark, avg_luminance
|
| 153 |
+
|
| 154 |
+
except Exception as e:
|
| 155 |
+
logger.error(f"β Dark background detection failed: {e}")
|
| 156 |
+
return False, 128.0 # Default: not dark
|
| 157 |
+
|
| 158 |
+
def enhance_mask_for_dark_background(
|
| 159 |
+
self,
|
| 160 |
+
mask: Image.Image,
|
| 161 |
+
original_image: Image.Image,
|
| 162 |
+
dilation_pixels: int = DARK_BG_DILATION_PIXELS,
|
| 163 |
+
enhance_gray_areas: bool = True
|
| 164 |
+
) -> Image.Image:
|
| 165 |
+
"""
|
| 166 |
+
Enhance mask for images with dark backgrounds.
|
| 167 |
+
|
| 168 |
+
Applies dilation and gray area enhancement to capture foreground elements
|
| 169 |
+
that may have been missed due to low contrast with dark backgrounds.
|
| 170 |
+
|
| 171 |
+
Args:
|
| 172 |
+
mask: Input mask PIL Image (L mode)
|
| 173 |
+
original_image: Original image for reference
|
| 174 |
+
dilation_pixels: Number of pixels to dilate the mask
|
| 175 |
+
enhance_gray_areas: Whether to boost gray (uncertain) areas
|
| 176 |
+
|
| 177 |
+
Returns:
|
| 178 |
+
Enhanced mask PIL Image
|
| 179 |
+
"""
|
| 180 |
+
try:
|
| 181 |
+
mask_array = np.array(mask)
|
| 182 |
+
orig_array = np.array(original_image.convert('RGB'))
|
| 183 |
+
|
| 184 |
+
logger.info(f"π§ Enhancing mask for dark background (dilation: {dilation_pixels}px)")
|
| 185 |
+
|
| 186 |
+
# Step 1: Identify gray (uncertain) areas in the mask
|
| 187 |
+
if enhance_gray_areas:
|
| 188 |
+
gray_areas = (mask_array > 30) & (mask_array < 200)
|
| 189 |
+
|
| 190 |
+
if np.any(gray_areas):
|
| 191 |
+
# For gray areas, check if they're near high-confidence foreground
|
| 192 |
+
high_conf = mask_array >= 200
|
| 193 |
+
|
| 194 |
+
# Dilate high confidence area to find nearby gray pixels
|
| 195 |
+
kernel_check = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
|
| 196 |
+
high_conf_dilated = cv2.dilate(high_conf.astype(np.uint8), kernel_check, iterations=2)
|
| 197 |
+
|
| 198 |
+
# Gray pixels near high confidence foreground -> boost them
|
| 199 |
+
boost_candidates = gray_areas & (high_conf_dilated > 0)
|
| 200 |
+
|
| 201 |
+
# Boost gray areas near foreground
|
| 202 |
+
mask_array[boost_candidates] = np.clip(
|
| 203 |
+
mask_array[boost_candidates] * 1.5 + 50,
|
| 204 |
+
0, 255
|
| 205 |
+
).astype(np.uint8)
|
| 206 |
+
|
| 207 |
+
logger.info(f"π Boosted {np.sum(boost_candidates)} gray pixels near foreground")
|
| 208 |
+
|
| 209 |
+
# Step 2: Apply dilation to expand foreground coverage
|
| 210 |
+
if dilation_pixels > 0:
|
| 211 |
+
kernel = cv2.getStructuringElement(
|
| 212 |
+
cv2.MORPH_ELLIPSE,
|
| 213 |
+
(dilation_pixels * 2 + 1, dilation_pixels * 2 + 1)
|
| 214 |
+
)
|
| 215 |
+
|
| 216 |
+
# Threshold to get foreground region for dilation
|
| 217 |
+
fg_binary = (mask_array > 50).astype(np.uint8) * 255
|
| 218 |
+
fg_dilated = cv2.dilate(fg_binary, kernel, iterations=1)
|
| 219 |
+
|
| 220 |
+
# Blend: keep original high values, expand into new areas
|
| 221 |
+
# New areas from dilation get moderate confidence
|
| 222 |
+
new_areas = (fg_dilated > 0) & (mask_array < 50)
|
| 223 |
+
mask_array[new_areas] = 180 # Moderate confidence for expanded areas
|
| 224 |
+
|
| 225 |
+
logger.info(f"π Dilated mask by {dilation_pixels}px, added {np.sum(new_areas)} pixels")
|
| 226 |
+
|
| 227 |
+
# Step 3: Smooth the transitions
|
| 228 |
+
mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0.8)
|
| 229 |
+
|
| 230 |
+
# Step 4: Re-strengthen core foreground
|
| 231 |
+
core_fg = np.array(mask) >= 220
|
| 232 |
+
mask_array[core_fg] = 255
|
| 233 |
+
|
| 234 |
+
logger.info(f"β
Dark background enhancement complete - Final mean: {mask_array.mean():.1f}")
|
| 235 |
+
|
| 236 |
+
return Image.fromarray(mask_array, mode='L')
|
| 237 |
+
|
| 238 |
+
except Exception as e:
|
| 239 |
+
logger.error(f"β Mask enhancement failed: {e}")
|
| 240 |
+
return mask
|
| 241 |
+
|
| 242 |
def apply_guided_filter(
|
| 243 |
self,
|
| 244 |
mask: np.ndarray,
|
|
|
|
| 628 |
logger.error(f"β Scene focus adjustment failed: {e}")
|
| 629 |
return mask
|
| 630 |
|
| 631 |
+
def create_gradient_based_mask(
|
| 632 |
+
self,
|
| 633 |
+
original_image: Image.Image,
|
| 634 |
+
mode: str = "center",
|
| 635 |
+
focus_mode: str = "person",
|
| 636 |
+
enhance_dark_edges: bool = False
|
| 637 |
+
) -> Image.Image:
|
| 638 |
"""
|
| 639 |
Intelligent foreground extraction: prioritize deep learning models, fallback to traditional methods
|
| 640 |
Focus mode: 'person' for tight crop around person, 'scene' for including nearby objects
|
| 641 |
+
|
| 642 |
+
Args:
|
| 643 |
+
original_image: Input PIL Image
|
| 644 |
+
mode: Composition mode (center, left_half, right_half, full)
|
| 645 |
+
focus_mode: 'person' for tight crop, 'scene' for including nearby objects
|
| 646 |
+
enhance_dark_edges: User toggle to enhance mask for dark backgrounds
|
| 647 |
"""
|
| 648 |
width, height = original_image.size
|
| 649 |
+
logger.info(f"π― Creating mask for {width}x{height} image, mode: {mode}, focus: {focus_mode}, enhance_dark: {enhance_dark_edges}")
|
| 650 |
|
| 651 |
if mode == "center":
|
| 652 |
# Try using deep learning models for intelligent foreground extraction
|
|
|
|
| 654 |
dl_mask = self.try_deep_learning_mask(original_image)
|
| 655 |
if dl_mask is not None:
|
| 656 |
logger.info("β
Using deep learning generated mask")
|
| 657 |
+
|
| 658 |
# Apply focus mode adjustments to deep learning mask
|
| 659 |
if focus_mode == "scene":
|
| 660 |
dl_mask = self._adjust_mask_for_scene_focus(dl_mask, original_image)
|
| 661 |
+
|
| 662 |
+
# === Dark background detection and enhancement ===
|
| 663 |
+
mask_array = np.array(dl_mask)
|
| 664 |
+
is_dark_bg, avg_luminance = self.detect_dark_background(original_image, mask_array)
|
| 665 |
+
|
| 666 |
+
if is_dark_bg or enhance_dark_edges:
|
| 667 |
+
# Determine dilation amount
|
| 668 |
+
if enhance_dark_edges:
|
| 669 |
+
# User explicitly enabled - use stronger dilation
|
| 670 |
+
dilation = DARK_BG_ENHANCED_DILATION
|
| 671 |
+
logger.info(f"π User enabled dark edge enhancement (dilation: {dilation}px)")
|
| 672 |
+
else:
|
| 673 |
+
# Auto-detected dark background - use moderate dilation
|
| 674 |
+
dilation = DARK_BG_DILATION_PIXELS
|
| 675 |
+
logger.info(f"π Auto-detected dark background (luminance: {avg_luminance:.1f}), applying enhancement")
|
| 676 |
+
|
| 677 |
+
dl_mask = self.enhance_mask_for_dark_background(
|
| 678 |
+
dl_mask,
|
| 679 |
+
original_image,
|
| 680 |
+
dilation_pixels=dilation,
|
| 681 |
+
enhance_gray_areas=True
|
| 682 |
+
)
|
| 683 |
+
|
| 684 |
return dl_mask
|
| 685 |
|
| 686 |
# Fallback to traditional method
|
requirements.txt
CHANGED
|
@@ -20,6 +20,10 @@ rembg[gpu]
|
|
| 20 |
scipy
|
| 21 |
opencv-contrib-python
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# Core Dependencies
|
| 24 |
torch>=2.5.0
|
| 25 |
numpy
|
|
|
|
| 20 |
scipy
|
| 21 |
opencv-contrib-python
|
| 22 |
|
| 23 |
+
# 3D Cartoon Style Dependencies (SDXL + Pixar LoRA)
|
| 24 |
+
# Note: diffusers is already included above for I2V
|
| 25 |
+
# SDXL uses the same diffusers library
|
| 26 |
+
|
| 27 |
# Core Dependencies
|
| 28 |
torch>=2.5.0
|
| 29 |
numpy
|
style_transfer.py
ADDED
|
@@ -0,0 +1,708 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gc
|
| 3 |
+
from typing import Tuple, Optional, Dict, Any
|
| 4 |
+
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
# Optional Hugging Face Spaces runtime (GPU decorators etc.); absent locally.
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False


# Identity preservation keywords (added to all styles) - kept short for CLIP 77 token limit
IDENTITY_PRESERVE = "same person, same face, same ethnicity, same age"
IDENTITY_NEGATIVE = "different person, altered face, changed ethnicity, age change, distorted features"

# Enhanced face restore mode - concise weighted keywords
FACE_RESTORE_PRESERVE = "(same person:1.4), (preserve face:1.3), (same ethnicity:1.2), same pose, same lighting"
FACE_RESTORE_NEGATIVE = "(different person:1.4), (deformed face:1.3), wrong ethnicity, age change, western features"

# IP-Adapter settings for stronger identity preservation
# Using standard IP-Adapter (not face-specific) to avoid image encoder dependency
IP_ADAPTER_REPO = "h94/IP-Adapter"
IP_ADAPTER_SUBFOLDER = "sdxl_models"
IP_ADAPTER_WEIGHT = "ip-adapter_sdxl.bin"  # Standard model, no extra encoder needed
IP_ADAPTER_SCALE_DEFAULT = 0.5  # Balance between identity and style

# Style-specific face_restore settings (some styles are more transformative).
# max_strength caps img2img strength, lora_scale_mult damps the style LoRA,
# ip_scale sets the IP-Adapter influence when face restore is enabled.
FACE_RESTORE_STYLE_SETTINGS = {
    "3d_cartoon": {"max_strength": 0.45, "lora_scale_mult": 0.7, "ip_scale": 0.4},
    "anime": {"max_strength": 0.45, "lora_scale_mult": 0.7, "ip_scale": 0.4},
    "illustrated_fantasy": {"max_strength": 0.42, "lora_scale_mult": 0.65, "ip_scale": 0.45},
    "watercolor": {"max_strength": 0.40, "lora_scale_mult": 0.6, "ip_scale": 0.5},
    "oil_painting": {"max_strength": 0.35, "lora_scale_mult": 0.5, "ip_scale": 0.6},  # Most transformative
    "pixel_art": {"max_strength": 0.50, "lora_scale_mult": 0.8, "ip_scale": 0.3},
}

# Style configurations: one entry per selectable single style.
# lora_repo may be None (base SDXL only, lora_scale ignored).
STYLE_CONFIGS = {
    "3d_cartoon": {
        "name": "3D Cartoon",
        "emoji": "π¬",
        "lora_repo": "imagepipeline/Samaritan-3d-Cartoon-SDXL",
        "lora_weight": "Samaritan 3d Cartoon.safetensors",
        "prompt": "3D cartoon style, smooth rounded features, soft ambient lighting, CGI quality, vibrant colors, cel-shaded, studio render",
        "negative_prompt": "ugly, deformed, noisy, blurry, low quality, flat, sketch",
        "lora_scale": 0.75,
        "recommended_strength": 0.55,
    },
    "anime": {
        "name": "Anime Illustration",
        "emoji": "πΈ",
        "lora_repo": None,
        "lora_weight": None,
        "prompt": "anime illustration, soft lighting, rich colors, delicate linework, smooth gradients, expressive eyes, cel shading, masterpiece",
        "negative_prompt": "ugly, deformed, bad anatomy, bad hands, blurry, low quality",
        "lora_scale": 0.0,
        "recommended_strength": 0.50,
    },
    "illustrated_fantasy": {
        "name": "Illustrated Fantasy",
        "emoji": "π",
        "lora_repo": "ntc-ai/SDXL-LoRA-slider.Studio-Ghibli-style",
        "lora_weight": "Studio Ghibli style.safetensors",
        "prompt": "Ghibli style illustration, hand-painted look, soft watercolor textures, dreamy atmosphere, pastel colors, golden hour lighting, storybook quality",
        "negative_prompt": "ugly, dark, horror, scary, blurry, low quality, modern",
        "lora_scale": 1.0,
        "recommended_strength": 0.50,
    },
    "watercolor": {
        "name": "Watercolor Art",
        "emoji": "π",
        "lora_repo": "ostris/watercolor_style_lora_sdxl",
        "lora_weight": "watercolor_style_lora.safetensors",
        "prompt": "watercolor painting, wet-on-wet technique, soft color bleeds, paper texture, transparent washes, feathered edges, hand-painted",
        "negative_prompt": "sharp edges, solid flat colors, harsh lines, vector art, airbrushed",
        "lora_scale": 1.0,
        "recommended_strength": 0.50,
    },
    "oil_painting": {
        "name": "Classic Oil Paint",
        "emoji": "πΌοΈ",
        "lora_repo": "EldritchAdam/ClassipeintXL",
        "lora_weight": "ClassipeintXL.safetensors",
        "prompt": "oil painting style, impasto technique, palette knife strokes, visible canvas texture, rich saturated pigments, masterful lighting, museum quality",
        "negative_prompt": "flat, smooth, cartoon, anime, blurry, low quality, modern, airbrushed",
        "lora_scale": 0.9,
        "recommended_strength": 0.50,
    },
    "pixel_art": {
        "name": "Pixel Art",
        "emoji": "πΎ",
        "lora_repo": "nerijs/pixel-art-xl",
        "lora_weight": "pixel-art-xl.safetensors",
        "prompt": "pixel art style, crisp blocky pixels, limited color palette, 16-bit aesthetic, retro game vibes, dithering effects, sprite art",
        "negative_prompt": "smooth, blurry, anti-aliased, soft gradient, painterly",
        "lora_scale": 0.9,
        "recommended_strength": 0.60,
    },
}

# Style Blend Presets - combining multiple styles (prompts kept short for CLIP 77 token limit)
STYLE_BLENDS = {
    "cartoon_anime": {
        "name": "3D Anime Fusion",
        # FIX: original value was two U+FFFD replacement characters (corrupted
        # data that rendered as broken glyphs in the UI); use a valid emoji.
        "emoji": "🎭",
        "description": "70% 3D Cartoon + 30% Anime linework",
        "primary_style": "3d_cartoon",
        "secondary_style": "anime",
        "primary_weight": 0.7,
        "secondary_weight": 0.3,
        "prompt": "3D cartoon with anime linework, smooth features, soft lighting, CGI quality, vibrant colors, cel-shaded",
        "negative_prompt": "ugly, deformed, noisy, blurry, low quality",
        "strength": 0.52,
    },
    "fantasy_watercolor": {
        "name": "Dreamy Watercolor",
        "emoji": "π",
        "description": "60% Illustrated Fantasy + 40% Watercolor",
        "primary_style": "illustrated_fantasy",
        "secondary_style": "watercolor",
        "primary_weight": 0.6,
        "secondary_weight": 0.4,
        "prompt": "Ghibli style with watercolor washes, soft color bleeds, storybook atmosphere, paper texture, warm golden lighting",
        "negative_prompt": "dark, horror, harsh lines, solid colors",
        "strength": 0.50,
    },
    "anime_fantasy": {
        "name": "Anime Storybook",
        "emoji": "π",
        "description": "50% Anime + 50% Illustrated Fantasy",
        "primary_style": "anime",
        "secondary_style": "illustrated_fantasy",
        "primary_weight": 0.5,
        "secondary_weight": 0.5,
        "prompt": "Ghibli anime illustration, hand-painted storybook, soft lighting, pastel colors, expressive eyes, warm glow",
        "negative_prompt": "ugly, deformed, bad anatomy, dark, horror, blurry",
        "strength": 0.48,
    },
    "oil_classical": {
        "name": "Renaissance Portrait",
        "emoji": "π",
        "description": "Classical oil painting style",
        "primary_style": "oil_painting",
        "secondary_style": "oil_painting",
        "primary_weight": 1.0,
        "secondary_weight": 0.0,
        "prompt": "classical oil portrait, impasto technique, palette knife strokes, chiaroscuro lighting, canvas texture, museum quality",
        "negative_prompt": "flat, cartoon, anime, modern, minimalist, overexposed",
        "strength": 0.50,
    },
    "pixel_retro": {
        "name": "Retro Game Art",
        "emoji": "πΉοΈ",
        "description": "Pixel art with enhanced retro feel",
        "primary_style": "pixel_art",
        "secondary_style": "pixel_art",
        "primary_weight": 1.0,
        "secondary_weight": 0.0,
        "prompt": "retro pixel art, crisp blocky pixels, limited palette, arcade aesthetic, dithering, 16-bit charm, sprite art",
        "negative_prompt": "smooth, blurry, anti-aliased, modern, gradient",
        "strength": 0.58,
    },
}
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
class StyleTransferEngine:
    """
    Multi-style image transformation engine using SDXL + LoRAs.

    Supports: 3D Cartoon, Anime, Illustrated Fantasy, Watercolor, Oil Painting,
    and Pixel Art styles, plus blend presets (STYLE_BLENDS). Optional
    IP-Adapter support provides stronger identity preservation when
    ``face_restore`` is enabled.

    Lifecycle: call ``load_model()`` (lazy — generation methods call it for
    you), then ``generate_styled_image`` / ``generate_blended_style`` /
    ``generate_all_outputs``; call ``unload_model()`` to free GPU memory.
    """

    BASE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

    def __init__(self):
        # Preferred device; refined to the actually-used device in load_model().
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.pipe = None            # AutoPipelineForImage2Image once loaded
        self.current_lora = None    # repo id of the currently loaded LoRA, or None
        self.is_loaded = False
        self.ip_adapter_loaded = False

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _resolve_seed(seed: int) -> int:
        """Return *seed* unchanged, or draw a fresh random seed when seed == -1."""
        if seed == -1:
            seed = torch.randint(0, 2147483647, (1,)).item()
        return seed

    @staticmethod
    def _free_memory() -> None:
        """Best-effort release of Python heap and CUDA cache after generation."""
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # ------------------------------------------------------------------ #
    # Model / adapter lifecycle
    # ------------------------------------------------------------------ #

    def load_model(self) -> None:
        """Load the SDXL img2img base pipeline (idempotent)."""
        if self.is_loaded:
            return

        print("β Loading SDXL base model...")

        from diffusers import AutoPipelineForImage2Image

        actual_device = "cuda" if torch.cuda.is_available() else self.device

        self.pipe = AutoPipelineForImage2Image.from_pretrained(
            self.BASE_MODEL,
            torch_dtype=torch.float16 if actual_device == "cuda" else torch.float32,
            variant="fp16" if actual_device == "cuda" else None,
            use_safetensors=True,
        )
        self.pipe.to(actual_device)

        # xformers attention is an optional optimization; ignore if unavailable.
        if actual_device == "cuda":
            try:
                self.pipe.enable_xformers_memory_efficient_attention()
            except Exception:
                pass

        self.is_loaded = True
        self.device = actual_device
        print(f"β SDXL base loaded ({actual_device})")

    def _load_lora(self, style_key: str) -> None:
        """Ensure the LoRA matching *style_key* is active (loading/unloading as needed)."""
        config = STYLE_CONFIGS.get(style_key)
        if not config:
            return

        lora_repo = config.get("lora_repo")

        # Style uses base SDXL only: make sure no stale LoRA stays active.
        if lora_repo is None:
            if self.current_lora is not None:
                print("β Unloading previous LoRA...")
                self.pipe.unload_lora_weights()
                self.current_lora = None
            return

        # Already active — nothing to do.
        if self.current_lora == lora_repo:
            return

        # Swap: unload the previous LoRA before loading the new one.
        if self.current_lora is not None:
            print(f"β Unloading previous LoRA: {self.current_lora}")
            self.pipe.unload_lora_weights()

        print(f"β Loading LoRA: {config['name']}...")
        try:
            lora_weight = config.get("lora_weight")
            if lora_weight:
                self.pipe.load_lora_weights(lora_repo, weight_name=lora_weight)
            else:
                self.pipe.load_lora_weights(lora_repo)

            self.current_lora = lora_repo
            print(f"β LoRA loaded: {config['name']}")
        except Exception as e:
            # Degrade gracefully: generation continues with base SDXL only.
            print(f"β LoRA loading failed: {e}, continuing without LoRA")
            self.current_lora = None

    def _load_ip_adapter(self) -> bool:
        """Load the IP-Adapter used for identity preservation.

        Returns:
            True when the adapter is (or already was) loaded, False on failure
            or when the pipeline itself is not loaded yet.
        """
        if self.ip_adapter_loaded:
            return True

        if self.pipe is None:
            return False

        print("β Loading IP-Adapter for face preservation...")
        try:
            self.pipe.load_ip_adapter(
                IP_ADAPTER_REPO,
                subfolder=IP_ADAPTER_SUBFOLDER,
                weight_name=IP_ADAPTER_WEIGHT
            )
            self.ip_adapter_loaded = True
            print("β IP-Adapter loaded")
            return True
        except Exception as e:
            print(f"β IP-Adapter loading failed: {e}")
            self.ip_adapter_loaded = False
            return False

    def _unload_ip_adapter(self) -> None:
        """Unload the IP-Adapter to free memory (no-op when not loaded)."""
        if not self.ip_adapter_loaded or self.pipe is None:
            return

        try:
            self.pipe.unload_ip_adapter()
            self.ip_adapter_loaded = False
            print("β IP-Adapter unloaded")
        except Exception as e:
            print(f"β IP-Adapter unload failed: {e}")

    def unload_model(self) -> None:
        """Unload the pipeline, LoRA state and IP-Adapter, then free memory."""
        if not self.is_loaded:
            return

        # Unload IP-Adapter first so the pipeline can release it cleanly.
        if self.ip_adapter_loaded:
            self._unload_ip_adapter()

        if self.pipe is not None:
            del self.pipe
            self.pipe = None

        self.current_lora = None
        self.ip_adapter_loaded = False

        self._free_memory()

        self.is_loaded = False
        print("β Model unloaded")

    # ------------------------------------------------------------------ #
    # Image preparation
    # ------------------------------------------------------------------ #

    def _preprocess_image(self, image: Image.Image) -> Image.Image:
        """Resize *image* for SDXL: RGB, longest side 1024, dims multiple of 8, min 512."""
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # SDXL works best around 1024px; keep the aspect ratio.
        max_size = 1024
        width, height = image.size

        if width > height:
            new_width = max_size
            new_height = int(height * (max_size / width))
        else:
            new_height = max_size
            new_width = int(width * (max_size / height))

        # Round down to a multiple of 8 (SDXL latent-space requirement).
        new_width = (new_width // 8) * 8
        new_height = (new_height // 8) * 8

        # Guard against degenerate inputs.
        new_width = max(new_width, 512)
        new_height = max(new_height, 512)

        return image.resize((new_width, new_height), Image.LANCZOS)

    # ------------------------------------------------------------------ #
    # Generation
    # ------------------------------------------------------------------ #

    def generate_styled_image(
        self,
        image: Image.Image,
        style_key: str = "3d_cartoon",
        strength: float = 0.65,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 30,
        custom_prompt: str = "",
        seed: int = -1,
        face_restore: bool = False
    ) -> Tuple[Image.Image, int]:
        """
        Convert image to the specified style.

        Args:
            image: Input PIL Image
            style_key: One of: 3d_cartoon, anime, illustrated_fantasy, watercolor, oil_painting, pixel_art
            strength: How much to transform (0.0-1.0); capped per style when face_restore is on
            guidance_scale: How closely to follow the prompt
            num_inference_steps: Number of denoising steps
            custom_prompt: Additional prompt text
            seed: Random seed (-1 for random)
            face_restore: Enable enhanced face preservation mode (IP-Adapter + weighted prompts)

        Returns:
            Tuple of (Stylized PIL Image, seed used)
        """
        if not self.is_loaded:
            self.load_model()

        config = STYLE_CONFIGS.get(style_key, STYLE_CONFIGS["3d_cartoon"])
        self._load_lora(style_key)

        print("β Preprocessing image...")
        processed_image = self._preprocess_image(image)

        face_settings = FACE_RESTORE_STYLE_SETTINGS.get(style_key, {
            "max_strength": 0.45, "lora_scale_mult": 0.7, "ip_scale": 0.5
        })

        base_prompt = config["prompt"]
        ip_adapter_image = None
        ip_scale = 0.0

        if face_restore:
            # Enhanced face preservation: weighted prompts + IP-Adapter + capped strength.
            preserve_prompt = FACE_RESTORE_PRESERVE
            negative_base = FACE_RESTORE_NEGATIVE

            strength = min(strength, face_settings["max_strength"])
            print(f"β Face Restore enabled: strength capped at {strength} (style: {style_key})")

            if self._load_ip_adapter():
                ip_adapter_image = processed_image
                ip_scale = face_settings["ip_scale"]
                print(f"β IP-Adapter scale: {ip_scale}")
        else:
            preserve_prompt = IDENTITY_PRESERVE
            negative_base = IDENTITY_NEGATIVE
            # Unload IP-Adapter when unused to save memory.
            if self.ip_adapter_loaded:
                self._unload_ip_adapter()

        if custom_prompt:
            prompt = f"{preserve_prompt}, {base_prompt}, {custom_prompt}"
        else:
            prompt = f"{preserve_prompt}, {base_prompt}"

        negative_prompt = f"{negative_base}, {config['negative_prompt']}"

        # Damp the LoRA in face-restore mode (style-specific multiplier).
        lora_scale = config.get("lora_scale", 1.0)
        if face_restore:
            lora_scale = lora_scale * face_settings["lora_scale_mult"]

        seed = self._resolve_seed(seed)
        generator = torch.Generator(device=self.device).manual_seed(seed)

        print(f"β Generating {config['name']} style (strength: {strength}, steps: {num_inference_steps}, seed: {seed})...")

        gen_kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "image": processed_image,
            "strength": strength,
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
            "generator": generator,
        }

        # cross_attention_kwargs is only valid when a LoRA is actually active.
        if self.current_lora is not None:
            gen_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}

        if ip_adapter_image is not None and self.ip_adapter_loaded:
            self.pipe.set_ip_adapter_scale(ip_scale)
            gen_kwargs["ip_adapter_image"] = ip_adapter_image

        result = self.pipe(**gen_kwargs).images[0]

        print(f"β {config['name']} style generated (seed: {seed})")

        self._free_memory()

        return result, seed

    def generate_blended_style(
        self,
        image: Image.Image,
        blend_key: str,
        custom_prompt: str = "",
        seed: int = -1,
        face_restore: bool = False
    ) -> Tuple[Image.Image, int]:
        """
        Generate image using a style blend preset.

        Args:
            image: Input PIL Image
            blend_key: Key from STYLE_BLENDS (unknown keys fall back to 3d_cartoon)
            custom_prompt: Additional prompt text
            seed: Random seed (-1 for random)
            face_restore: Enable enhanced face preservation mode

        Returns:
            Tuple of (Stylized PIL Image, seed used)
        """
        if not self.is_loaded:
            self.load_model()

        blend_config = STYLE_BLENDS.get(blend_key)
        if not blend_config:
            return self.generate_styled_image(image, "3d_cartoon", seed=seed, face_restore=face_restore)

        # Only the primary style's LoRA is loaded; the blend lives in the prompt.
        primary_style = blend_config["primary_style"]
        self._load_lora(primary_style)

        print("β Preprocessing image...")
        processed_image = self._preprocess_image(image)

        # Face-restore settings follow the primary style.
        face_settings = FACE_RESTORE_STYLE_SETTINGS.get(primary_style, {
            "max_strength": 0.45, "lora_scale_mult": 0.7, "ip_scale": 0.5
        })

        base_prompt = blend_config["prompt"]
        ip_adapter_image = None
        ip_scale = 0.0

        if face_restore:
            preserve_prompt = FACE_RESTORE_PRESERVE
            negative_base = FACE_RESTORE_NEGATIVE

            strength = min(blend_config["strength"], face_settings["max_strength"])
            print(f"β Face Restore enabled: strength capped at {strength} (blend: {blend_key})")

            if self._load_ip_adapter():
                ip_adapter_image = processed_image
                ip_scale = face_settings["ip_scale"]
                print(f"β IP-Adapter scale: {ip_scale}")
        else:
            preserve_prompt = IDENTITY_PRESERVE
            negative_base = IDENTITY_NEGATIVE
            strength = blend_config["strength"]
            if self.ip_adapter_loaded:
                self._unload_ip_adapter()

        if custom_prompt:
            prompt = f"{preserve_prompt}, {base_prompt}, {custom_prompt}"
        else:
            prompt = f"{preserve_prompt}, {base_prompt}"

        negative_prompt = f"{negative_base}, {blend_config['negative_prompt']}"

        # LoRA influence = primary style scale x blend weight (damped for face restore).
        primary_config = STYLE_CONFIGS.get(primary_style, {})
        lora_scale = primary_config.get("lora_scale", 1.0) * blend_config["primary_weight"]
        if face_restore:
            lora_scale = lora_scale * face_settings["lora_scale_mult"]

        seed = self._resolve_seed(seed)
        generator = torch.Generator(device=self.device).manual_seed(seed)

        print(f"β Generating {blend_config['name']} blend (seed: {seed})...")

        gen_kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "image": processed_image,
            "strength": strength,
            "guidance_scale": 7.5,
            "num_inference_steps": 30,
            "generator": generator,
        }

        if self.current_lora is not None:
            gen_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}

        if ip_adapter_image is not None and self.ip_adapter_loaded:
            self.pipe.set_ip_adapter_scale(ip_scale)
            gen_kwargs["ip_adapter_image"] = ip_adapter_image

        result = self.pipe(**gen_kwargs).images[0]

        print(f"β {blend_config['name']} blend generated (seed: {seed})")

        self._free_memory()

        return result, seed

    def generate_all_outputs(
        self,
        image: Image.Image,
        style_key: str = "3d_cartoon",
        strength: float = 0.65,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 30,
        custom_prompt: str = "",
        seed: int = -1,
        is_blend: bool = False,
        face_restore: bool = False
    ) -> dict:
        """
        Generate styled image output.

        When is_blend is True, *style_key* is treated as a STYLE_BLENDS key and
        the strength/guidance/steps arguments are ignored (the preset drives
        them). Errors are caught and reported in the result dict.

        Returns dict with: success, stylized_image, preview_image, style_name,
        seed_used, error.
        """
        result = {
            "success": False,
            "stylized_image": None,
            "preview_image": None,
            "style_name": "",
            "seed_used": 0,
            "error": None
        }

        try:
            if is_blend:
                blend_config = STYLE_BLENDS.get(style_key, {})
                result["style_name"] = blend_config.get("name", "Unknown Blend")

                stylized, seed_used = self.generate_blended_style(
                    image=image,
                    blend_key=style_key,
                    custom_prompt=custom_prompt,
                    seed=seed,
                    face_restore=face_restore
                )
            else:
                config = STYLE_CONFIGS.get(style_key, STYLE_CONFIGS["3d_cartoon"])
                result["style_name"] = config["name"]

                stylized, seed_used = self.generate_styled_image(
                    image=image,
                    style_key=style_key,
                    strength=strength,
                    guidance_scale=guidance_scale,
                    num_inference_steps=num_inference_steps,
                    custom_prompt=custom_prompt,
                    seed=seed,
                    face_restore=face_restore
                )

            result["stylized_image"] = stylized
            result["preview_image"] = stylized
            result["seed_used"] = seed_used
            result["success"] = True
            print(f"β {result['style_name']} conversion completed (seed: {seed_used})")

        except Exception as e:
            result["error"] = str(e)
            print(f"β Style conversion failed: {e}")

        return result

    # ------------------------------------------------------------------ #
    # UI helpers (static)
    # ------------------------------------------------------------------ #

    @staticmethod
    def get_available_styles() -> Dict[str, Dict[str, Any]]:
        """Return available style configurations (key -> {name, emoji})."""
        return {
            key: {
                "name": config["name"],
                "emoji": config["emoji"],
            }
            for key, config in STYLE_CONFIGS.items()
        }

    @staticmethod
    def get_style_choices() -> list:
        """Return style choices for UI dropdown."""
        return [
            f"{config['emoji']} {config['name']}"
            for config in STYLE_CONFIGS.values()
        ]

    @staticmethod
    def get_style_key_from_choice(choice: str) -> str:
        """Convert UI choice back to style key (defaults to 3d_cartoon)."""
        for key, config in STYLE_CONFIGS.items():
            if config["name"] in choice:
                return key
        return "3d_cartoon"

    @staticmethod
    def get_blend_choices() -> list:
        """Return blend preset choices for UI dropdown."""
        return [
            f"{config['emoji']} {config['name']} - {config['description']}"
            for config in STYLE_BLENDS.values()
        ]

    @staticmethod
    def get_blend_key_from_choice(choice: str) -> str:
        """Convert UI blend choice back to blend key (defaults to cartoon_anime)."""
        for key, config in STYLE_BLENDS.items():
            if config["name"] in choice:
                return key
        return "cartoon_anime"

    @staticmethod
    def get_all_choices() -> dict:
        """Return both style and blend choices for UI, plus a combined list."""
        styles = [
            f"{config['emoji']} {config['name']}"
            for config in STYLE_CONFIGS.values()
        ]
        blends = [
            f"{config['emoji']} {config['name']}"
            for config in STYLE_BLENDS.values()
        ]
        return {
            "styles": styles,
            "blends": blends,
            "all": styles + ["βββ Style Blends βββ"] + blends
        }
ui_manager.py
CHANGED
|
@@ -6,6 +6,7 @@ import logging
|
|
| 6 |
|
| 7 |
from FlowFacade import FlowFacade
|
| 8 |
from BackgroundEngine import BackgroundEngine
|
|
|
|
| 9 |
from scene_templates import SceneTemplateManager
|
| 10 |
from css_style import DELTAFLOW_CSS
|
| 11 |
from prompt_examples import PROMPT_EXAMPLES
|
|
@@ -20,9 +21,10 @@ logger = logging.getLogger(__name__)
|
|
| 20 |
|
| 21 |
|
| 22 |
class UIManager:
|
| 23 |
-
def __init__(self, facade: FlowFacade, background_engine: BackgroundEngine):
|
| 24 |
self.facade = facade
|
| 25 |
self.background_engine = background_engine
|
|
|
|
| 26 |
self.template_manager = SceneTemplateManager()
|
| 27 |
|
| 28 |
def create_interface(self) -> gr.Blocks:
|
|
@@ -45,15 +47,19 @@ class UIManager:
|
|
| 45 |
|
| 46 |
# Main Tabs
|
| 47 |
with gr.Tabs() as main_tabs:
|
| 48 |
-
|
| 49 |
-
# Tab 1: Image to Video
|
| 50 |
with gr.Tab("π¬ Image to Video"):
|
| 51 |
self._create_i2v_tab()
|
| 52 |
-
|
| 53 |
-
# Tab 2: Background Generation
|
| 54 |
with gr.Tab("π¨ Background Generation"):
|
| 55 |
self._create_background_tab()
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Footer
|
| 58 |
gr.HTML("""
|
| 59 |
<div class="footer">
|
|
@@ -341,8 +347,21 @@ class UIManager:
|
|
| 341 |
gr.HTML("""
|
| 342 |
<div style="padding: 8px; background: #f0f4ff; border-radius: 6px; margin-bottom: 12px; font-size: 13px;">
|
| 343 |
<strong>π‘ When to Adjust:</strong><br>
|
|
|
|
| 344 |
β’ <strong>Feather Radius:</strong> Use 5-10 for complex scenes with fine details (hair, fur, foliage). 0 = sharp edges for clean portraits.<br>
|
| 345 |
-
β’ <strong>Mask Preview:</strong> Check the "Mask Preview" tab after generation. White = kept, Black = replaced.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
</div>
|
| 347 |
""")
|
| 348 |
|
|
@@ -393,7 +412,7 @@ class UIManager:
|
|
| 393 |
|
| 394 |
gr.HTML("""
|
| 395 |
<div class="patience-banner">
|
| 396 |
-
<strong>β±οΈ First-time users:</strong> Initial model loading takes
|
| 397 |
Subsequent generations are much faster (~30s).
|
| 398 |
</div>
|
| 399 |
""")
|
|
@@ -443,6 +462,77 @@ class UIManager:
|
|
| 443 |
elem_classes=["secondary-button"]
|
| 444 |
)
|
| 445 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
# Event handlers for Background Generation tab
|
| 447 |
def apply_template(display_name: str, current_negative: str) -> Tuple[str, str, float]:
|
| 448 |
if not display_name:
|
|
@@ -474,7 +564,7 @@ class UIManager:
|
|
| 474 |
inputs=[
|
| 475 |
bg_image_input, bg_prompt_input, combination_mode,
|
| 476 |
focus_mode, bg_negative_prompt, bg_steps_slider, bg_guidance_slider,
|
| 477 |
-
feather_radius_slider
|
| 478 |
],
|
| 479 |
outputs=[
|
| 480 |
bg_combined_output, bg_generated_output,
|
|
@@ -495,6 +585,132 @@ class UIManager:
|
|
| 495 |
outputs=[bg_status_output]
|
| 496 |
)
|
| 497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
def _generate_background_handler(
|
| 499 |
self,
|
| 500 |
image: Image.Image,
|
|
@@ -504,7 +720,8 @@ class UIManager:
|
|
| 504 |
negative_prompt: str,
|
| 505 |
steps: int,
|
| 506 |
guidance: float,
|
| 507 |
-
feather_radius: int
|
|
|
|
| 508 |
) -> Tuple[Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], str]:
|
| 509 |
"""Handler for background generation"""
|
| 510 |
if image is None:
|
|
@@ -522,7 +739,7 @@ class UIManager:
|
|
| 522 |
|
| 523 |
result = generate_fn(
|
| 524 |
image, prompt, combination_mode, focus_mode,
|
| 525 |
-
negative_prompt, steps, guidance, feather_radius
|
| 526 |
)
|
| 527 |
|
| 528 |
if result["success"]:
|
|
@@ -550,7 +767,8 @@ class UIManager:
|
|
| 550 |
negative_prompt: str,
|
| 551 |
steps: int,
|
| 552 |
guidance: float,
|
| 553 |
-
feather_radius: int
|
|
|
|
| 554 |
) -> Dict[str, Any]:
|
| 555 |
"""Core background generation with models"""
|
| 556 |
if not self.background_engine.is_initialized:
|
|
@@ -566,7 +784,333 @@ class UIManager:
|
|
| 566 |
num_inference_steps=int(steps),
|
| 567 |
guidance_scale=float(guidance),
|
| 568 |
enable_prompt_enhancement=True,
|
| 569 |
-
feather_radius=int(feather_radius)
|
|
|
|
| 570 |
)
|
| 571 |
|
| 572 |
-
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
from FlowFacade import FlowFacade
|
| 8 |
from BackgroundEngine import BackgroundEngine
|
| 9 |
+
from style_transfer import StyleTransferEngine
|
| 10 |
from scene_templates import SceneTemplateManager
|
| 11 |
from css_style import DELTAFLOW_CSS
|
| 12 |
from prompt_examples import PROMPT_EXAMPLES
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class UIManager:
|
| 24 |
+
def __init__(self, facade: FlowFacade, background_engine: BackgroundEngine, style_engine: StyleTransferEngine):
|
| 25 |
self.facade = facade
|
| 26 |
self.background_engine = background_engine
|
| 27 |
+
self.style_engine = style_engine
|
| 28 |
self.template_manager = SceneTemplateManager()
|
| 29 |
|
| 30 |
def create_interface(self) -> gr.Blocks:
|
|
|
|
| 47 |
|
| 48 |
# Main Tabs
|
| 49 |
with gr.Tabs() as main_tabs:
|
| 50 |
+
|
| 51 |
+
# Tab 1: Image to Video
|
| 52 |
with gr.Tab("π¬ Image to Video"):
|
| 53 |
self._create_i2v_tab()
|
| 54 |
+
|
| 55 |
+
# Tab 2: Background Generation
|
| 56 |
with gr.Tab("π¨ Background Generation"):
|
| 57 |
self._create_background_tab()
|
| 58 |
|
| 59 |
+
# Tab 3: AI Style Transfer
|
| 60 |
+
with gr.Tab("β¨ Style Transfer"):
|
| 61 |
+
self._create_3d_tab()
|
| 62 |
+
|
| 63 |
# Footer
|
| 64 |
gr.HTML("""
|
| 65 |
<div class="footer">
|
|
|
|
| 347 |
gr.HTML("""
|
| 348 |
<div style="padding: 8px; background: #f0f4ff; border-radius: 6px; margin-bottom: 12px; font-size: 13px;">
|
| 349 |
<strong>π‘ When to Adjust:</strong><br>
|
| 350 |
+
β’ <strong>Enhance Dark Edges:</strong> Enable for images with dark/black backgrounds where foreground parts get lost.<br>
|
| 351 |
β’ <strong>Feather Radius:</strong> Use 5-10 for complex scenes with fine details (hair, fur, foliage). 0 = sharp edges for clean portraits.<br>
|
| 352 |
+
β’ <strong>Mask Preview:</strong> Check the "Mask Preview" tab after generation. White = kept, Black = replaced.
|
| 353 |
+
</div>
|
| 354 |
+
""")
|
| 355 |
+
|
| 356 |
+
enhance_dark_edges = gr.Checkbox(
|
| 357 |
+
label="π Enhance Dark Edges",
|
| 358 |
+
value=False,
|
| 359 |
+
info="Enable if dark foreground parts blend into dark backgrounds"
|
| 360 |
+
)
|
| 361 |
+
gr.HTML("""
|
| 362 |
+
<div style="padding: 6px 8px; background: #fff3cd; border-radius: 4px; font-size: 11px; margin-bottom: 12px;">
|
| 363 |
+
<strong>When to use:</strong> If mask preview shows gray areas where foreground should be white (e.g., dark hair/clothing on dark background).
|
| 364 |
+
Auto-detection is enabled by default, but this toggle forces stronger enhancement.
|
| 365 |
</div>
|
| 366 |
""")
|
| 367 |
|
|
|
|
| 412 |
|
| 413 |
gr.HTML("""
|
| 414 |
<div class="patience-banner">
|
| 415 |
+
<strong>β±οΈ First-time users:</strong> Initial model loading takes 30-60 seconds.
|
| 416 |
Subsequent generations are much faster (~30s).
|
| 417 |
</div>
|
| 418 |
""")
|
|
|
|
| 462 |
elem_classes=["secondary-button"]
|
| 463 |
)
|
| 464 |
|
| 465 |
+
# Touch Up Section for manual artifact removal
|
| 466 |
+
with gr.Accordion("ποΈ Touch Up (Remove Artifacts)", open=False) as touchup_accordion:
|
| 467 |
+
gr.HTML("""
|
| 468 |
+
<div style="padding: 10px; background: #e8f4fd; border-radius: 6px; margin-bottom: 12px; font-size: 13px;">
|
| 469 |
+
<strong>β¨ How to Use Touch Up:</strong><br>
|
| 470 |
+
1. After generating, if you see unwanted artifacts (gray edges, leftover objects)<br>
|
| 471 |
+
2. Click "Load Result for Touch Up" to load the image<br>
|
| 472 |
+
3. Use the brush to paint over areas you want to remove<br>
|
| 473 |
+
4. Click "Remove & Fill" to replace painted areas with background
|
| 474 |
+
</div>
|
| 475 |
+
""")
|
| 476 |
+
|
| 477 |
+
# State to store the current result and prompt
|
| 478 |
+
touchup_source_image = gr.State(value=None)
|
| 479 |
+
touchup_background_prompt = gr.State(value="")
|
| 480 |
+
|
| 481 |
+
load_touchup_btn = gr.Button(
|
| 482 |
+
"π₯ Load Result for Touch Up",
|
| 483 |
+
elem_classes=["secondary-button"]
|
| 484 |
+
)
|
| 485 |
+
|
| 486 |
+
touchup_editor = gr.ImageEditor(
|
| 487 |
+
label="Draw on areas to remove (use brush tool)",
|
| 488 |
+
type="pil",
|
| 489 |
+
height=400,
|
| 490 |
+
brush=gr.Brush(
|
| 491 |
+
colors=["#FF0000"],
|
| 492 |
+
default_color="#FF0000",
|
| 493 |
+
default_size=20
|
| 494 |
+
),
|
| 495 |
+
layers=False,
|
| 496 |
+
interactive=True,
|
| 497 |
+
visible=True
|
| 498 |
+
)
|
| 499 |
+
|
| 500 |
+
with gr.Row():
|
| 501 |
+
brush_size_slider = gr.Slider(
|
| 502 |
+
label="Brush Size",
|
| 503 |
+
minimum=5,
|
| 504 |
+
maximum=50,
|
| 505 |
+
value=20,
|
| 506 |
+
step=5,
|
| 507 |
+
scale=2
|
| 508 |
+
)
|
| 509 |
+
touchup_strength = gr.Slider(
|
| 510 |
+
label="Fill Strength",
|
| 511 |
+
minimum=0.8,
|
| 512 |
+
maximum=1.0,
|
| 513 |
+
value=0.99,
|
| 514 |
+
step=0.01,
|
| 515 |
+
scale=2,
|
| 516 |
+
info="Higher = more complete replacement"
|
| 517 |
+
)
|
| 518 |
+
|
| 519 |
+
remove_fill_btn = gr.Button(
|
| 520 |
+
"π¨ Remove & Fill",
|
| 521 |
+
variant="primary",
|
| 522 |
+
elem_classes="primary-button"
|
| 523 |
+
)
|
| 524 |
+
|
| 525 |
+
touchup_result = gr.Image(
|
| 526 |
+
label="Touch Up Result",
|
| 527 |
+
elem_classes=["result-gallery"]
|
| 528 |
+
)
|
| 529 |
+
|
| 530 |
+
touchup_status = gr.Textbox(
|
| 531 |
+
label="Touch Up Status",
|
| 532 |
+
value="Load an image to start touch up.",
|
| 533 |
+
interactive=False
|
| 534 |
+
)
|
| 535 |
+
|
| 536 |
# Event handlers for Background Generation tab
|
| 537 |
def apply_template(display_name: str, current_negative: str) -> Tuple[str, str, float]:
|
| 538 |
if not display_name:
|
|
|
|
| 564 |
inputs=[
|
| 565 |
bg_image_input, bg_prompt_input, combination_mode,
|
| 566 |
focus_mode, bg_negative_prompt, bg_steps_slider, bg_guidance_slider,
|
| 567 |
+
feather_radius_slider, enhance_dark_edges
|
| 568 |
],
|
| 569 |
outputs=[
|
| 570 |
bg_combined_output, bg_generated_output,
|
|
|
|
| 585 |
outputs=[bg_status_output]
|
| 586 |
)
|
| 587 |
|
| 588 |
+
# Touch Up event handlers
|
| 589 |
+
def load_for_touchup(combined_image, prompt):
|
| 590 |
+
"""Load the generated result into touch up editor"""
|
| 591 |
+
if combined_image is None:
|
| 592 |
+
return None, None, "", "Please generate a background first!"
|
| 593 |
+
return combined_image, combined_image, prompt, "β Image loaded! Use brush to paint areas to remove."
|
| 594 |
+
|
| 595 |
+
load_touchup_btn.click(
|
| 596 |
+
fn=load_for_touchup,
|
| 597 |
+
inputs=[bg_combined_output, bg_prompt_input],
|
| 598 |
+
outputs=[touchup_editor, touchup_source_image, touchup_background_prompt, touchup_status]
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
+
remove_fill_btn.click(
|
| 602 |
+
fn=self._touchup_inpaint_handler,
|
| 603 |
+
inputs=[touchup_editor, touchup_background_prompt, touchup_strength],
|
| 604 |
+
outputs=[touchup_result, touchup_status]
|
| 605 |
+
)
|
| 606 |
+
|
| 607 |
+
def _touchup_inpaint_handler(
|
| 608 |
+
self,
|
| 609 |
+
editor_data: dict,
|
| 610 |
+
background_prompt: str,
|
| 611 |
+
strength: float
|
| 612 |
+
) -> Tuple[Optional[Image.Image], str]:
|
| 613 |
+
"""Handler for touch up inpainting"""
|
| 614 |
+
if editor_data is None:
|
| 615 |
+
return None, "Please load an image first!"
|
| 616 |
+
|
| 617 |
+
try:
|
| 618 |
+
# Extract image and mask from editor
|
| 619 |
+
# Gradio ImageEditor returns a dict with 'background', 'layers', 'composite'
|
| 620 |
+
if isinstance(editor_data, dict):
|
| 621 |
+
base_image = editor_data.get("background") or editor_data.get("composite")
|
| 622 |
+
layers = editor_data.get("layers", [])
|
| 623 |
+
|
| 624 |
+
if base_image is None:
|
| 625 |
+
return None, "No image found in editor!"
|
| 626 |
+
|
| 627 |
+
# Create mask from drawn layers (red brush strokes)
|
| 628 |
+
mask = self._extract_mask_from_editor(base_image, layers)
|
| 629 |
+
|
| 630 |
+
if mask is None or not self._has_painted_area(mask):
|
| 631 |
+
return None, "Please draw on areas you want to remove!"
|
| 632 |
+
|
| 633 |
+
else:
|
| 634 |
+
# Fallback for PIL Image
|
| 635 |
+
return None, "Invalid editor data format!"
|
| 636 |
+
|
| 637 |
+
# Apply ZeroGPU decorator if available
|
| 638 |
+
if SPACES_AVAILABLE:
|
| 639 |
+
inpaint_fn = spaces.GPU(duration=60)(self._touchup_inpaint_core)
|
| 640 |
+
else:
|
| 641 |
+
inpaint_fn = self._touchup_inpaint_core
|
| 642 |
+
|
| 643 |
+
result = inpaint_fn(base_image, mask, background_prompt, strength)
|
| 644 |
+
|
| 645 |
+
if result["success"]:
|
| 646 |
+
return result["inpainted_image"], "β Touch up completed!"
|
| 647 |
+
else:
|
| 648 |
+
return None, f"Error: {result.get('error', 'Unknown error')}"
|
| 649 |
+
|
| 650 |
+
except Exception as e:
|
| 651 |
+
logger.error(f"Touch up failed: {e}")
|
| 652 |
+
return None, f"Error: {str(e)}"
|
| 653 |
+
|
| 654 |
+
def _extract_mask_from_editor(self, base_image: Image.Image, layers: list) -> Optional[Image.Image]:
|
| 655 |
+
"""Extract painted mask from ImageEditor layers"""
|
| 656 |
+
import numpy as np
|
| 657 |
+
|
| 658 |
+
if not layers:
|
| 659 |
+
return None
|
| 660 |
+
|
| 661 |
+
# Create blank mask
|
| 662 |
+
width, height = base_image.size
|
| 663 |
+
mask_array = np.zeros((height, width), dtype=np.uint8)
|
| 664 |
+
|
| 665 |
+
for layer in layers:
|
| 666 |
+
if layer is None:
|
| 667 |
+
continue
|
| 668 |
+
|
| 669 |
+
# Convert layer to numpy array
|
| 670 |
+
if isinstance(layer, Image.Image):
|
| 671 |
+
layer_array = np.array(layer.convert('RGBA'))
|
| 672 |
+
else:
|
| 673 |
+
continue
|
| 674 |
+
|
| 675 |
+
# Find non-transparent pixels (painted areas)
|
| 676 |
+
# The alpha channel indicates where user drew
|
| 677 |
+
if layer_array.shape[2] >= 4:
|
| 678 |
+
alpha = layer_array[:, :, 3]
|
| 679 |
+
# Also check for red color (our brush color)
|
| 680 |
+
red = layer_array[:, :, 0]
|
| 681 |
+
# Painted areas have high alpha and red channel
|
| 682 |
+
painted = (alpha > 50) | (red > 100)
|
| 683 |
+
mask_array[painted] = 255
|
| 684 |
+
|
| 685 |
+
return Image.fromarray(mask_array, mode='L')
|
| 686 |
+
|
| 687 |
+
def _has_painted_area(self, mask: Image.Image) -> bool:
|
| 688 |
+
"""Check if mask has any painted area"""
|
| 689 |
+
import numpy as np
|
| 690 |
+
mask_array = np.array(mask)
|
| 691 |
+
return np.sum(mask_array > 127) > 100 # At least 100 white pixels
|
| 692 |
+
|
| 693 |
+
def _touchup_inpaint_core(
|
| 694 |
+
self,
|
| 695 |
+
image: Image.Image,
|
| 696 |
+
mask: Image.Image,
|
| 697 |
+
prompt: str,
|
| 698 |
+
strength: float
|
| 699 |
+
) -> dict:
|
| 700 |
+
"""Core inpainting function"""
|
| 701 |
+
# Use the background prompt to fill in the masked areas
|
| 702 |
+
inpaint_prompt = f"{prompt}, seamless, natural continuation, no artifacts" if prompt else "natural background, seamless continuation"
|
| 703 |
+
|
| 704 |
+
return self.background_engine.inpaint_region(
|
| 705 |
+
image=image,
|
| 706 |
+
mask=mask,
|
| 707 |
+
prompt=inpaint_prompt,
|
| 708 |
+
negative_prompt="blurry, artifacts, seams, inconsistent, unnatural",
|
| 709 |
+
num_inference_steps=20,
|
| 710 |
+
guidance_scale=7.5,
|
| 711 |
+
strength=float(strength)
|
| 712 |
+
)
|
| 713 |
+
|
| 714 |
def _generate_background_handler(
|
| 715 |
self,
|
| 716 |
image: Image.Image,
|
|
|
|
| 720 |
negative_prompt: str,
|
| 721 |
steps: int,
|
| 722 |
guidance: float,
|
| 723 |
+
feather_radius: int,
|
| 724 |
+
enhance_dark_edges: bool = False
|
| 725 |
) -> Tuple[Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], str]:
|
| 726 |
"""Handler for background generation"""
|
| 727 |
if image is None:
|
|
|
|
| 739 |
|
| 740 |
result = generate_fn(
|
| 741 |
image, prompt, combination_mode, focus_mode,
|
| 742 |
+
negative_prompt, steps, guidance, feather_radius, enhance_dark_edges
|
| 743 |
)
|
| 744 |
|
| 745 |
if result["success"]:
|
|
|
|
| 767 |
negative_prompt: str,
|
| 768 |
steps: int,
|
| 769 |
guidance: float,
|
| 770 |
+
feather_radius: int,
|
| 771 |
+
enhance_dark_edges: bool = False
|
| 772 |
) -> Dict[str, Any]:
|
| 773 |
"""Core background generation with models"""
|
| 774 |
if not self.background_engine.is_initialized:
|
|
|
|
| 784 |
num_inference_steps=int(steps),
|
| 785 |
guidance_scale=float(guidance),
|
| 786 |
enable_prompt_enhancement=True,
|
| 787 |
+
feather_radius=int(feather_radius),
|
| 788 |
+
enhance_dark_edges=enhance_dark_edges
|
| 789 |
)
|
| 790 |
|
| 791 |
+
return result
|
| 792 |
+
|
| 793 |
+
def _create_3d_tab(self):
|
| 794 |
+
"""Create Style Transfer tab - converts images to various artistic styles"""
|
| 795 |
+
with gr.Row():
|
| 796 |
+
# Left Panel: Input & Settings
|
| 797 |
+
with gr.Column(scale=1, elem_classes="feature-card"):
|
| 798 |
+
gr.Markdown("### π¨ AI Style Transfer")
|
| 799 |
+
|
| 800 |
+
# How It Works Guide
|
| 801 |
+
gr.HTML("""
|
| 802 |
+
<div class="quality-banner">
|
| 803 |
+
<strong>π Transform Your Photos</strong><br><br>
|
| 804 |
+
Convert your images into <strong>stunning artistic styles</strong>!<br><br>
|
| 805 |
+
<strong>π¨ Single Styles:</strong> Pure artistic transformations<br>
|
| 806 |
+
<strong>π Style Blends:</strong> Unique combinations for distinctive looks<br><br>
|
| 807 |
+
<strong>π‘ Tips:</strong><br>
|
| 808 |
+
β’ Use <strong>Seed</strong> to recreate the exact same result<br>
|
| 809 |
+
β’ Try different blends for unique artistic effects
|
| 810 |
+
</div>
|
| 811 |
+
""")
|
| 812 |
+
|
| 813 |
+
# Step 1: Upload
|
| 814 |
+
gr.Markdown("#### Step 1: Upload Image")
|
| 815 |
+
style3d_image_input = gr.Image(
|
| 816 |
+
label="Upload Your Image",
|
| 817 |
+
type="pil",
|
| 818 |
+
height=280
|
| 819 |
+
)
|
| 820 |
+
|
| 821 |
+
# Step 2: Choose Style
|
| 822 |
+
gr.Markdown("#### Step 2: Choose Style")
|
| 823 |
+
|
| 824 |
+
# Hidden state to track which mode is active (updated by tab selection)
|
| 825 |
+
is_blend_mode = gr.State(value=False)
|
| 826 |
+
|
| 827 |
+
with gr.Tabs() as style_tabs:
|
| 828 |
+
with gr.TabItem("π¨ Single Styles", id="single_tab") as single_tab:
|
| 829 |
+
style_dropdown = gr.Dropdown(
|
| 830 |
+
choices=self.style_engine.get_style_choices(),
|
| 831 |
+
value="π¬ 3D Cartoon",
|
| 832 |
+
label="Art Style",
|
| 833 |
+
info="Select a single artistic style"
|
| 834 |
+
)
|
| 835 |
+
|
| 836 |
+
style_strength = gr.Slider(
|
| 837 |
+
label="Style Strength",
|
| 838 |
+
minimum=0.3,
|
| 839 |
+
maximum=0.7,
|
| 840 |
+
value=0.50,
|
| 841 |
+
step=0.05,
|
| 842 |
+
info="Lower = keep more original | Higher = stronger style (0.45-0.55 recommended)"
|
| 843 |
+
)
|
| 844 |
+
|
| 845 |
+
with gr.TabItem("π Style Blends", id="blend_tab") as blend_tab:
|
| 846 |
+
blend_dropdown = gr.Dropdown(
|
| 847 |
+
choices=self.style_engine.get_blend_choices(),
|
| 848 |
+
value=self.style_engine.get_blend_choices()[0] if self.style_engine.get_blend_choices() else None,
|
| 849 |
+
label="Blend Preset",
|
| 850 |
+
info="Pre-configured style combinations"
|
| 851 |
+
)
|
| 852 |
+
gr.HTML("""
|
| 853 |
+
<div style="padding: 8px; background: #f0f4ff; border-radius: 6px; font-size: 12px; margin-top: 8px;">
|
| 854 |
+
<strong>Available Blends:</strong><br>
|
| 855 |
+
β’ π 3D Anime Fusion - 3D + Anime linework<br>
|
| 856 |
+
β’ π Dreamy Watercolor - Fantasy + Watercolor<br>
|
| 857 |
+
β’ π Anime Storybook - Anime + Fantasy<br>
|
| 858 |
+
β’ π Renaissance Portrait - Classical oil painting<br>
|
| 859 |
+
β’ πΉοΈ Retro Game Art - Enhanced pixel art
|
| 860 |
+
</div>
|
| 861 |
+
""")
|
| 862 |
+
|
| 863 |
+
# Face Restore option for identity preservation
|
| 864 |
+
face_restore = gr.Checkbox(
|
| 865 |
+
label="π‘οΈ Face Restore (Preserve Identity)",
|
| 866 |
+
value=False,
|
| 867 |
+
info="Enable to better preserve facial features and prevent identity changes"
|
| 868 |
+
)
|
| 869 |
+
gr.HTML("""
|
| 870 |
+
<div style="padding: 6px 8px; background: #fff3cd; border-radius: 4px; font-size: 11px; margin-top: 4px;">
|
| 871 |
+
<strong>π‘ When to use:</strong> Enable if the style changes the person's face, age, or ethnicity too much.
|
| 872 |
+
Auto-reduces strength to preserve original features.
|
| 873 |
+
</div>
|
| 874 |
+
""")
|
| 875 |
+
|
| 876 |
+
with gr.Accordion("βοΈ Advanced Settings", open=False):
|
| 877 |
+
guidance_scale = gr.Slider(
|
| 878 |
+
label="Guidance Scale",
|
| 879 |
+
minimum=5.0,
|
| 880 |
+
maximum=12.0,
|
| 881 |
+
value=7.5,
|
| 882 |
+
step=0.5,
|
| 883 |
+
info="How closely to follow the style"
|
| 884 |
+
)
|
| 885 |
+
|
| 886 |
+
num_steps = gr.Slider(
|
| 887 |
+
label="Quality Steps",
|
| 888 |
+
minimum=20,
|
| 889 |
+
maximum=50,
|
| 890 |
+
value=30,
|
| 891 |
+
step=5,
|
| 892 |
+
info="More steps = better quality but slower"
|
| 893 |
+
)
|
| 894 |
+
|
| 895 |
+
custom_prompt = gr.Textbox(
|
| 896 |
+
label="Additional Description (optional)",
|
| 897 |
+
placeholder="e.g., smiling, dramatic lighting, vibrant colors...",
|
| 898 |
+
lines=2
|
| 899 |
+
)
|
| 900 |
+
|
| 901 |
+
gr.Markdown("##### π² Seed Control")
|
| 902 |
+
randomize_seed = gr.Checkbox(
|
| 903 |
+
label="Randomize Seed",
|
| 904 |
+
value=True,
|
| 905 |
+
info="Uncheck to use manual seed for reproducible results"
|
| 906 |
+
)
|
| 907 |
+
|
| 908 |
+
seed_input = gr.Number(
|
| 909 |
+
label="Manual Seed",
|
| 910 |
+
value=42,
|
| 911 |
+
precision=0,
|
| 912 |
+
info="Use same seed to reproduce exact results"
|
| 913 |
+
)
|
| 914 |
+
|
| 915 |
+
# Step 3: Generate
|
| 916 |
+
gr.Markdown("#### Step 3: Generate")
|
| 917 |
+
|
| 918 |
+
gr.HTML("""
|
| 919 |
+
<div class="patience-banner">
|
| 920 |
+
<strong>β±οΈ Generation Time:</strong> ~20-30 seconds.
|
| 921 |
+
First-time model loading may take 30-60 seconds.
|
| 922 |
+
</div>
|
| 923 |
+
""")
|
| 924 |
+
|
| 925 |
+
generate_style_btn = gr.Button(
|
| 926 |
+
"π¨ Transform Image",
|
| 927 |
+
variant="primary",
|
| 928 |
+
elem_classes="primary-button",
|
| 929 |
+
size="lg"
|
| 930 |
+
)
|
| 931 |
+
|
| 932 |
+
# Right Panel: Output
|
| 933 |
+
with gr.Column(scale=1, elem_classes="feature-card"):
|
| 934 |
+
gr.Markdown("### π€ Results")
|
| 935 |
+
|
| 936 |
+
with gr.Tabs():
|
| 937 |
+
with gr.TabItem("Stylized Result"):
|
| 938 |
+
style3d_output = gr.Image(
|
| 939 |
+
label="Stylized Result",
|
| 940 |
+
elem_classes=["result-gallery"]
|
| 941 |
+
)
|
| 942 |
+
|
| 943 |
+
with gr.TabItem("Original"):
|
| 944 |
+
style3d_original = gr.Image(
|
| 945 |
+
label="Original Image",
|
| 946 |
+
elem_classes=["result-gallery"]
|
| 947 |
+
)
|
| 948 |
+
|
| 949 |
+
with gr.TabItem("Comparison"):
|
| 950 |
+
with gr.Row():
|
| 951 |
+
style3d_compare_original = gr.Image(
|
| 952 |
+
label="Before",
|
| 953 |
+
elem_classes=["result-gallery"]
|
| 954 |
+
)
|
| 955 |
+
style3d_compare_result = gr.Image(
|
| 956 |
+
label="After",
|
| 957 |
+
elem_classes=["result-gallery"]
|
| 958 |
+
)
|
| 959 |
+
|
| 960 |
+
with gr.Row():
|
| 961 |
+
style3d_status_output = gr.Textbox(
|
| 962 |
+
label="Status",
|
| 963 |
+
value="Ready! Upload an image and select a style to transform.",
|
| 964 |
+
interactive=False,
|
| 965 |
+
elem_classes=["status-panel"],
|
| 966 |
+
scale=3
|
| 967 |
+
)
|
| 968 |
+
seed_output = gr.Number(
|
| 969 |
+
label="Seed Used",
|
| 970 |
+
value=0,
|
| 971 |
+
interactive=False,
|
| 972 |
+
precision=0,
|
| 973 |
+
scale=1
|
| 974 |
+
)
|
| 975 |
+
|
| 976 |
+
with gr.Row():
|
| 977 |
+
clear_style_btn = gr.Button(
|
| 978 |
+
"Clear All",
|
| 979 |
+
elem_classes=["secondary-button"]
|
| 980 |
+
)
|
| 981 |
+
memory_style_btn = gr.Button(
|
| 982 |
+
"Clean Memory",
|
| 983 |
+
elem_classes=["secondary-button"]
|
| 984 |
+
)
|
| 985 |
+
|
| 986 |
+
# Event handlers - detect mode from TAB selection (not just dropdown)
|
| 987 |
+
single_tab.select(
|
| 988 |
+
fn=lambda: False, # Single Styles tab clicked -> is_blend = False
|
| 989 |
+
inputs=[],
|
| 990 |
+
outputs=[is_blend_mode]
|
| 991 |
+
)
|
| 992 |
+
|
| 993 |
+
blend_tab.select(
|
| 994 |
+
fn=lambda: True, # Style Blends tab clicked -> is_blend = True
|
| 995 |
+
inputs=[],
|
| 996 |
+
outputs=[is_blend_mode]
|
| 997 |
+
)
|
| 998 |
+
|
| 999 |
+
generate_style_btn.click(
|
| 1000 |
+
fn=self._generate_3d_style_handler,
|
| 1001 |
+
inputs=[
|
| 1002 |
+
style3d_image_input, style_dropdown, blend_dropdown, is_blend_mode,
|
| 1003 |
+
style_strength, guidance_scale, num_steps, custom_prompt,
|
| 1004 |
+
randomize_seed, seed_input, face_restore
|
| 1005 |
+
],
|
| 1006 |
+
outputs=[
|
| 1007 |
+
style3d_output, style3d_original,
|
| 1008 |
+
style3d_compare_original, style3d_compare_result,
|
| 1009 |
+
style3d_status_output, seed_output
|
| 1010 |
+
]
|
| 1011 |
+
)
|
| 1012 |
+
|
| 1013 |
+
clear_style_btn.click(
|
| 1014 |
+
fn=lambda: (None, None, None, None, "Ready! Upload an image and select a style to transform.", 0),
|
| 1015 |
+
outputs=[
|
| 1016 |
+
style3d_output, style3d_original,
|
| 1017 |
+
style3d_compare_original, style3d_compare_result,
|
| 1018 |
+
style3d_status_output, seed_output
|
| 1019 |
+
]
|
| 1020 |
+
)
|
| 1021 |
+
|
| 1022 |
+
memory_style_btn.click(
|
| 1023 |
+
fn=self._cleanup_3d_memory,
|
| 1024 |
+
outputs=[style3d_status_output]
|
| 1025 |
+
)
|
| 1026 |
+
|
| 1027 |
+
def _generate_3d_style_handler(
|
| 1028 |
+
self,
|
| 1029 |
+
image: Image.Image,
|
| 1030 |
+
style_choice: str,
|
| 1031 |
+
blend_choice: str,
|
| 1032 |
+
is_blend_mode: bool,
|
| 1033 |
+
strength: float,
|
| 1034 |
+
guidance_scale: float,
|
| 1035 |
+
num_steps: int,
|
| 1036 |
+
custom_prompt: str,
|
| 1037 |
+
randomize_seed: bool,
|
| 1038 |
+
manual_seed: int,
|
| 1039 |
+
face_restore: bool = False
|
| 1040 |
+
) -> Tuple[Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], str, int]:
|
| 1041 |
+
"""Handler for style transfer generation"""
|
| 1042 |
+
if image is None:
|
| 1043 |
+
return None, None, None, None, "Please upload an image first!", 0
|
| 1044 |
+
|
| 1045 |
+
try:
|
| 1046 |
+
# Determine style key based on mode (detected from last dropdown interaction)
|
| 1047 |
+
if is_blend_mode:
|
| 1048 |
+
style_key = self.style_engine.get_blend_key_from_choice(blend_choice)
|
| 1049 |
+
is_blend = True
|
| 1050 |
+
else:
|
| 1051 |
+
style_key = self.style_engine.get_style_key_from_choice(style_choice)
|
| 1052 |
+
is_blend = False
|
| 1053 |
+
|
| 1054 |
+
# Handle seed
|
| 1055 |
+
seed = -1 if randomize_seed else int(manual_seed)
|
| 1056 |
+
|
| 1057 |
+
if SPACES_AVAILABLE:
|
| 1058 |
+
generate_fn = spaces.GPU(duration=120)(self._3d_style_generate_core)
|
| 1059 |
+
else:
|
| 1060 |
+
generate_fn = self._3d_style_generate_core
|
| 1061 |
+
|
| 1062 |
+
result = generate_fn(
|
| 1063 |
+
image, style_key, is_blend, strength,
|
| 1064 |
+
guidance_scale, num_steps, custom_prompt, seed, face_restore
|
| 1065 |
+
)
|
| 1066 |
+
|
| 1067 |
+
if result["success"]:
|
| 1068 |
+
stylized = result["stylized_image"]
|
| 1069 |
+
style_name = result.get("style_name", "Style")
|
| 1070 |
+
seed_used = result.get("seed_used", 0)
|
| 1071 |
+
return (
|
| 1072 |
+
stylized,
|
| 1073 |
+
image,
|
| 1074 |
+
image,
|
| 1075 |
+
stylized,
|
| 1076 |
+
f"β {style_name} completed! (seed: {seed_used})",
|
| 1077 |
+
seed_used
|
| 1078 |
+
)
|
| 1079 |
+
else:
|
| 1080 |
+
error_msg = result.get("error", "Unknown error")
|
| 1081 |
+
return None, None, None, None, f"Error: {error_msg}", 0
|
| 1082 |
+
|
| 1083 |
+
except Exception as e:
|
| 1084 |
+
logger.error(f"Style generation failed: {e}")
|
| 1085 |
+
return None, None, None, None, f"Error: {str(e)}", 0
|
| 1086 |
+
|
| 1087 |
+
def _3d_style_generate_core(
|
| 1088 |
+
self,
|
| 1089 |
+
image: Image.Image,
|
| 1090 |
+
style_key: str,
|
| 1091 |
+
is_blend: bool,
|
| 1092 |
+
strength: float,
|
| 1093 |
+
guidance_scale: float,
|
| 1094 |
+
num_steps: int,
|
| 1095 |
+
custom_prompt: str,
|
| 1096 |
+
seed: int,
|
| 1097 |
+
face_restore: bool = False
|
| 1098 |
+
) -> dict:
|
| 1099 |
+
"""Core style transfer generation"""
|
| 1100 |
+
return self.style_engine.generate_all_outputs(
|
| 1101 |
+
image=image,
|
| 1102 |
+
style_key=style_key,
|
| 1103 |
+
strength=float(strength),
|
| 1104 |
+
guidance_scale=float(guidance_scale),
|
| 1105 |
+
num_inference_steps=int(num_steps),
|
| 1106 |
+
custom_prompt=custom_prompt if custom_prompt else "",
|
| 1107 |
+
seed=seed,
|
| 1108 |
+
is_blend=is_blend,
|
| 1109 |
+
face_restore=face_restore
|
| 1110 |
+
)
|
| 1111 |
+
|
| 1112 |
+
def _cleanup_3d_memory(self) -> str:
|
| 1113 |
+
"""Clean up 3D engine memory"""
|
| 1114 |
+
self.style_engine.unload_model()
|
| 1115 |
+
return "Memory cleaned!"
|
| 1116 |
+
|