File size: 13,402 Bytes
23b413b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
"""
Edit flags parser for advanced edit controls.

This module allows the frontend to pass edit parameters via hidden flags
appended to the edit instruction, without modifying the /chat API schema.

Example:
    User input: "Make the sky dramatic --steps 30 --cfg 6.5 --denoise 0.55"

    Parsed result:
        - clean_text: "Make the sky dramatic"
        - flags: EditFlags(steps=30, cfg=6.5, denoise=0.55, ...)
"""

import re
from dataclasses import dataclass, field
from typing import List, Optional, Tuple, Dict, Any

# Pattern to match flags like --key value.
# The value group accepts either a negative number ("-1", "-2.5") or any token
# not starting with "-"; the lazy tail plus the (?=\s+--|$) lookahead lets a
# value contain spaces but stops it from swallowing the next --flag.
# The negative-number alternative is needed so "--seed -1" actually parses
# (build_edit_workflow_vars treats seed == -1 as "randomize"); previously the
# flag went unmatched and "--seed -1" leaked into the cleaned prompt.
FLAG_RE = re.compile(r"(--[a-zA-Z0-9_-]+)\s+(-?\d[^\n\r]*?|[^\s-][^\n\r]*?)(?=\s+--|$)")

# Pattern to match URLs (http/https); scheme matched case-insensitively
URL_RE = re.compile(r"https?://\S+", re.I)

# Pattern to match edit command keywords at the start
EDIT_CMD_RE = re.compile(r"^\s*(edit|inpaint|modify)\s+", re.I)


@dataclass
class EditFlags:
    """
    Parsed edit-flag values with quality-oriented defaults.

    Instances are produced by parse_edit_flags() and later converted into
    ComfyUI workflow variables; every field has a sensible default so a
    prompt with no flags at all still yields a usable configuration.
    """
    # Edit mode: "auto", "global", or "inpaint"
    mode: str = "auto"

    # Sampling / generation parameters
    steps: int = 30
    cfg: float = 5.5
    denoise: float = 0.55
    seed: int = 0
    sampler_name: str = "euler"
    scheduler: str = "normal"

    # Model selection and ControlNet settings
    ckpt_name: Optional[str] = None
    controlnet_name: Optional[str] = None
    controlnet_strength: float = 1.0
    cn_enabled: bool = False

    # Optional mask (URL or filename; backend will preprocess)
    mask_url: Optional[str] = None

    # LoRA adapters: list of {"id": str, "weight": float}
    loras: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the flags as a plain dict of workflow variables."""
        # NOTE: `loras` is shared by reference, not copied, matching how the
        # rest of the module passes the list around.
        return dict(
            mode=self.mode,
            steps=self.steps,
            cfg=self.cfg,
            denoise=self.denoise,
            seed=self.seed,
            sampler_name=self.sampler_name,
            scheduler=self.scheduler,
            ckpt_name=self.ckpt_name,
            controlnet_name=self.controlnet_name,
            controlnet_strength=self.controlnet_strength,
            cn_enabled=self.cn_enabled,
            mask_url=self.mask_url,
            loras=self.loras,
        )


def parse_edit_flags(text: str) -> Tuple[str, EditFlags]:
    """
    Extract flags from user text and return (clean_text, flags).

    Cleans the prompt by removing:
    - Edit command prefix (edit, inpaint, modify)
    - URLs (image URLs that are handled separately)
    - Flags like --steps 30, --cfg 6.5

    Malformed flag values (e.g. "--steps abc") are silently ignored and the
    corresponding default is kept.

    Args:
        text: User's edit instruction potentially containing flags and URLs

    Returns:
        Tuple of (cleaned text without flags/URLs, parsed EditFlags object)
    """
    flags = EditFlags()

    def _add_loras(raw: str) -> None:
        # Tokens are "lora_id:weight" or bare "lora_id" (default weight 0.8).
        for token in raw.split():
            name, sep, weight_str = token.rpartition(":")
            if sep:
                try:
                    weight = float(weight_str)
                except ValueError:
                    weight = 0.8
            else:
                name = token
                weight = 0.8
            name = name.strip()
            if name:
                flags.loras.append({"id": name, "weight": weight, "enabled": True})

    # Dispatch table: flag key -> setter taking the stripped value string.
    setters = {
        "--mode": lambda v: setattr(flags, "mode", v.lower()),
        "--steps": lambda v: setattr(flags, "steps", int(float(v))),
        "--cfg": lambda v: setattr(flags, "cfg", float(v)),
        "--denoise": lambda v: setattr(flags, "denoise", float(v)),
        "--seed": lambda v: setattr(flags, "seed", int(float(v))),
        "--sampler": lambda v: setattr(flags, "sampler_name", v),
        "--scheduler": lambda v: setattr(flags, "scheduler", v),
        "--ckpt": lambda v: setattr(flags, "ckpt_name", v),
        "--cn": lambda v: setattr(flags, "cn_enabled", v.lower() in ("1", "true", "yes", "on")),
        "--cn-strength": lambda v: setattr(flags, "controlnet_strength", float(v)),
        "--controlnet": lambda v: setattr(flags, "controlnet_name", v),
        "--mask": lambda v: setattr(flags, "mask_url", v),
        "--lora": _add_loras,
    }

    found = list(FLAG_RE.finditer(text))
    for match in found:
        key = (match.group(1) or "").strip().lower()
        raw_value = (match.group(2) or "").strip()
        handler = setters.get(key)
        if handler is None:
            continue  # unknown flag: ignore, but its span is still stripped below
        try:
            handler(raw_value)
        except (ValueError, TypeError):
            pass  # invalid value: keep the default

    # Strip the matched flag spans right-to-left so earlier offsets stay valid.
    clean = text
    for match in reversed(found):
        clean = clean[:match.start()] + clean[match.end():]

    # Drop image URLs (the image is passed separately) and any leading
    # command word such as "edit " or "inpaint ".
    clean = URL_RE.sub("", clean)
    clean = EDIT_CMD_RE.sub("", clean)

    # Collapse runs of whitespace left behind by the removals.
    clean = " ".join(clean.split()).strip()

    return clean, flags


def infer_edit_mode(user_text: str) -> str:
    """
    Guess the edit mode from the instruction text.

    Vocabulary naming concrete objects or localized operations maps to
    "inpaint"; style/mood vocabulary maps to "global"; anything else falls
    through to "auto" so the workflow can decide. Localized keywords win
    when both kinds appear.

    Args:
        user_text: Cleaned user edit instruction

    Returns:
        Inferred edit mode: "inpaint", "global", or "auto"
    """
    lowered = user_text.lower()

    # Localized edit keywords suggest inpainting
    localized_words = (
        "remove", "erase", "delete", "replace", "change", "add", "swap",
        "object", "logo", "text", "person", "background", "hat", "shirt",
        "hair", "face", "eye", "nose", "mouth", "hand", "arm", "leg",
        "building", "car", "tree", "sky", "water", "cloud"
    )

    # Global style keywords suggest full image regeneration
    style_words = (
        "cinematic", "anime", "oil painting", "watercolor", "cartoon",
        "night", "sunset", "dramatic lighting", "color grade", "style",
        "filter", "tone", "mood", "atmosphere", "aesthetic", "artistic",
        "vintage", "retro", "modern", "futuristic", "cyberpunk", "noir"
    )

    # Localized keywords take precedence over style keywords.
    if any(word in lowered for word in localized_words):
        return "inpaint"
    if any(word in lowered for word in style_words):
        return "global"
    return "auto"


def build_edit_workflow_vars(
    image_url: str,
    prompt: str,
    flags: EditFlags,
    negative_prompt: str = "",
    img_model: str = "",
) -> Dict[str, Any]:
    """
    Build workflow variables dictionary from parsed flags.

    Side effects:
        - May append auto-detected LoRAs to ``flags.loras`` (and cap the list
          at 4 entries) when no explicit --lora flags were given.
        - May prepend LoRA trigger words to the returned prompt.

    Args:
        image_url: URL of the source image
        prompt: Cleaned edit instruction
        flags: Parsed edit flags
        negative_prompt: Negative prompt (optional)
        img_model: User-selected image model from frontend (e.g. "dreamshaper_8.safetensors")

    Returns:
        Dictionary of variables for ComfyUI workflow. ``mask_path`` is only
        present when a mask was actually supplied; ``_loras`` is only present
        when at least one LoRA is active.
    """
    import random

    # Default checkpoints based on workflow type
    # If the user selected a specific model in the frontend, use that as
    # the global default — this ensures SD1.5 LoRAs are not silently
    # skipped when the user has a SD1.5 checkpoint selected.
    ckpt_default_global = img_model or "sd_xl_base_1.0.safetensors"
    ckpt_default_inpaint = "sd_xl_base_1.0_inpainting_0.1.safetensors"
    ckpt_default_sd15_inpaint = "sd-v1-5-inpainting.ckpt"
    cn_default = "control_v11p_sd15_inpaint.safetensors"

    # Generate random seed if not explicitly set (prevents ComfyUI caching).
    # Both 0 and -1 are treated as "randomize" sentinels.
    seed = flags.seed
    if seed in (0, -1):
        seed = random.randint(1, 2147483647)

    vars_dict: Dict[str, Any] = {
        "image_path": image_url,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "steps": flags.steps,
        "cfg": flags.cfg,
        "seed": seed,
        "denoise": flags.denoise,
        "sampler_name": flags.sampler_name,
        "scheduler": flags.scheduler,
        "filename_prefix": "homepilot_edit",
    }

    # Set checkpoint and mask based on whether mask is provided
    # IMPORTANT: Only set mask_path when there's actually a mask
    if flags.mask_url:
        # Inpaint mode - mask is provided
        vars_dict["mask_path"] = flags.mask_url
        vars_dict["ckpt_name"] = flags.ckpt_name or ckpt_default_inpaint

        # Optional ControlNet (recommended with SD1.5 inpaint).
        # Note: enabling ControlNet also swaps the default checkpoint to the
        # SD1.5 inpaint model, since the default ControlNet is SD1.5-only.
        if flags.cn_enabled:
            vars_dict["ckpt_name"] = flags.ckpt_name or ckpt_default_sd15_inpaint
            vars_dict["controlnet_name"] = flags.controlnet_name or cn_default
            vars_dict["controlnet_strength"] = flags.controlnet_strength
    else:
        # Global edit (img2img) - no mask needed
        vars_dict["ckpt_name"] = flags.ckpt_name or ckpt_default_global

    # Pass LoRA list (will be consumed by comfy.py to inject LoraLoader nodes)
    # Auto-detect: if no --lora flags given, scan installed LoRAs for trigger word matches
    # Only selects LoRAs compatible with the current checkpoint architecture.
    if not flags.loras:
        try:
            from .models.lora_loader import scan_installed_loras, get_lora_dir, is_lora_compatible
            from .models.lora_registry import get_lora_by_id
            from .model_config import get_architecture

            lora_dir = get_lora_dir()
            if lora_dir.exists():
                prompt_lower = vars_dict["prompt"].lower()
                ckpt_name = vars_dict.get("ckpt_name", "")
                ckpt_arch = get_architecture(ckpt_name) if ckpt_name else ""
                installed = scan_installed_loras()
                # Track which trigger words already matched (pick best arch variant)
                matched_triggers: set = set()
                for lora_info in installed:
                    if not lora_info.get("healthy", True):
                        continue
                    lid = lora_info.get("id", "")
                    entry = get_lora_by_id(lid)
                    if not entry or not entry.trigger_words:
                        continue
                    # Check if any trigger word appears in the prompt
                    # (short triggers < 3 chars are skipped — too many false hits)
                    matched = any(tw.lower() in prompt_lower for tw in entry.trigger_words if len(tw) >= 3)
                    if not matched:
                        continue
                    # Skip if incompatible with current checkpoint
                    if ckpt_arch and entry.base:
                        if not is_lora_compatible(entry.base, ckpt_arch):
                            print(f"[LORA] Skipping auto-detect '{lid}' (base={entry.base}) — incompatible with checkpoint '{ckpt_arch}'")
                            continue
                    # Avoid duplicates: if another LoRA with the same trigger words
                    # was already added, skip (e.g. undressing_sd15 vs undressing_sdxl)
                    tw_key = frozenset(tw.lower() for tw in entry.trigger_words)
                    if tw_key in matched_triggers:
                        continue
                    matched_triggers.add(tw_key)
                    flags.loras.append({"id": lid, "weight": 0.8, "enabled": True})
                    print(f"[LORA] Auto-detected '{lid}' (base={entry.base}) from trigger words in prompt")

                # Cap auto-detected LoRAs at 4
                flags.loras = flags.loras[:4]
        except Exception as e:
            # Auto-detect is best-effort: missing modules (ImportError is a
            # subclass of Exception, so listing it separately was redundant)
            # or any scan failure just means no LoRAs are auto-attached.
            print(f"[LORA] Auto-detect skipped: {e}")

    if flags.loras:
        vars_dict["_loras"] = flags.loras

        # Inject trigger words into prompt (industry best practice for LoRA activation)
        try:
            from .models.lora_registry import get_lora_by_id
            trigger_parts: list[str] = []
            for lr in flags.loras:
                entry = get_lora_by_id(lr["id"])
                if entry and entry.trigger_words:
                    for tw in entry.trigger_words:
                        # Only add triggers not already present in the prompt
                        if tw.lower() not in vars_dict["prompt"].lower():
                            trigger_parts.append(tw)
            if trigger_parts:
                vars_dict["prompt"] = ", ".join(trigger_parts) + ", " + vars_dict["prompt"]
        except ImportError:
            # Registry unavailable: keep the prompt untouched.
            pass

    return vars_dict


def determine_workflow(flags: EditFlags, prompt: str) -> str:
    """
    Determine which workflow to use based on flags and prompt.

    IMPORTANT: Inpaint workflows REQUIRE a mask (the InpaintModelConditioning
    node needs a noise_mask input). Without a mask we MUST fall back to the
    standard img2img edit workflow, even when keywords hint at inpainting —
    keyword-based mode detection is informational only.

    Args:
        flags: Parsed edit flags
        prompt: Cleaned edit instruction (currently unused; kept for
            interface stability)

    Returns:
        Workflow name: "edit_inpaint_cn", "edit_inpaint", or "edit"
    """
    # Guard clause: no mask means img2img, regardless of detected keywords.
    if not flags.mask_url:
        return "edit"

    # A mask is present: pick the ControlNet variant when requested.
    return "edit_inpaint_cn" if flags.cn_enabled else "edit_inpaint"