sdas

File size: 19,649 Bytes

8c0bf45

NAME = 'XL Vec'

import logging
import traceback
from threading import Lock
from torch import Tensor, FloatTensor, nn
import gradio as gr
from modules.processing import StableDiffusionProcessing
from modules import scripts

from scripts.sdhook import SDHook
from scripts.xl_clip import CLIP_SDXL, get_pooled
from scripts.xl_vec_xyz import init_xyz

# --- LOGGING ---
logger = logging.getLogger(__name__)

# --- CONSTANTS ---
SDXL_POOLED_DIM = 1280       # Размер pooled embedding вектора SDXL
AESTHETIC_SCORE_EPS = 0.01   # Допуск для сравнения float значений (aesthetic score)
DEFAULT_AESTHETIC_SCORE = 6.0

# --- PRESETS ---
PRESETS = {
    "Manual / Custom": None,
    "1:1 Square (1024x1024)": (1024, 1024),
    "4:3 Photo (1152x896)": (1152, 896),
    "3:4 Portrait (896x1152)": (896, 1152),
    "16:9 Cinema (1344x768)": (1344, 768),
    "9:16 Mobile (768x1344)": (768, 1344),
    "21:9 Wide (1536x640)": (1536, 640),
    "2:3 Classic (832x1216)": (832, 1216),
}


def hook_input(args: 'Hook', mod: nn.Module, inputs: tuple[dict[str, Tensor]]) -> tuple[dict[str, Tensor]]:
    """
    Перехватывает входные данные CLIP модели для подмены параметров conditioning
    (размеры, кроп, эстетическая оценка).
    
    Args:
        args: Экземпляр Hook с параметрами
        mod: CLIP модуль
        inputs: Tuple с входными данными
        
    Returns:
        Модифицированные входные данные
    """
    if not args.enabled:
        return inputs
    
    assert isinstance(mod, CLIP_SDXL), f"Expected CLIP_SDXL, got {type(mod)}"
    input_data = inputs[0]
    
    def create(v: list[float], src: FloatTensor) -> FloatTensor:
        """Создает тензор с правильным device и dtype."""
        return FloatTensor(v).to(dtype=src.dtype, device=src.device)
    
    def put(name: str, v: list[float]) -> None:
        """Безопасно заменяет значение в input_data."""
        if name in input_data:
            src = input_data[name]
            input_data[name] = create(v, src).reshape(src.shape)
    
    # Применяем геометрические параметры
    put('original_size_as_tuple', [args.original_height, args.original_width])
    put('crop_coords_top_left', [args.crop_top, args.crop_left])
    put('target_size_as_tuple', [args.target_height, args.target_width])
    
    # Логика определения Positive/Negative промпта через Aesthetic Score
    try:
        current_score = input_data['aesthetic_score'].item()
        if args.is_positive_prompt(current_score):
            put('aesthetic_score', [args.aesthetic_score])
        else:
            put('aesthetic_score', [args.negative_aesthetic_score])
    except (KeyError, AttributeError) as e:
        logger.warning(f"[XL Vec] Cannot access aesthetic_score: {e}")
    
    return inputs


def hook_output(args: 'Hook', mod: nn.Module, inputs: tuple[dict[str, Tensor]], output: dict) -> dict:
    """
    Перехватывает выход CLIP модели для замены векторов токенов.
    
    Args:
        args: Экземпляр Hook с параметрами
        mod: CLIP модуль
        inputs: Входные данные
        output: Выходные данные с ключом 'vector'
        
    Returns:
        Модифицированные выходные данные
    """
    if not args.enabled:
        return output
    
    try:
        # Определяем, работаем ли мы с Positive или Negative промптом
        current_score = inputs[0]['aesthetic_score'].item()
        prompt, index, multiplier = args.get_prompt_params(current_score)
        
        # Если параметры не заданы пользователем, ничего не делаем
        if (prompt is None or len(prompt) == 0) and (index == -1 and multiplier == 1.0):
            return output

        # Если текст замены не задан, используем оригинальный промпт
        if prompt is None or len(prompt) == 0:
            prompt = inputs[0]['txt'][0]
        
        assert isinstance(mod, CLIP_SDXL), f"Expected CLIP_SDXL, got {type(mod)}"
        
        # Получаем новый pooled embedding с защитой от рекурсии
        with args._lock:
            args.enabled = False
            try:
                pooled, token_idx = get_pooled(mod, prompt, index=index)
            finally:
                args.enabled = True
        
        # Подмена вектора с проверкой размерности
        if output['vector'].shape[1] >= SDXL_POOLED_DIM:
            output['vector'][:, 0:SDXL_POOLED_DIM] = pooled[:] * multiplier
            logger.info(
                f"[XL Vec] Vector override: '{inputs[0]['txt']}' -> '{prompt}' "
                f"@ token {token_idx} [x{multiplier:.2f}]"
            )
        else:
            logger.error(
                f"[XL Vec] Vector dimension mismatch: expected >={SDXL_POOLED_DIM}, "
                f"got {output['vector'].shape[1]}"
            )

    except Exception as e:
        logger.error(f"[XL Vec] Error in hook_output: {e}")
        traceback.print_exc()

    return output


class Hook(SDHook):
    """Хук для модификации CLIP conditioning в SDXL."""
    
    def __init__(
        self,
        enabled: bool,
        p: StableDiffusionProcessing,
        crop_left: float, crop_top: float,
        original_width: float, original_height: float,
        target_width: float, target_height: float,
        aesthetic_score: float, negative_aesthetic_score: float,
        extra_prompt: str | None, extra_negative_prompt: str | None,
        token_index: int | float, negative_token_index: int | float,
        eot_multiplier: float, negative_eot_multiplier: float,
        with_hr: bool,
        base_aesthetic_score: float,
    ):
        super().__init__(enabled)
        
        # Валидация параметров
        self._validate_params(
            aesthetic_score, negative_aesthetic_score, base_aesthetic_score,
            original_width, original_height, target_width, target_height
        )
        
        self.p = p
        self.crop_left = float(crop_left)
        self.crop_top = float(crop_top)
        self.original_width = float(original_width)
        self.original_height = float(original_height)
        self.target_width = float(target_width)
        self.target_height = float(target_height)
        self.aesthetic_score = float(aesthetic_score)
        self.negative_aesthetic_score = float(negative_aesthetic_score)
        self.extra_prompt = extra_prompt
        self.extra_negative_prompt = extra_negative_prompt
        self.token_index = int(token_index)
        self.negative_token_index = int(negative_token_index)
        self.eot_multiplier = float(eot_multiplier)
        self.negative_eot_multiplier = float(negative_eot_multiplier)
        self.with_hr = bool(with_hr)
        self.base_aesthetic_score = float(base_aesthetic_score)
        
        # Thread safety для предотвращения race conditions
        self._lock = Lock()
    
    @staticmethod
    def _validate_params(
        aesthetic_score: float,
        negative_aesthetic_score: float,
        base_aesthetic_score: float,
        original_width: float,
        original_height: float,
        target_width: float,
        target_height: float
    ) -> None:
        """Валидирует входные параметры."""
        for score, name in [
            (aesthetic_score, "aesthetic_score"),
            (negative_aesthetic_score, "negative_aesthetic_score"),
            (base_aesthetic_score, "base_aesthetic_score")
        ]:
            if not (0 <= score <= 10):
                raise ValueError(f"{name} должен быть в диапазоне [0, 10], получено {score}")
        
        for size, name in [
            (original_width, "original_width"),
            (original_height, "original_height"),
            (target_width, "target_width"),
            (target_height, "target_height")
        ]:
            if size < 0:
                raise ValueError(f"{name} не может быть отрицательным, получено {size}")
    
    def is_positive_prompt(self, aesthetic_score: float) -> bool:
        """
        Определяет, является ли текущий промпт положительным.
        
        Args:
            aesthetic_score: Текущее значение aesthetic score
            
        Returns:
            True если это positive prompt, False если negative
        """
        return abs(aesthetic_score - self.base_aesthetic_score) < AESTHETIC_SCORE_EPS
    
    def get_prompt_params(self, aesthetic_score: float) -> tuple[str | None, int, float]:
        """
        Возвращает параметры промпта в зависимости от aesthetic_score.
        
        Args:
            aesthetic_score: Текущее значение aesthetic score
            
        Returns:
            Tuple (prompt, token_index, multiplier)
        """
        if self.is_positive_prompt(aesthetic_score):
            return self.extra_prompt, self.token_index, self.eot_multiplier
        else:
            return self.extra_negative_prompt, self.negative_token_index, self.negative_eot_multiplier
    
    def cleanup(self) -> None:
        """Корректно удаляет все хуки."""
        try:
            self.__exit__(None, None, None)
        except Exception as e:
            logger.warning(f"[XL Vec] Error during cleanup: {e}")
    
    def hook_clip(self, p: StableDiffusionProcessing, clip: nn.Module) -> None:
        """Устанавливает хуки на CLIP модель."""
        if not hasattr(p.sd_model, 'is_sdxl') or not p.sd_model.is_sdxl:
            logger.debug("[XL Vec] Model is not SDXL, skipping hooks")
            return
        
        def inp(*args, **kwargs):
            return hook_input(self, *args, **kwargs)
        
        def outp(*args, **kwargs):
            return hook_output(self, *args, **kwargs)
        
        self.hook_layer_pre(clip, inp)
        self.hook_layer(clip, outp)


class Script(scripts.Script):
    """Скрипт для управления SDXL conditioning параметрами."""
    
    def title(self) -> str:
        return NAME
    
    def show(self, is_img2img) -> scripts.AlwaysVisible:
        return scripts.AlwaysVisible
    
    def ui(self, is_img2img):
        with gr.Accordion(NAME, open=False):
            with gr.Row():
                enabled = gr.Checkbox(label='Enable XL Vec', value=False)
                with_hr = gr.Checkbox(label='Active on Hires Fix', value=False, visible=False)

            # --- GEOMETRY SECTION ---
            with gr.Group():
                gr.Markdown("### 📐 SDXL Geometry & Size")
                
                preset_dropdown = gr.Dropdown(
                    label="⚡ Quick Resolution Preset",
                    choices=list(PRESETS.keys()),
                    value="Manual / Custom",
                    type="value"
                )

                with gr.Row():
                    original_width = gr.Slider(
                        minimum=-1, maximum=4096, step=1, value=-1,
                        label='Original Width (-1=auto)'
                    )
                    original_height = gr.Slider(
                        minimum=-1, maximum=4096, step=1, value=-1,
                        label='Original Height (-1=auto)'
                    )
                
                with gr.Row():
                    target_width = gr.Slider(
                        minimum=-1, maximum=4096, step=1, value=-1,
                        label='Target Width (-1=auto)'
                    )
                    target_height = gr.Slider(
                        minimum=-1, maximum=4096, step=1, value=-1,
                        label='Target Height (-1=auto)'
                    )

                # Callback: Dropdown -> Sliders
                def apply_preset(choice):
                    if choice in PRESETS and PRESETS[choice] is not None:
                        w, h = PRESETS[choice]
                        return w, h, w, h
                    return gr.update(), gr.update(), gr.update(), gr.update()

                preset_dropdown.change(
                    fn=apply_preset,
                    inputs=[preset_dropdown],
                    outputs=[original_width, original_height, target_width, target_height]
                )

                # Callback: Sliders -> Dropdown (Reset to Manual)
                def reset_dropdown():
                    return "Manual / Custom"

                for slider in [original_width, original_height, target_width, target_height]:
                    slider.change(fn=reset_dropdown, inputs=None, outputs=[preset_dropdown])

                with gr.Accordion("✂️ Crop Settings", open=False):
                    with gr.Row():
                        crop_left = gr.Slider(
                            minimum=-10000, maximum=10000, step=1, value=0,
                            label='Crop Left'
                        )
                        crop_top = gr.Slider(
                            minimum=-10000, maximum=10000, step=1, value=0,
                            label='Crop Top'
                        )

            # --- AESTHETICS SECTION ---
            with gr.Group():
                gr.Markdown("### 🎨 Aesthetics")
                with gr.Row():
                    aesthetic_score = gr.Slider(
                        minimum=0.0, maximum=10.0, step=0.1, value=6.0,
                        label="Positive Aesthetic Score"
                    )
                    negative_aesthetic_score = gr.Slider(
                        minimum=0.0, maximum=10.0, step=0.1, value=2.5,
                        label="Negative Aesthetic Score"
                    )
                
                with gr.Accordion("⚙️ Detection Threshold (Advanced)", open=False):
                    base_aesthetic_score = gr.Slider(
                        minimum=0.0, maximum=10.0, step=0.1, value=6.0,
                        label="Base Score Threshold"
                    )
                    gr.Info("Change this ONLY if you modified 'SDXL Aesthetic Score' in WebUI settings.")

            # --- VECTORS SECTION ---
            with gr.Accordion("🧠 Token & Vector Control", open=False):
                with gr.Row():
                    eot_multiplier = gr.Slider(
                        minimum=-4.0, maximum=8.0, step=0.05, value=1.0,
                        label='Pos. Vector Mult'
                    )
                    negative_eot_multiplier = gr.Slider(
                        minimum=-4.0, maximum=8.0, step=0.05, value=1.0,
                        label='Neg. Vector Mult'
                    )
                with gr.Row():
                    token_index = gr.Slider(
                        minimum=-77, maximum=76, step=1, value=-1,
                        label='Pos. Token Index'
                    )
                    negative_token_index = gr.Slider(
                        minimum=-77, maximum=76, step=1, value=-1,
                        label='Neg. Token Index'
                    )
                with gr.Row():
                    extra_prompt = gr.Textbox(
                        lines=1, label='Extra Prompt',
                        placeholder="Override positive prompt text..."
                    )
                    extra_negative_prompt = gr.Textbox(
                        lines=1, label='Extra Negative',
                        placeholder="Override negative prompt text..."
                    )

        return [
            enabled, crop_left, crop_top, original_width, original_height,
            target_width, target_height, aesthetic_score, negative_aesthetic_score,
            extra_prompt, extra_negative_prompt, token_index, negative_token_index,
            eot_multiplier, negative_eot_multiplier, with_hr,
            base_aesthetic_score
        ]
    
    def process(
        self, p, enabled, crop_left, crop_top, original_width, original_height,
        target_width, target_height, aesthetic_score, negative_aesthetic_score,
        extra_prompt, extra_negative_prompt, token_index, negative_token_index,
        eot_multiplier, negative_eot_multiplier, with_hr,
        base_aesthetic_score=DEFAULT_AESTHETIC_SCORE
    ):
        """Обрабатывает параметры и устанавливает хуки перед генерацией."""
        
        # Очистка предыдущего хука (если остался)
        if getattr(self, 'last_hooker', None) is not None:
            self.last_hooker.cleanup()
            self.last_hooker = None
        
        if not enabled:
            return

        # Автозаполнение размеров
        if original_width < 0:
            original_width = p.width
        if original_height < 0:
            original_height = p.height
        if target_width < 0:
            target_width = p.width
        if target_height < 0:
            target_height = p.height
        
        try:
            self.last_hooker = Hook(
                enabled=True, p=p,
                crop_left=crop_left, crop_top=crop_top,
                original_width=original_width, original_height=original_height,
                target_width=target_width, target_height=target_height,
                aesthetic_score=aesthetic_score,
                negative_aesthetic_score=negative_aesthetic_score,
                extra_prompt=extra_prompt, extra_negative_prompt=extra_negative_prompt,
                token_index=token_index, negative_token_index=negative_token_index,
                eot_multiplier=eot_multiplier,
                negative_eot_multiplier=negative_eot_multiplier,
                with_hr=with_hr, base_aesthetic_score=base_aesthetic_score
            )
        except ValueError as e:
            logger.error(f"[XL Vec] Invalid parameters: {e}")
            return

        self.last_hooker.setup(p)
        self.last_hooker.__enter__()
        
        # Обновление метаданных (Infotext)
        p.extra_generation_params.update({
            f'[{NAME}] Enabled': enabled,
            f'[{NAME}] Original Size': f"{int(original_width)}x{int(original_height)}",
            f'[{NAME}] Target Size': f"{int(target_width)}x{int(target_height)}",
            f'[{NAME}] Aesthetic Score': aesthetic_score,
        })
        
        if crop_left != 0 or crop_top != 0:
            p.extra_generation_params[f'[{NAME}] Crop'] = f"{crop_left},{crop_top}"
        
        if abs(base_aesthetic_score - DEFAULT_AESTHETIC_SCORE) > AESTHETIC_SCORE_EPS:
            p.extra_generation_params[f'[{NAME}] Base Score'] = base_aesthetic_score
        
        if eot_multiplier != 1.0:
            p.extra_generation_params[f'[{NAME}] Token Mult'] = eot_multiplier

        # Сброс кэша для применения новых условий
        if hasattr(p, 'cached_c'):
            p.cached_c = [None, None]
        if hasattr(p, 'cached_uc'):
            p.cached_uc = [None, None]


init_xyz(Script, NAME)