# File size: 9,526 Bytes
# b701455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
"""Image-to-image processor for LightDiffusion-Next.

This processor handles image-to-image generation and upscaling
using the Ultimate SD Upscale approach.
"""

import logging
from typing import TYPE_CHECKING, Any, Optional, Callable

import numpy as np
import torch
from PIL import Image

if TYPE_CHECKING:
    from src.Core.PipelineContext import PipelineContext
    from src.Core.AbstractModel import AbstractModel


class Img2Img:
    """Image-to-image generation and upscaling processor.

    Uses Ultimate SD Upscale for high-quality image transformation
    and super-resolution. Every method is a classmethod; the class itself
    only carries the default settings used by :meth:`apply`.
    """

    # Defaults forwarded to the Ultimate SD Upscale call in ``apply``.
    DEFAULT_UPSCALE_BY = 2  # used when no factor is given or derivable
    DEFAULT_STEPS = 8
    DEFAULT_CFG = 6  # replaced by 1.0 for Flux / Flux2 models
    DEFAULT_DENOISE = 0.3
    DEFAULT_SCHEDULER = "karras"
    DEFAULT_TILE_WIDTH = 512
    DEFAULT_TILE_HEIGHT = 512
    DEFAULT_MASK_BLUR = 16
    DEFAULT_TILE_PADDING = 32
    DEFAULT_UPSCALER = "RealESRGAN_x4plus.pth"  # name handed to UpscaleModelLoader

    @classmethod
    def apply(
        cls,
        ctx: "PipelineContext",
        model: "AbstractModel",
        positive: Any,
        negative: Any,
        image_path: Optional[str] = None,
        image_tensor: Optional[torch.Tensor] = None,
        upscale_by: Optional[float] = None,
        denoise: Optional[float] = None,
        callback: Optional[Callable] = None,
    ) -> torch.Tensor:
        """Apply image-to-image transformation.

        Args:
            ctx: Pipeline context with configuration
            model: The loaded model instance
            positive: Positive conditioning
            negative: Negative conditioning
            image_path: Path to input image (used if image_tensor not provided);
                falls back to ``ctx.features.img2img_image``
            image_tensor: Input image tensor [B, H, W, C] or [H, W, C]
            upscale_by: Upscale factor. When omitted it is derived from
                ``ctx.generation.width`` relative to the input width, else
                ``DEFAULT_UPSCALE_BY``
            denoise: Denoising strength (default: 0.3). An explicit ``0.0``
                is honoured rather than replaced by the default
            callback: Optional callback for live previews; falls back to
                ``ctx.callback``

        Returns:
            Processed image tensor. If the upscale step raises, the error is
            logged and the unmodified input tensor is returned instead.

        Raises:
            ValueError: If neither a tensor nor an image path is available.
        """
        logger = logging.getLogger(__name__)

        # Determine input source
        if image_tensor is None:
            source_path = image_path or ctx.features.img2img_image
            # Treat an empty path like a missing one instead of failing in PIL.
            if not source_path:
                raise ValueError("No input image provided for img2img")
            image_tensor = cls._load_image(source_path)

        # Determine upscale factor from context dimensions if not provided
        if upscale_by is None:
            upscale_by = cls._resolve_upscale_by(image_tensor, ctx.generation.width)

        # `is None` (not `or`) so a caller-supplied 0.0 is not overridden.
        if denoise is None:
            denoise = cls.DEFAULT_DENOISE

        # Determine model flags
        is_flux = getattr(model.capabilities, "is_flux", False)
        is_flux2 = getattr(model.capabilities, "is_flux2", False)

        # Adjust CFG for Flux models
        img2img_cfg = 1.0 if (is_flux or is_flux2) else cls.DEFAULT_CFG

        try:
            # Imported lazily, inside the try, so an import failure also takes
            # the "return the original image" fallback below.
            from src.UltimateSDUpscale import UltimateSDUpscale, USDU_upscaler

            # Load upscaler model
            upscale_loader = USDU_upscaler.UpscaleModelLoader()
            upscale_model = upscale_loader.load_model(cls.DEFAULT_UPSCALER)[0]

            # Initialize Ultimate SD Upscale
            upscaler = UltimateSDUpscale.UltimateSDUpscale()

            logger.info(f"Img2Img: processing with {upscale_by}x upscale, denoise={denoise}")

            # Run upscaling
            result = upscaler.upscale(
                upscale_by=upscale_by,
                seed=ctx.seed,
                steps=cls.DEFAULT_STEPS,
                cfg=img2img_cfg,
                sampler_name=ctx.sampling.sampler,
                scheduler=cls.DEFAULT_SCHEDULER,
                denoise=denoise,
                mode_type="Linear",
                tile_width=cls.DEFAULT_TILE_WIDTH,
                tile_height=cls.DEFAULT_TILE_HEIGHT,
                mask_blur=cls.DEFAULT_MASK_BLUR,
                tile_padding=cls.DEFAULT_TILE_PADDING,
                seam_fix_mode="Half Tile",
                seam_fix_denoise=0.2,
                seam_fix_width=64,
                seam_fix_mask_blur=16,
                seam_fix_padding=32,
                force_uniform_tiles="enable",
                image=image_tensor,
                model=model.model,
                positive=positive,
                negative=negative,
                vae=model.vae,
                upscale_model=upscale_model,
                pipeline=True,
                callback=callback or ctx.callback,
            )

            logger.info("Img2Img: completed successfully")
            return result[0]

        except Exception as e:
            # Deliberate best-effort: log and hand back the original image.
            logger.exception(f"Img2Img failed: {e}")
            return image_tensor

    @classmethod
    def _resolve_upscale_by(cls, image_tensor: torch.Tensor, target_width: Optional[int]) -> float:
        """Derive the upscale factor from a target width.

        Args:
            image_tensor: Input image, [B, H, W, C] or [H, W, C]
            target_width: Desired output width, or a falsy value if unset

        Returns:
            ``target_width / input_width`` when a target is set and differs
            from the input width, otherwise ``DEFAULT_UPSCALE_BY``.
        """
        # Width is the second-to-last axis in both accepted layouts; indexing
        # from the front would read the channel count for [H, W, C] input.
        input_w = image_tensor.shape[-2]
        if target_width and input_w and target_width != input_w:
            upscale_by = target_width / input_w
            logging.getLogger(__name__).info(
                f"Img2Img: calculated upscale_by={upscale_by:.2f} from target width {target_width}"
            )
            return upscale_by
        return cls.DEFAULT_UPSCALE_BY

    @classmethod
    def _load_image(cls, path: str) -> torch.Tensor:
        """Load an image from disk and convert to tensor.

        The image is converted to 8-bit RGB first, so grayscale, palette and
        RGBA files all yield three channels.

        Args:
            path: Path to the image file

        Returns:
            Image tensor in [1, H, W, 3] format, normalized to [0, 1]
        """
        # Context manager closes the file handle; np.array forces the pixel
        # data to be read before the file is closed.
        with Image.open(path) as img:
            img_array = np.array(img.convert("RGB"))

        img_tensor = torch.from_numpy(img_array).float() / 255.0

        # Add batch dimension
        return img_tensor.unsqueeze(0)

    @classmethod
    def simple_img2img(
        cls,
        ctx: "PipelineContext",
        model: "AbstractModel",
        positive: Any,
        negative: Any,
        image_tensor: torch.Tensor,
        denoise: float = 0.75,
        last_step: Optional[int] = None,
        callback: Optional[Callable] = None,
    ) -> dict:
        """Simple image-to-image without upscaling.

        Encodes the input image to latents and runs diffusion with
        the specified denoising strength.

        Args:
            ctx: Pipeline context
            model: The loaded model
            positive: Positive conditioning
            negative: Negative conditioning
            image_tensor: Input image tensor
            denoise: Denoising strength (0.0 = no change, 1.0 = full generation)
            last_step: Optional step to stop at (for refiner handoff)
            callback: Optional callback for live previews; falls back to
                ``ctx.callback``

        Returns:
            Dictionary with 'samples' key containing generated latents

        Raises:
            Exception: Any error from encoding or sampling is logged and
                re-raised unchanged.
        """
        logger = logging.getLogger(__name__)

        try:
            from src.AutoEncoders import VariationalAE
            from src.sample import sampling
            from src.hidiffusion import msw_msa_attention

            # Determine model flags
            is_flux = getattr(model.capabilities, "is_flux", False)
            is_flux2 = getattr(model.capabilities, "is_flux2", False)

            # Encode image to latents (pass flux flag for correct encoding)
            vae_encode = VariationalAE.VAEEncode()
            latents = vae_encode.encode(
                vae=model.vae,
                pixels=image_tensor,
                flux=is_flux or is_flux2,
            )[0]

            # Apply HiDiffusion optimizer (not for Flux)
            # NOTE(review): only `is_flux` is checked here and for cfg /
            # multiscale below, whereas `apply` treats Flux2 like Flux.
            # Confirm whether Flux2 is meant to take this path.
            optimized_model = model.model
            if not is_flux:
                try:
                    hidiff = msw_msa_attention.ApplyMSWMSAAttentionSimple()
                    optimized_model = hidiff.go(model_type="auto", model=model.model)[0]
                except Exception:
                    # Best-effort optimisation: keep the plain model, but
                    # leave a trace instead of swallowing the error silently.
                    logger.debug(
                        "HiDiffusion optimization failed; using unmodified model",
                        exc_info=True,
                    )
                    optimized_model = model.model

            # Run sampling with denoise < 1.0
            ksampler = sampling.KSampler()
            result = ksampler.sample(
                seed=ctx.seed,
                steps=ctx.sampling.steps,
                cfg=ctx.sampling.cfg if not is_flux else 1.0,
                sampler_name=ctx.sampling.sampler,
                scheduler=ctx.sampling.scheduler,
                denoise=denoise,
                model=optimized_model,
                positive=positive,
                negative=negative,
                latent_image=latents,
                pipeline=True,
                flux=is_flux,
                flux2=is_flux2,
                enable_multiscale=False if is_flux else ctx.sampling.enable_multiscale,
                cfg_free_enabled=ctx.sampling.cfg_free_enabled,
                cfg_free_start_percent=ctx.sampling.cfg_free_start_percent,
                last_step=last_step,
                callback=callback or ctx.callback,  # Enable live previews during sampling
            )

            return result[0]

        except Exception as e:
            logger.exception(f"Simple img2img failed: {e}")
            raise

    @classmethod
    def is_enabled(cls, ctx: "PipelineContext") -> bool:
        """Check if Img2Img mode is enabled.

        Args:
            ctx: Pipeline context

        Returns:
            The value of ``ctx.features.img2img``
        """
        return ctx.features.img2img