diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..cb439ecdab12cd34ab3152fc58f56de8f7321e04 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,64 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_0.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_1.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_10.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_2.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_3.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_4.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_5.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_6.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_7.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_8.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/aesthetic_model2_vangogh/image_9.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_0.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_1.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_10.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_2.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_3.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_4.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_5.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_6.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_7.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_8.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/enfield3_winter_snow/image_9.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_0.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_1.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_10.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_11.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_2.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_3.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_4.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_5.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_6.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_7.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_8.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/jackson_fluffy/image_9.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_0.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_1.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_10.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_2.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_3.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_4.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_5.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_6.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_7.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_8.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/light_lamp_blue_side/image_9.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_0.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_1.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_10.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_2.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_3.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_4.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_5.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_6.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_7.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_8.png filter=lfs diff=lfs merge=lfs -text
+sample_images/precomputed/venice1_grow_ivy/image_9.png filter=lfs diff=lfs merge=lfs -text
diff --git a/model/.DS_Store b/model/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/model/.DS_Store differ
diff --git a/model/__init__.py b/model/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/model/__pycache__/__init__.cpython-310.pyc b/model/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e3e2ed7271863db3577ee585e2394c9c392806df
Binary files /dev/null and b/model/__pycache__/__init__.cpython-310.pyc differ
diff --git a/model/__pycache__/sliders_model.cpython-310.pyc b/model/__pycache__/sliders_model.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a292aece372141abf725caeb2af44a2bc90ddfb
Binary files /dev/null and b/model/__pycache__/sliders_model.cpython-310.pyc differ
diff --git a/model/__pycache__/sliders_pipeline.cpython-310.pyc b/model/__pycache__/sliders_pipeline.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..670ecd3e3b19af1279542a5bc19f385428881a92
Binary files /dev/null and b/model/__pycache__/sliders_pipeline.cpython-310.pyc differ
diff --git a/model/__pycache__/transformer_flux.cpython-310.pyc b/model/__pycache__/transformer_flux.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b1189e170b67ede1a5aef207661d731823368349
Binary files /dev/null and b/model/__pycache__/transformer_flux.cpython-310.pyc differ
diff --git a/model/sliders_model.py b/model/sliders_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..53d86effa6ee68e372018aea53f3afc615276757
--- /dev/null
+++ b/model/sliders_model.py
@@ -0,0 +1,102 @@
+import torch
+import numpy as np
+
+
+class SliderProjector(torch.nn.Module):
+ def __init__(
+ self,
+ out_dim, # Dimension of the output token that the projector will generate
+ pe_dim, # The dimension of positional embedding that will be applied
+ n_layers = 4,
+        is_clip_input = True, # whether CLIP embeddings are passed to the projector along with the slider value
+ ):
+ super().__init__()
+ self.out_dim = out_dim
+ self.pe_dim = pe_dim
+ self.is_clip_input = is_clip_input
+
+        # Build the projector MLP; n_layers controls its depth
+ layers = []
+ pe_extender_dim = 768
+
+        # if CLIP embeddings are passed along with the slider scalar, widen the projector input accordingly
+ if is_clip_input:
+ in_dim = pe_extender_dim + 768
+ else:
+ in_dim = pe_extender_dim
+
+        # stack (n_layers - 1) Linear + ReLU blocks, followed by a final Linear layer
+ for i in range(n_layers - 1):
+ layers.append(torch.nn.Linear(in_dim, out_dim))
+ layers.append(torch.nn.ReLU())
+ in_dim = out_dim
+ layers.append(torch.nn.Linear(in_dim, out_dim))
+
+ # a simple linear layer to extend the pe into a higher dimensional space
+ self.pe_extender = torch.nn.Linear(pe_dim, 768)
+ # then we will pass it through a projector network
+ self.projector = torch.nn.Sequential(*layers)
+
+    # Sinusoidal positional encoding of the scalar slider value
+ def posEnc(self, s):
+ pe = torch.stack([torch.sin(torch.pi * s), torch.cos(torch.pi * s)], dim=-1)
+ return pe
+
+    # Projects the slider value (optionally combined with CLIP embeddings) to a token embedding that conditions the diffusion model.
+ def forward(self, s, clip_embeddings = None):
+ # Apply the positional embedding to the input scalar
+ x_pe = self.posEnc(s)
+ x_scale_embedding = self.pe_extender(x_pe) # (1, 768)
+
+        if clip_embeddings is not None: # if CLIP embeddings are passed, concatenate them with the scalar embedding before projecting
+            # print("clip embeddings shape: {}".format(clip_embeddings.shape))
+            x_combined_embedding = torch.cat([x_scale_embedding, clip_embeddings], dim=-1) # (1, 768 + 768)
+        else:
+            # fall back to the scalar embedding alone (only valid when the projector was built with is_clip_input=False)
+            x_combined_embedding = x_scale_embedding
+
+        x_proj = self.projector(x_combined_embedding)
+        # print("x proj shape: {}".format(x_proj.shape))
+        return x_proj
+
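+# Minimal usage sketch (illustration only; the dimensions below are assumptions, not the trained
+# configuration):
+#   projector = SliderProjector(out_dim=4096, pe_dim=2, is_clip_input=True)
+#   s = torch.tensor([[0.5]])            # scalar slider value, shape (1, 1)
+#   pooled = torch.randn(1, 1, 768)      # pooled CLIP text embedding
+#   token = projector(s, pooled)         # -> (1, 1, 4096) conditioning token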
+
+class SliderProjector_wo_clip(torch.nn.Module):
+ def __init__(
+ self,
+ out_dim, # Dimension of the output token that the projector will generate
+ pe_dim, # The dimension of positional embedding
+ n_layers = 4,
+        is_clip_input = False, # whether CLIP embeddings are passed to the projector (unused in this variant)
+ ):
+ super().__init__()
+ self.out_dim = out_dim
+ self.pe_dim = pe_dim
+
+ # Add the layers here in defining, assume n_layers is another parameter
+ layers = []
+ pe_extender_dim = 768
+
+        # extend the input dimension to 768 with a linear layer, keeping dimensions consistent with the CLIP-based variant
+ in_dim = pe_extender_dim
+
+        # stack (n_layers - 1) Linear + ReLU blocks, followed by a final Linear layer
+ for i in range(n_layers - 1):
+ layers.append(torch.nn.Linear(in_dim, out_dim))
+ layers.append(torch.nn.ReLU())
+ in_dim = out_dim
+ layers.append(torch.nn.Linear(in_dim, out_dim))
+
+ # adding a pe extender to have the same dimension as clip embeddings
+ self.pe_extender = torch.nn.Linear(pe_dim, 768)
+ # then we will pass it through a projector network
+ self.projector = torch.nn.Sequential(*layers)
+
+ def posEnc(self, s):
+ pe = torch.stack([torch.sin(torch.pi * s), torch.cos(torch.pi * s)], dim=-1)
+ return pe
+
+    # Projects the slider value to a token embedding that conditions the diffusion model.
+ def forward(self, s):
+ x_pe = self.posEnc(s)
+ x_scale_embedding = self.pe_extender(x_pe)
+
+ x_proj = self.projector(x_scale_embedding)
+ return x_proj
+
diff --git a/model/sliders_pipeline.py b/model/sliders_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..de5889940cc8da8d2b5969a80dc20fc1c5e00c60
--- /dev/null
+++ b/model/sliders_pipeline.py
@@ -0,0 +1,468 @@
+# sliders_pipeline.py
+import torch
+from diffusers import FluxKontextPipeline # Base pipeline from Diffusers
+import inspect
+from typing import Any, Callable, Dict, List, Optional, Union
+from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
+from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput
+import numpy as np
+from diffusers.pipelines.flux.pipeline_flux_kontext import *
+
+# custom import for transformer models
+from model.transformer_flux import FluxTransformer2DModelwithSliderConditioning
+
+
+from diffusers.utils import (
+ USE_PEFT_BACKEND,
+ is_torch_xla_available,
+ logging,
+ replace_example_docstring,
+ scale_lora_layers,
+ unscale_lora_layers,
+)
+
+if is_torch_xla_available():
+ import torch_xla.core.xla_model as xm
+
+ XLA_AVAILABLE = True
+else:
+ XLA_AVAILABLE = False
+
+
+# Custom pipeline for running inference with the pretrained slider projector on top of the Flux Kontext model.
+class FluxKontextSliderPipeline(FluxKontextPipeline):
+ """
+ Custom pipeline extending FluxKontextPipeline with slider conditioning.
+ Minimal changes: Override __init__ to load slider_projector, and __call__ for slider-aware inference.
+ """
+
+ def __init__(
+ self,
+ scheduler: FlowMatchEulerDiscreteScheduler,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ tokenizer: CLIPTokenizer,
+ text_encoder_2: T5EncoderModel,
+ tokenizer_2: T5TokenizerFast,
+ transformer: FluxTransformer2DModelwithSliderConditioning,
+ image_encoder: CLIPVisionModelWithProjection = None,
+ feature_extractor: CLIPImageProcessor = None,
+ slider_projector=None, # the slider projector model loaded with the weights
+ text_condn: bool = False,
+ ):
+ # Calling the parent __init__ with the base arguments that are passed in the pipeline
+ super().__init__(
+ scheduler=scheduler,
+ vae=vae,
+ text_encoder=text_encoder,
+ tokenizer=tokenizer,
+ text_encoder_2=text_encoder_2,
+ tokenizer_2=tokenizer_2,
+ transformer=transformer,
+ image_encoder=image_encoder,
+ feature_extractor=feature_extractor,
+ )
+
+ device = self._execution_device
+ # Minimal addition: Load your custom slider_projector
+ self.slider_projector = slider_projector
+
+ self.text_condn = text_condn # whether we are conditioning in the text space or the modulation space
+ self.slider_projector.eval() # Set to eval mode for inference
+
+ def __call__(
+ self,
+ image: Optional[PipelineImageInput] = None,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ negative_prompt: Union[str, List[str]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ true_cfg_scale: float = 1.0,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 28,
+ sigmas: Optional[List[float]] = None,
+ guidance_scale: float = 3.5,
+ num_images_per_prompt: Optional[int] = 1,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ ip_adapter_image: Optional[PipelineImageInput] = None,
+ ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
+ negative_ip_adapter_image: Optional[PipelineImageInput] = None,
+ negative_ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+ max_sequence_length: int = 512,
+ max_area: int = 1024**2,
+ _auto_resize: bool = True,
+ # slider values as additional input for the pipeline ------------- #
+        # the slider projector is already attached in __init__, so we can call it here to obtain the slider embeddings
+ text_condn: bool = False,
+ modulation_condn: bool = False,
+ slider_value: Optional[torch.FloatTensor] = None,
+        is_clip_input: bool = False, # whether the slider projector takes the pooled CLIP text embedding as an additional input
+ ):
+        # small modification: keep all values on the same device; the pipeline's execution device is propagated to the model below.
+
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ # print("vae scale factor: {}".format(self.vae_scale_factor))
+ # print("default sample size: {}".format(self.default_sample_size))
+ # print("default sample size: height: {}, width: {}".format(height, width))
+
+ original_height, original_width = height, width
+ aspect_ratio = width / height
+ width = round((max_area * aspect_ratio) ** 0.5)
+ height = round((max_area / aspect_ratio) ** 0.5)
+
+ multiple_of = self.vae_scale_factor * 2
+ width = width // multiple_of * multiple_of
+ height = height // multiple_of * multiple_of
+ # print("after width and height quantized: height: {}, width: {}".format(height, width))
+
+
+        # we do not check whether the height and width match the predefined inference resolutions.
+ # if height != original_height or width != original_width:
+ # print("height and width are not matching the original dimensions ..")
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
+ max_sequence_length=max_sequence_length,
+ )
+
+ self._guidance_scale = guidance_scale
+ self._joint_attention_kwargs = joint_attention_kwargs
+ self._current_timestep = None
+ self._interrupt = False
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = len(prompt_embeds)
+
+ device = self._execution_device
+ # print("execution device: {}".format(device))
+
+ lora_scale = (
+ self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
+ )
+ has_neg_prompt = negative_prompt is not None or (
+ negative_prompt_embeds is not None and negative_pooled_prompt_embeds is not None
+ )
+ do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
+ (
+ prompt_embeds,
+ pooled_prompt_embeds,
+ text_ids,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ prompt_embeds=prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ max_sequence_length=max_sequence_length,
+ lora_scale=lora_scale,
+ )
+ if do_true_cfg:
+ (
+ negative_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ negative_text_ids,
+ ) = self.encode_prompt(
+ prompt=negative_prompt,
+ prompt_2=negative_prompt_2,
+ prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ max_sequence_length=max_sequence_length,
+ lora_scale=lora_scale,
+ )
+
+        # 3. Preprocess image. The original preprocessing below forces images to Kontext's preferred resolutions (~1024x1024); since we train with 512x512 images, we instead resize to the requested dimensions in 3.1.
+ # if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
+ # img = image[0] if isinstance(image, list) else image
+ # image_height, image_width = self.image_processor.get_default_height_width(img)
+ # aspect_ratio = image_width / image_height
+ # if _auto_resize:
+ # # Kontext is trained on specific resolutions, using one of them is recommended
+ # _, image_width, image_height = min(
+ # (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_KONTEXT_RESOLUTIONS
+ # )
+ # image_width = image_width // multiple_of * multiple_of
+ # image_height = image_height // multiple_of * multiple_of
+ # image = self.image_processor.resize(image, image_height, image_width)
+ # image = self.image_processor.preprocess(image, image_height, image_width)
+
+        # 3.1 Custom image preprocessing that reshapes images to the requested input dimensions,
+        # overriding height and width since our dataset uses a fixed image size.
+ height = original_height
+ width = original_width
+
+ image = self.image_processor.resize(image, height, width)
+ image = self.image_processor.preprocess(image, height, width)
+ # print("image shape after preprocessing: {}".format(image.shape))
+
+        # 3.2 -------------------------------------- Preparing the slider values -------------------------------------- #
+        # Device checks for the slider tensor and the projector model (debugging only):
+ # print(f"slider_value device: {slider_value.device}") # tensor device
+ # print(f"slider_projector device: {next(self.slider_projector.parameters()).device}") # model device
+
+        # if clip input is enabled, compute the slider embeddings from both the slider values and the pooled prompt embeddings
+        if is_clip_input:
+            # TODO: This may not work with batch sizes larger than 1; validate before relying on the output.
+            # slider_value is expected to be a tensor of slider scalars (one per sample)
+            # Ensure pooled_prompt_embeds gains a leading dimension, i.e. shape [1, 1, ...]
+            pooled_prompt_embeds_tensor = pooled_prompt_embeds.unsqueeze(0).to(device)
+ slider_value = slider_value.to(device)
+
+ self.slider_projector = self.slider_projector.to(device)
+ # print("pooled prompt device: {}".format(pooled_prompt_embeds_tensor.device))
+ # print("slider value device: {}".format(slider_value.device))
+ # print("slider projector device: {}".format(next(self.slider_projector.parameters()).device))
+
+ slider_embeddings = self.slider_projector(slider_value, pooled_prompt_embeds_tensor).to(device)
+ else:
+ slider_embeddings = self.slider_projector(slider_value).to(device)
+
+
+ # print("slider embeddings device: {}".format(slider_embeddings.device))
+ # multiplying the slider embeddings with a random value to check whether there is any effect of changing the slider in the input
+ # slider_embeddings = slider_embeddings * (np.random.rand() * 4 - 2)
+
+ # print("slider embeddings norm: {}".format(slider_embeddings.norm()))
+ # print("slider value inside the pipeline: {}".format(slider_value))
+ # print("slider embeddings: {}".format(slider_embeddings.shape)) # (1, 1, 64)
+        slider_id = torch.tensor([0, 0, 2]).reshape(1, 3).to(device)
+
+        # replicate the slider token n_repeats times
+ n_repeats = 1
+ repeated_slider_token = slider_embeddings.repeat(1, n_repeats, 1)
+ repeated_slider_id = slider_id.repeat(n_repeats, 1)
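+        # shapes (assumed): repeated_slider_token (1, n_repeats, out_dim); repeated_slider_id (n_repeats, 3) positional ids for the extra tokens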
+
+ # ------------------------------- concatenating the slider embeddings with the text embeddings --------------- #
+        # if we are conditioning in the text space, concatenate the slider tokens to the text conditioning
+
+ if text_condn:
+ print("using text conditioning ...")
+ extended_text_ids = torch.cat([text_ids, repeated_slider_id], dim=0)
+ extended_prompt_embeds = torch.cat([prompt_embeds, repeated_slider_token], dim=1)
+ else:
+ extended_text_ids = text_ids
+ extended_prompt_embeds = prompt_embeds
+
+ if modulation_condn:
+ modulation_embeddings = repeated_slider_token
+ else:
+ modulation_embeddings = None
+
+ # print("concatenated text ids shape: {}".format(extended_text_ids.shape)) # (640, 3)
+ # print("concatenated prompt embeds shape: {}".format(extended_prompt_embeds.shape)) # (1, 640, 4096)
+
+ # print("slider id: {}".format(slider_id.shape)) # (1, 3)
+        #--------------------- slider components defined above are passed along with the other inputs in the model forward pass. ---------------------#
+
+ # 4. Prepare latent variables
+ num_channels_latents = self.transformer.config.in_channels // 4
+ latents, image_latents, latent_ids, image_ids = self.prepare_latents(
+ image,
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+ if image_ids is not None:
+ # latent_ids = torch.cat([latent_ids, image_ids], dim=0) # dim 0 is sequence dimension
+ # TODO: Verify the shapes here, adding the slider id along with the ids for the input and target images
+ # print("original latent ids: {}".format(latent_ids.shape))
+ ## --- not using the slider id along with the visual tokens, we are adding them along with the text tokens --- ##
+ # latent_ids = torch.cat([latent_ids, image_ids, slider_id], dim=0)
+
+ # --- using the standard image and text latent conditioning and not adding the slider ids in the model --- ##
+ latent_ids = torch.cat([latent_ids, image_ids], dim=0)
+
+ # print("latent ids after concatenation: {}".format(latent_ids.shape))
+
+ # 5. Prepare timesteps
+ sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
+ image_seq_len = latents.shape[1]
+ mu = calculate_shift(
+ image_seq_len,
+ self.scheduler.config.get("base_image_seq_len", 256),
+ self.scheduler.config.get("max_image_seq_len", 4096),
+ self.scheduler.config.get("base_shift", 0.5),
+ self.scheduler.config.get("max_shift", 1.15),
+ )
+ timesteps, num_inference_steps = retrieve_timesteps(
+ self.scheduler,
+ num_inference_steps,
+ device,
+ sigmas=sigmas,
+ mu=mu,
+ )
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+ self._num_timesteps = len(timesteps)
+
+ # handle guidance
+ if self.transformer.config.guidance_embeds:
+ guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
+ guidance = guidance.expand(latents.shape[0])
+ else:
+ guidance = None
+
+ # -------------- Logic for ip adapter, we can remove this ----------------------- #
+ if (ip_adapter_image is not None or ip_adapter_image_embeds is not None) and (
+ negative_ip_adapter_image is None and negative_ip_adapter_image_embeds is None
+ ):
+ negative_ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
+ negative_ip_adapter_image = [negative_ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
+
+ elif (ip_adapter_image is None and ip_adapter_image_embeds is None) and (
+ negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None
+ ):
+ ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
+ ip_adapter_image = [ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
+
+ if self.joint_attention_kwargs is None:
+ self._joint_attention_kwargs = {}
+
+ image_embeds = None
+ negative_image_embeds = None
+ if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
+ image_embeds = self.prepare_ip_adapter_image_embeds(
+ ip_adapter_image,
+ ip_adapter_image_embeds,
+ device,
+ batch_size * num_images_per_prompt,
+ )
+ if negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None:
+ negative_image_embeds = self.prepare_ip_adapter_image_embeds(
+ negative_ip_adapter_image,
+ negative_ip_adapter_image_embeds,
+ device,
+ batch_size * num_images_per_prompt,
+ )
+
+ # 6. Denoising loop
+ # We set the index here to remove DtoH sync, helpful especially during compilation.
+ # Check out more details here: https://github.com/huggingface/diffusers/pull/11696
+ self.scheduler.set_begin_index(0)
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ if self.interrupt:
+ continue
+
+ self._current_timestep = t
+ if image_embeds is not None:
+ self._joint_attention_kwargs["ip_adapter_image_embeds"] = image_embeds
+
+ # stacking the latents for the generated latent and the input image latent
+ latent_model_input = latents
+ if image_latents is not None:
+ latent_model_input = torch.cat([latents, image_latents], dim=1)
+
+ # print("latent model shape after concatenation: {}".format(latent_model_input.shape))
+ timestep = t.expand(latents.shape[0]).to(latents.dtype)
+
+ noise_pred = self.transformer(
+ hidden_states=latent_model_input,
+ timestep=timestep / 1000,
+ guidance=guidance,
+ pooled_projections=pooled_prompt_embeds,
+ encoder_hidden_states=extended_prompt_embeds,
+ txt_ids=extended_text_ids,
+ img_ids=latent_ids,
+ joint_attention_kwargs=self.joint_attention_kwargs,
+ return_dict=False,
+                    ## pass the modulation token when modulation-space conditioning is enabled
+                    modulation_embeddings=modulation_embeddings, # None unless modulation_condn was enabled above
+ )[0]
+ noise_pred = noise_pred[:, : latents.size(1)]
+
+ if do_true_cfg:
+ if negative_image_embeds is not None:
+ self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
+ neg_noise_pred = self.transformer(
+ hidden_states=latent_model_input,
+ timestep=timestep / 1000,
+ guidance=guidance,
+ pooled_projections=negative_pooled_prompt_embeds,
+ encoder_hidden_states=negative_prompt_embeds,
+ txt_ids=negative_text_ids,
+ img_ids=latent_ids,
+ joint_attention_kwargs=self.joint_attention_kwargs,
+ return_dict=False,
+ modulation_embeddings=modulation_embeddings,
+ )[0]
+ neg_noise_pred = neg_noise_pred[:, : latents.size(1)]
+ noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents_dtype = latents.dtype
+ latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
+
+ if latents.dtype != latents_dtype:
+ if torch.backends.mps.is_available():
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+ latents = latents.to(latents_dtype)
+
+ if callback_on_step_end is not None:
+ callback_kwargs = {}
+ for k in callback_on_step_end_tensor_inputs:
+ callback_kwargs[k] = locals()[k]
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+ latents = callback_outputs.pop("latents", latents)
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+
+ if XLA_AVAILABLE:
+ xm.mark_step()
+
+ self._current_timestep = None
+
+ if output_type == "latent":
+ image = latents
+ else:
+ latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
+ latents = (latents / self.vae.config.scaling_factor) + self.vae.config.shift_factor
+ image = self.vae.decode(latents, return_dict=False)[0]
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ # Offload all models
+ self.maybe_free_model_hooks()
+
+ if not return_dict:
+ return (image,)
+
+ return FluxPipelineOutput(images=image)
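+
+# Minimal inference sketch (illustration only; the repo id, dtype, dimensions and slider values
+# below are assumptions, not the shipped configuration):
+#
+#   from model.sliders_model import SliderProjector
+#   from model.transformer_flux import FluxTransformer2DModelwithSliderConditioning
+#
+#   transformer = FluxTransformer2DModelwithSliderConditioning.from_pretrained(
+#       "black-forest-labs/FLUX.1-Kontext-dev", subfolder="transformer", torch_dtype=torch.bfloat16
+#   )
+#   projector = SliderProjector(out_dim=4096, pe_dim=2, is_clip_input=True)
+#   projector.load_state_dict(torch.load("model_weights/slider_projector.pth", map_location="cpu"))  # assuming the .pth stores a state_dict
+#
+#   pipe = FluxKontextSliderPipeline.from_pretrained(
+#       "black-forest-labs/FLUX.1-Kontext-dev",
+#       transformer=transformer,
+#       slider_projector=projector,
+#       torch_dtype=torch.bfloat16,
+#   ).to("cuda")
+#   pipe.load_lora_weights("model_weights")  # LoRA weights shipped alongside the projector (assumed to target the transformer)
+#
+#   out = pipe(
+#       image=input_image, prompt="make it snow", height=512, width=512,
+#       slider_value=torch.tensor([[0.8]]), text_condn=True, is_clip_input=True,
+#   )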
diff --git a/model/transformer_flux.py b/model/transformer_flux.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6ff631bf2a2738fbe3ffc435b33cb8d64b7ddb6
--- /dev/null
+++ b/model/transformer_flux.py
@@ -0,0 +1,608 @@
+# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import Any, Dict, Optional, Tuple, Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin
+from diffusers.utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from diffusers.utils.import_utils import is_torch_npu_available
+from diffusers.utils.torch_utils import maybe_allow_in_graph
+from diffusers.models.attention import FeedForward
+from diffusers.models.attention_processor import (
+ Attention,
+ AttentionProcessor,
+ FluxAttnProcessor2_0,
+ FluxAttnProcessor2_0_NPU,
+ FusedFluxAttnProcessor2_0,
+)
+from diffusers.models.cache_utils import CacheMixin
+from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed
+from diffusers.models.modeling_outputs import Transformer2DModelOutput
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle
+
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+
+@maybe_allow_in_graph
+class FluxSingleTransformerBlock(nn.Module):
+ def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0):
+ super().__init__()
+ self.mlp_hidden_dim = int(dim * mlp_ratio)
+
+ self.norm = AdaLayerNormZeroSingle(dim)
+ self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim)
+ self.act_mlp = nn.GELU(approximate="tanh")
+ self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim)
+
+ if is_torch_npu_available():
+ deprecation_message = (
+ "Defaulting to FluxAttnProcessor2_0_NPU for NPU devices will be removed. Attention processors "
+ "should be set explicitly using the `set_attn_processor` method."
+ )
+ deprecate("npu_processor", "0.34.0", deprecation_message)
+ processor = FluxAttnProcessor2_0_NPU()
+ else:
+ processor = FluxAttnProcessor2_0()
+
+ self.attn = Attention(
+ query_dim=dim,
+ cross_attention_dim=None,
+ dim_head=attention_head_dim,
+ heads=num_attention_heads,
+ out_dim=dim,
+ bias=True,
+ processor=processor,
+ qk_norm="rms_norm",
+ eps=1e-6,
+ pre_only=True,
+ )
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ temb: torch.Tensor,
+ image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
+ ) -> torch.Tensor:
+ residual = hidden_states
+ norm_hidden_states, gate = self.norm(hidden_states, emb=temb)
+ mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states))
+ joint_attention_kwargs = joint_attention_kwargs or {}
+ attn_output = self.attn(
+ hidden_states=norm_hidden_states,
+ image_rotary_emb=image_rotary_emb,
+ **joint_attention_kwargs,
+ )
+
+ hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2)
+ gate = gate.unsqueeze(1)
+ hidden_states = gate * self.proj_out(hidden_states)
+ hidden_states = residual + hidden_states
+ if hidden_states.dtype == torch.float16:
+ hidden_states = hidden_states.clip(-65504, 65504)
+
+ return hidden_states
+
+
+@maybe_allow_in_graph
+class FluxTransformerBlock(nn.Module):
+ def __init__(
+ self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6
+ ):
+ super().__init__()
+
+ self.norm1 = AdaLayerNormZero(dim)
+ self.norm1_context = AdaLayerNormZero(dim)
+
+ self.attn = Attention(
+ query_dim=dim,
+ cross_attention_dim=None,
+ added_kv_proj_dim=dim,
+ dim_head=attention_head_dim,
+ heads=num_attention_heads,
+ out_dim=dim,
+ context_pre_only=False,
+ bias=True,
+ processor=FluxAttnProcessor2_0(),
+ qk_norm=qk_norm,
+ eps=eps,
+ )
+
+ self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
+ self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
+
+ self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
+ self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ encoder_hidden_states: torch.Tensor,
+ temb: torch.Tensor,
+ image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
+ ## ---- adding the modulation conditioning vector for controlling the strength ---- ##
+ modulation_condn: Optional[torch.Tensor] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+
+        # conditioning logic: compute the standard AdaLN-Zero modulation, then optionally adjust it with the slider modulation below
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb)
+
+ norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context(
+ encoder_hidden_states, emb=temb
+ )
+
+        # If modulation conditioning is passed, use it to adjust the context scale and shift parameters; otherwise proceed with the regular modulation.
+ if modulation_condn is not None:
+ modulation_condn = modulation_condn.squeeze(1)
+
+ # chunking the modulation space here
+ modulation_scale, modulation_shift = modulation_condn.chunk(2, dim=1) # dividing the output into two parts, one for scale and another one for shift.
+ # print("modulation condn shape: {}".format(modulation_condn.shape)) # [1, out_dim]
+
+ # adding a delta shift to the shift modulation vector
+ c_shift_mlp = c_shift_mlp + modulation_shift
+ # adding a delta scale to the scale modulation vector
+ c_scale_mlp = c_scale_mlp + modulation_scale
+
+ joint_attention_kwargs = joint_attention_kwargs or {}
+ # Attention.
+ attention_outputs = self.attn(
+ hidden_states=norm_hidden_states,
+ encoder_hidden_states=norm_encoder_hidden_states,
+ image_rotary_emb=image_rotary_emb,
+ **joint_attention_kwargs,
+ )
+
+ if len(attention_outputs) == 2:
+ attn_output, context_attn_output = attention_outputs
+ elif len(attention_outputs) == 3:
+ attn_output, context_attn_output, ip_attn_output = attention_outputs
+
+ # Process attention outputs for the `hidden_states`.
+ attn_output = gate_msa.unsqueeze(1) * attn_output
+ hidden_states = hidden_states + attn_output
+
+ norm_hidden_states = self.norm2(hidden_states)
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+ ff_output = self.ff(norm_hidden_states)
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
+
+ hidden_states = hidden_states + ff_output
+ if len(attention_outputs) == 3:
+ hidden_states = hidden_states + ip_attn_output
+
+ # Process attention outputs for the `encoder_hidden_states`.
+
+ context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
+ encoder_hidden_states = encoder_hidden_states + context_attn_output
+
+ norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
+ norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
+
+ context_ff_output = self.ff_context(norm_encoder_hidden_states)
+ encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output
+ if encoder_hidden_states.dtype == torch.float16:
+ encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504)
+
+ return encoder_hidden_states, hidden_states
+
+
+class FluxTransformer2DModelwithSliderConditioning(
+ ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, FluxTransformer2DLoadersMixin, CacheMixin
+):
+ """
+ The Transformer model introduced in Flux.
+
+ Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
+
+ Args:
+ patch_size (`int`, defaults to `1`):
+ Patch size to turn the input data into small patches.
+ in_channels (`int`, defaults to `64`):
+ The number of channels in the input.
+ out_channels (`int`, *optional*, defaults to `None`):
+ The number of channels in the output. If not specified, it defaults to `in_channels`.
+ num_layers (`int`, defaults to `19`):
+ The number of layers of dual stream DiT blocks to use.
+ num_single_layers (`int`, defaults to `38`):
+ The number of layers of single stream DiT blocks to use.
+ attention_head_dim (`int`, defaults to `128`):
+ The number of dimensions to use for each attention head.
+ num_attention_heads (`int`, defaults to `24`):
+ The number of attention heads to use.
+ joint_attention_dim (`int`, defaults to `4096`):
+ The number of dimensions to use for the joint attention (embedding/channel dimension of
+ `encoder_hidden_states`).
+ pooled_projection_dim (`int`, defaults to `768`):
+ The number of dimensions to use for the pooled projection.
+ guidance_embeds (`bool`, defaults to `False`):
+ Whether to use guidance embeddings for guidance-distilled variant of the model.
+ axes_dims_rope (`Tuple[int]`, defaults to `(16, 56, 56)`):
+ The dimensions to use for the rotary positional embeddings.
+ """
+
+ _supports_gradient_checkpointing = True
+ _no_split_modules = ["FluxTransformerBlock", "FluxSingleTransformerBlock"]
+ _skip_layerwise_casting_patterns = ["pos_embed", "norm"]
+ _repeated_blocks = ["FluxTransformerBlock", "FluxSingleTransformerBlock"]
+
+ @register_to_config
+ def __init__(
+ self,
+ patch_size: int = 1,
+ in_channels: int = 64,
+ out_channels: Optional[int] = None,
+ num_layers: int = 19,
+ num_single_layers: int = 38,
+ attention_head_dim: int = 128,
+ num_attention_heads: int = 24,
+ joint_attention_dim: int = 4096,
+ pooled_projection_dim: int = 768,
+ guidance_embeds: bool = False,
+ axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
+ ):
+ super().__init__()
+ self.out_channels = out_channels or in_channels
+ self.inner_dim = num_attention_heads * attention_head_dim
+
+ self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope)
+
+ text_time_guidance_cls = (
+ CombinedTimestepGuidanceTextProjEmbeddings if guidance_embeds else CombinedTimestepTextProjEmbeddings
+ )
+ self.time_text_embed = text_time_guidance_cls(
+ embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim
+ )
+
+ self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim)
+ self.x_embedder = nn.Linear(in_channels, self.inner_dim)
+
+ self.transformer_blocks = nn.ModuleList(
+ [ # we will add conditioning logic in this block for training with modulation space
+ FluxTransformerBlock(
+ dim=self.inner_dim,
+ num_attention_heads=num_attention_heads,
+ attention_head_dim=attention_head_dim,
+ )
+ for _ in range(num_layers)
+ ]
+ )
+
+ self.single_transformer_blocks = nn.ModuleList(
+ [
+ FluxSingleTransformerBlock(
+ dim=self.inner_dim,
+ num_attention_heads=num_attention_heads,
+ attention_head_dim=attention_head_dim,
+ )
+ for _ in range(num_single_layers)
+ ]
+ )
+
+ self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6)
+ self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)
+
+ self.gradient_checkpointing = False
+
+ @property
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.attn_processors
+ def attn_processors(self) -> Dict[str, AttentionProcessor]:
+ r"""
+ Returns:
+            `dict` of attention processors: A dictionary containing all attention processors used in the model,
+            indexed by their weight names.
+ """
+ # set recursively
+ processors = {}
+
+ def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
+ if hasattr(module, "get_processor"):
+ processors[f"{name}.processor"] = module.get_processor()
+
+ for sub_name, child in module.named_children():
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+
+ return processors
+
+ for name, module in self.named_children():
+ fn_recursive_add_processors(name, module, processors)
+
+ return processors
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attn_processor
+ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
+ r"""
+ Sets the attention processor to use to compute attention.
+
+ Parameters:
+ processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
+ The instantiated processor class or a dictionary of processor classes that will be set as the processor
+ for **all** `Attention` layers.
+
+ If `processor` is a dict, the key needs to define the path to the corresponding cross attention
+ processor. This is strongly recommended when setting trainable attention processors.
+
+ """
+ count = len(self.attn_processors.keys())
+
+ if isinstance(processor, dict) and len(processor) != count:
+ raise ValueError(
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
+ f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
+ )
+
+ def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
+ if hasattr(module, "set_processor"):
+ if not isinstance(processor, dict):
+ module.set_processor(processor)
+ else:
+ module.set_processor(processor.pop(f"{name}.processor"))
+
+ for sub_name, child in module.named_children():
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+ for name, module in self.named_children():
+ fn_recursive_attn_processor(name, module, processor)
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedFluxAttnProcessor2_0
+ def fuse_qkv_projections(self):
+ """
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
+ are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        This API is 🧪 experimental.
+        """
+ self.original_attn_processors = None
+
+ for _, attn_processor in self.attn_processors.items():
+ if "Added" in str(attn_processor.__class__.__name__):
+ raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+ self.original_attn_processors = self.attn_processors
+
+ for module in self.modules():
+ if isinstance(module, Attention):
+ module.fuse_projections(fuse=True)
+
+ self.set_attn_processor(FusedFluxAttnProcessor2_0())
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
+ def unfuse_qkv_projections(self):
+ """Disables the fused QKV projection if enabled.
+
+        This API is 🧪 experimental.
+        """
+ if self.original_attn_processors is not None:
+ self.set_attn_processor(self.original_attn_processors)
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ encoder_hidden_states: torch.Tensor = None,
+ pooled_projections: torch.Tensor = None,
+ timestep: torch.LongTensor = None,
+ img_ids: torch.Tensor = None,
+ txt_ids: torch.Tensor = None,
+ guidance: torch.Tensor = None,
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
+ controlnet_block_samples=None,
+ controlnet_single_block_samples=None,
+ return_dict: bool = True,
+ controlnet_blocks_repeat: bool = False,
+        # modulation conditioning: an embedding that can be used to modulate the features of the diffusion model
+ modulation_embeddings: Optional[torch.Tensor] = None,
+ ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+ """
+ The [`FluxTransformer2DModelwithSliderConditioning`] forward method.
+
+ Args:
+ hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`):
+ Input `hidden_states`.
+ encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`):
+ Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
+ pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`): Embeddings projected
+ from the embeddings of input conditions.
+ timestep ( `torch.LongTensor`):
+ Used to indicate denoising step.
+ block_controlnet_hidden_states: (`list` of `torch.Tensor`):
+ A list of tensors that if specified are added to the residuals of transformer blocks.
+ joint_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
+ tuple.
+
+ Returns:
+ If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
+ `tuple` where the first element is the sample tensor.
+ """
+
+ # if modulation_embeddings is not None:
+ # print("working with modulation space conditioning ...")
+ # print("modulation condn in main transformer call: {}".format(modulation_embeddings.shape))
+
+
+ if joint_attention_kwargs is not None:
+ joint_attention_kwargs = joint_attention_kwargs.copy()
+ lora_scale = joint_attention_kwargs.pop("scale", 1.0)
+ else:
+ lora_scale = 1.0
+
+ if USE_PEFT_BACKEND:
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
+ scale_lora_layers(self, lora_scale)
+ else:
+ if joint_attention_kwargs is not None and joint_attention_kwargs.get("scale", None) is not None:
+ logger.warning(
+ "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
+ )
+
+ hidden_states = self.x_embedder(hidden_states)
+
+ timestep = timestep.to(hidden_states.dtype) * 1000
+ if guidance is not None:
+ guidance = guidance.to(hidden_states.dtype) * 1000
+
+ temb = (
+ self.time_text_embed(timestep, pooled_projections)
+ if guidance is None
+ else self.time_text_embed(timestep, guidance, pooled_projections)
+ )
+
+ # print("temb shape: {}".format(temb.shape)) # [1, 3072]
+
+ # ------------------------ Logic to add the predicted embedding at the root of the modulation branch ------------------------- #
+ # modulation_embeddings = modulation_embeddings.squeeze(1)
+ # scale_factor = 10
+ # temb = temb + modulation_embeddings * scale_factor
+ # ---------------------------------------------------------------------------------------------------------------------------- #
+
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
+
+ if txt_ids.ndim == 3:
+ logger.warning(
+ "Passing `txt_ids` 3d torch.Tensor is deprecated."
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
+ )
+ txt_ids = txt_ids[0]
+ if img_ids.ndim == 3:
+ logger.warning(
+ "Passing `img_ids` 3d torch.Tensor is deprecated."
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
+ )
+ img_ids = img_ids[0]
+
+ ids = torch.cat((txt_ids, img_ids), dim=0)
+ image_rotary_emb = self.pos_embed(ids)
+
+ if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs:
+ ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds")
+ ip_hidden_states = self.encoder_hid_proj(ip_adapter_image_embeds)
+ joint_attention_kwargs.update({"ip_hidden_states": ip_hidden_states})
+
+
+        # Iterate over the dual-stream transformer blocks, which process the text and image tokens separately before they are combined later
+ for index_block, block in enumerate(self.transformer_blocks):
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
+
+                # TODO: Test and validate this path later; it may be buggy, and the checkpointed call below does not forward modulation_condn.
+                # # copied from the syncd codebase for gradient checkpointing with new arguments
+ # def create_custom_forward(module, return_dict=None):
+ # def custom_forward(*inputs):
+ # if return_dict is not None:
+ # return module(*inputs, return_dict=return_dict)
+ # else:
+ # return module(*inputs)
+
+ # return custom_forward
+
+ # new_kwargs = {
+ # "modulation_condn": modulation_condn,
+ # }
+
+ # This line applies gradient checkpointing to the transformer block, allowing for reduced memory usage during training by recomputing intermediate activations in the backward pass.
+ encoder_hidden_states, hidden_states = self._gradient_checkpointing_func(
+ block,
+ hidden_states,
+ encoder_hidden_states,
+ temb,
+ image_rotary_emb,
+ )
+
+ else:
+ # adding the modulation conditioning vector in the separate transformer blocks that will use it for adjusting the features
+ encoder_hidden_states, hidden_states = block(
+ hidden_states=hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+                    temb=temb, # standard timestep/text embedding; the slider delta is applied inside the block to the context scale/shift
+ image_rotary_emb=image_rotary_emb,
+ joint_attention_kwargs=joint_attention_kwargs,
+                    # pass the modulation conditioning vector that controls the edit strength
+                    modulation_condn=modulation_embeddings, # set to None if the conditioning is instead folded into the temb vector above
+ )
+
+ # controlnet residual
+ if controlnet_block_samples is not None:
+ interval_control = len(self.transformer_blocks) / len(controlnet_block_samples)
+ interval_control = int(np.ceil(interval_control))
+ # For Xlabs ControlNet.
+ if controlnet_blocks_repeat:
+ hidden_states = (
+ hidden_states + controlnet_block_samples[index_block % len(controlnet_block_samples)]
+ )
+ else:
+ hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control]
+ hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
+
+        # single-stream transformer blocks, where the text and image tokens are processed jointly in a single pass.
+ for index_block, block in enumerate(self.single_transformer_blocks):
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
+ hidden_states = self._gradient_checkpointing_func(
+ block,
+ hidden_states,
+ temb,
+ image_rotary_emb,
+ )
+
+ else:
+ hidden_states = block(
+ hidden_states=hidden_states,
+ temb=temb,
+ image_rotary_emb=image_rotary_emb,
+ joint_attention_kwargs=joint_attention_kwargs,
+ )
+
+ # controlnet residual
+ if controlnet_single_block_samples is not None:
+ interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples)
+ interval_control = int(np.ceil(interval_control))
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...]
+ + controlnet_single_block_samples[index_block // interval_control]
+ )
+
+ hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
+
+ hidden_states = self.norm_out(hidden_states, temb)
+ output = self.proj_out(hidden_states)
+
+ if USE_PEFT_BACKEND:
+ # remove `lora_scale` from each PEFT layer
+ unscale_lora_layers(self, lora_scale)
+
+ if not return_dict:
+ return (output,)
+
+ return Transformer2DModelOutput(sample=output)
diff --git a/model_weights/.DS_Store b/model_weights/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/model_weights/.DS_Store differ
diff --git a/model_weights/pytorch_lora_weights.safetensors b/model_weights/pytorch_lora_weights.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5bbc92c25564e1404ccf43d87eae95bc633f79f2
--- /dev/null
+++ b/model_weights/pytorch_lora_weights.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5746f688412bbc53ca3eb4042c35021d2dc055f4db8bb36bc8f2368e7ec9ecb1
+size 22505648
diff --git a/model_weights/random_states_0.pkl b/model_weights/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..2a9f7dc87a2ddba24199ee67d16dc593eca2860f
--- /dev/null
+++ b/model_weights/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e391836cc75c0818205e6a36b62a71c6257a78138fb911e2fc7368f419fa870
+size 16513
diff --git a/model_weights/scheduler.bin b/model_weights/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e35c82bfa0da1bdcebfa25ef9c0336cafcc7c398
--- /dev/null
+++ b/model_weights/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48f9175e41bb943ae6b74237893b92279cffc2863e7dc98c14140fdb00cc418f
+size 1465
diff --git a/model_weights/slider_projector.pth b/model_weights/slider_projector.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2215ebf5511f7e2c6db93e88889749328ad7b1b8
--- /dev/null
+++ b/model_weights/slider_projector.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d7ad270dd8ddf2d120ee493171cc94cab3edddfb7d320a53906c8792a2573b8
+size 245425109
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh.png b/sample_images/precomputed/aesthetic_model2_vangogh.png
new file mode 100644
index 0000000000000000000000000000000000000000..8c0f1aca36a759be0db8a1c8b2f2c624ff164194
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40a5c7dcd1eeb2cdcf4a59ea131565746efda7152b6c8965af46a8feff982fcd
+size 1049558
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_0.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..ba1469a1782c8b6686f93cced34ecd36f1b09648
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccee067e0c4228689976afc5c5dafcf67656b1556199f31a5c9a4b6f0c255dca
+size 1293741
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_1.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..bcd77333ba36130e4ce33ff4b41bc73c1695e188
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70ef8ed0c006e79d61e1f97a1425baf3c4c49437585f5a71a795dd640039b84f
+size 1270093
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_10.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..19fdab370671eb1449ba4e437985fd51d1dfefad
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c09368b75267fa394ac8ec07ae78caded81f2b8d29b070b1f775d0b6ccdae4a
+size 2098698
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_2.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..c9b761f6afc0ffd8544b7843121750506423e225
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1406e6561b6636db239db19ae099417d370c7b972edcf5296418f9f083e60a2a
+size 1334873
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_3.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca8f8c0a1ed7a5d828e0709f4a41c71e29ca3e6a
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a0c88a983e7fc785c6a5ed956492ffbdbaeca89bd9eed392c7892fb6639cf7c
+size 1386486
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_4.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e9cf14da84660dc72721030b48acef267f38c6f
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aed877ac7f9679737ed92b089878b388bd431db83490de48e38b670438616233
+size 1453376
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_5.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..194dcc4b61cc52030034dd64cb44bf68286bfb07
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07f35c6defb0cecffa02ff2050f814128683110f1039fca7df8f4439fcbcaf37
+size 1570743
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_6.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..88f24bfb6200bac3e970b50942121ddf59826234
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_6.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75e504622e5ccaa8b955814028659520baa33c1914d6f1d6fba798dd662a8019
+size 1714141
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_7.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..46022f6a1059587cf27205431cb432e8a96eac33
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_7.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a31ebca13426a52b0c4741624b831a5e0ddc876b43ddc1548dd3d9cf12998e
+size 1845286
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_8.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..33ba9cf2e0e5da42ca6c01f41209d5246ffad997
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50f003883e6ae9a33803ec496dc61ce4804fa54b413021f43ef0c73e74003cd7
+size 1949407
diff --git a/sample_images/precomputed/aesthetic_model2_vangogh/image_9.png b/sample_images/precomputed/aesthetic_model2_vangogh/image_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..f5f2a6bb28d863a20c49796671a98234c1ca0057
--- /dev/null
+++ b/sample_images/precomputed/aesthetic_model2_vangogh/image_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06430d50fe132b516a1b06b1ce66091d889c2f1cb9c287e5873efa45090fd46c
+size 2039493
diff --git a/sample_images/precomputed/enfield3_winter_snow.png b/sample_images/precomputed/enfield3_winter_snow.png
new file mode 100644
index 0000000000000000000000000000000000000000..5c78896990debc7b04c83588f4dc8f2d83c59aa3
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7da9bb49158c845e6f64175ab0c8be765566110f5db94ed06000366c6070f5cd
+size 1161126
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_0.png b/sample_images/precomputed/enfield3_winter_snow/image_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..f0bd245beb28ed493279607680450e126f64aa99
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:697b1ad79bfc4706ee49bb45bb4aa4931819f696c2386f60cbfdec164d2b53b6
+size 1448616
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_1.png b/sample_images/precomputed/enfield3_winter_snow/image_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..cd162b82a56386a1952719ca5f499722d62ecfe8
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cffee97f44ea7b96c662645941737ab267e2f7bb157632a2b6ab356756a1777e
+size 1210232
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_10.png b/sample_images/precomputed/enfield3_winter_snow/image_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..bb320026edbeac10a1210bdcc19b5035af8c8956
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6ea9e053852ad2ce9e37918912c6a30495644ee1e2a14ef254b6f9e801cb869
+size 1303491
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_2.png b/sample_images/precomputed/enfield3_winter_snow/image_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1191bf6426750b4ae4e4240b65a115a5a4c0daa6
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8e8347be2460ac636c920e39c23c487d149491aebafae37d07d6a85d1243d3a
+size 1218167
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_3.png b/sample_images/precomputed/enfield3_winter_snow/image_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..89155a4402f8dc3faad9fb1fc1001836326ec6be
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:868d5fe662b030e8735d31066de70cc4af44b6bb1ff6ed8abaaa23f82c16dca4
+size 1218837
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_4.png b/sample_images/precomputed/enfield3_winter_snow/image_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..9d6f0fe36439c993f9882cba7c4945473880b627
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da9b5643b1a8b810556108af1aff9bee2f55b076c844fbe9054a51bd213c52b8
+size 1223703
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_5.png b/sample_images/precomputed/enfield3_winter_snow/image_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..e0db5d5e66d40cbc90554193712dad7626e86578
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3088b90c06640ca9b969eade9d189b5aff41afef8689d4eacaa20e44c3f8223
+size 1225788
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_6.png b/sample_images/precomputed/enfield3_winter_snow/image_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..44d4183187d0054ac56f7f9ab4f4423336c9ee49
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_6.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c038f494d112fade0382addf8182efcfd6e484ab9e00c1b9979a053d1caca5ac
+size 1223506
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_7.png b/sample_images/precomputed/enfield3_winter_snow/image_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..c787e11af5f82f17ffd52cee822457d56d34e50f
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_7.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55c5c842a64e5556cea8f8006f0d206d969e742ccc89eb1cb41004292dba4798
+size 1227574
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_8.png b/sample_images/precomputed/enfield3_winter_snow/image_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..84050c0827fd7f4289465ac63c1051932e463339
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:761372cb2ac03280c3c928c2bef56a65ebe029eafdd08a8e1b682c566dcce954
+size 1246099
diff --git a/sample_images/precomputed/enfield3_winter_snow/image_9.png b/sample_images/precomputed/enfield3_winter_snow/image_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..aedd41a5e7cbbc0c84bfa2945f347b0fe89468f8
--- /dev/null
+++ b/sample_images/precomputed/enfield3_winter_snow/image_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cadb36626fa1354c56218a2cff54a342701388d8378a1d507bfeb892a4b25181
+size 1266261
diff --git a/sample_images/precomputed/jackson_fluffy.png b/sample_images/precomputed/jackson_fluffy.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ecf43e18d91ce30f2cd2c3df1c25d6e349f7149
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4090275dfde46631b7127a84ce1eebc2a62ff17b2be09850984f47756d268cb
+size 2142888
diff --git a/sample_images/precomputed/jackson_fluffy/image_0.png b/sample_images/precomputed/jackson_fluffy/image_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..d14ed6dca4fe6d935902b283eb993215e8496aca
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e902034f6af4a9abf22c4aff5d800be827641466c9c6b409c173cf690ac8254
+size 1461178
diff --git a/sample_images/precomputed/jackson_fluffy/image_1.png b/sample_images/precomputed/jackson_fluffy/image_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..c7ce474afd200319635a8b523bf1a86c2368e980
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6799019b8334d5045a476ecae2b62f0259a92ca667ef48d5a25c85fc68b79fd4
+size 1142821
diff --git a/sample_images/precomputed/jackson_fluffy/image_10.png b/sample_images/precomputed/jackson_fluffy/image_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9a4df1320392578ef9f2e443ec40cc7cd65aaa1
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91677e9150c384413ce86e1881cafc5bf7a0bdc1958bc7e86eed236e12ce5c56
+size 1145084
diff --git a/sample_images/precomputed/jackson_fluffy/image_11.png b/sample_images/precomputed/jackson_fluffy/image_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..89dc39a1bff123a5356df41eaa7b5cea869c6cb4
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_11.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87de94102b04ad5be63afb7fce11c32e65e752c9f09813fdc4f29ae6abcb271a
+size 1149069
diff --git a/sample_images/precomputed/jackson_fluffy/image_2.png b/sample_images/precomputed/jackson_fluffy/image_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..2935c35475d1b01c35386b296cb8d779b618c720
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f6208e7ba3d460ae81f322133f16b3a91a6af0f3e84f5f4892dd68d94f099c1
+size 1104564
diff --git a/sample_images/precomputed/jackson_fluffy/image_3.png b/sample_images/precomputed/jackson_fluffy/image_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..972ddf08854142fe048fb631b5c55f718666c45a
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57476e1327bed4fa83fba4d7ff0ec71e18ab30e51c1e57f7e410dd025b3f2793
+size 1097191
diff --git a/sample_images/precomputed/jackson_fluffy/image_4.png b/sample_images/precomputed/jackson_fluffy/image_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..049238e60dcfa3692e1cef3f129a1ffe605bb7ac
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f86db628089393fb4c8822a7b49a6fe9dcd357d8df7159a6939fb59aac9d8184
+size 1109883
diff --git a/sample_images/precomputed/jackson_fluffy/image_5.png b/sample_images/precomputed/jackson_fluffy/image_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..6bdc247c2b993efafa96f5e8481396fe87d1e600
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae7fc49b785c36d92f9ac91bd217d020c67ef1bd6712e048cacdcab04c4cd0f
+size 1121700
diff --git a/sample_images/precomputed/jackson_fluffy/image_6.png b/sample_images/precomputed/jackson_fluffy/image_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..14080359f0ac5284773d7becef7d1e6f4cfb2428
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_6.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a02076072483204e12b4596df807c5ba212e65f0b05bfb0aa57c4a9dc203ae2d
+size 1135145
diff --git a/sample_images/precomputed/jackson_fluffy/image_7.png b/sample_images/precomputed/jackson_fluffy/image_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..7986f24bf969d5cfb46e7e5f5258e38f5a991548
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_7.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a854d126f2a9411561dac0376530980cffdd094da3408742985026d0e8a0b8ab
+size 1141165
diff --git a/sample_images/precomputed/jackson_fluffy/image_8.png b/sample_images/precomputed/jackson_fluffy/image_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..158105b219939c42bae0032407e271113a404d7f
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e658d1c4cf2c9f793c6fc11131792797e360100d5a081362144463b098958d30
+size 1139347
diff --git a/sample_images/precomputed/jackson_fluffy/image_9.png b/sample_images/precomputed/jackson_fluffy/image_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..712443146ce81fe688cb54433c3d084a64ee564f
--- /dev/null
+++ b/sample_images/precomputed/jackson_fluffy/image_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a534b23e078db5dfbb80fb7570bb651141f07560f7878efb27b3184b8a177335
+size 1139695
diff --git a/sample_images/precomputed/light_lamp_blue_side.png b/sample_images/precomputed/light_lamp_blue_side.png
new file mode 100644
index 0000000000000000000000000000000000000000..5b6f9af5353464fd89d98ad33424611f23f276ba
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0449f7a095b0ce774de821c9e21894de2542858ee753c7df6141a1cb42ba6569
+size 1598733
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_0.png b/sample_images/precomputed/light_lamp_blue_side/image_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..0e21c2ecfae427cd622e8fbab1536a98b0c5238d
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fe71d6a88db7fb27bfdceb1e1332c20b18d599d14967a2afa2d9c8a5d26b25
+size 1084281
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_1.png b/sample_images/precomputed/light_lamp_blue_side/image_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3768ac8e11b30d7de9bd74158bb589325a9dba4
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c19b76df23196565bc0ced8e83fdfc1a830659393620fc89946f9efc7fbc99bd
+size 870448
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_10.png b/sample_images/precomputed/light_lamp_blue_side/image_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d00eb71c1f647ed39da89e31bedca6f5770359e
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8305232fd62f2232260becdc68730e17b4422ba7fd46300c46618ca0f5efe43
+size 955732
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_2.png b/sample_images/precomputed/light_lamp_blue_side/image_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..3b387aeaded7f9e96dd56ae4e3fc999562cea0fe
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a90c1382aa4a8384e23bfa8601b52180f62e06267bbb6ff1309400298235ad3
+size 861294
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_3.png b/sample_images/precomputed/light_lamp_blue_side/image_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..09764b63b7dc84ffed902bd10bdf4efd2c047da7
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50ad6b4a359695cfc7f501bf6e0c310ac146281d1de6d465af01e1c75bafac37
+size 864481
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_4.png b/sample_images/precomputed/light_lamp_blue_side/image_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..2073f71438b941e2b930a028adda02aeba5f3201
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96b3645b75af60dc54d6490b63dc9dd444696a932756b605cb4151744b87b3ba
+size 883146
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_5.png b/sample_images/precomputed/light_lamp_blue_side/image_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..cd6ed52dec28b360e7567551f610e2d08199de5c
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fcf5af163c818767bba790ccd78d2ccf721ac29b7897fc55156a8d32635ce53
+size 889717
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_6.png b/sample_images/precomputed/light_lamp_blue_side/image_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..3671c4a558dc8dfbaaa5d296de3955b5ee5cc73c
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_6.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bc480d926b5b6a8abd9e2ede5e711da3bbd59b60669c94bd18c095f7504b1d1
+size 913177
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_7.png b/sample_images/precomputed/light_lamp_blue_side/image_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..ecc0eaf6105def15746ab780f2298cdf2a3ff025
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_7.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c2d9067e420c726b672e16925b9a955c2f18331057f763185c2bc885c876b32
+size 919014
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_8.png b/sample_images/precomputed/light_lamp_blue_side/image_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..ac093a3443000449a781aafdf81d5882926a81cb
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b836af5065972ee529296ff5497bbadafed86a4da376fd47ac49f97cd091be92
+size 933655
diff --git a/sample_images/precomputed/light_lamp_blue_side/image_9.png b/sample_images/precomputed/light_lamp_blue_side/image_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..90d5c079a8b53c1ef1ddc5d3e01bf2456bf8041c
--- /dev/null
+++ b/sample_images/precomputed/light_lamp_blue_side/image_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a65d9f87ded7f75ffd67740ab3c942f279f0da253e088cf20bbf8f3550ebec95
+size 948566
diff --git a/sample_images/precomputed/venice1_grow_ivy.png b/sample_images/precomputed/venice1_grow_ivy.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef83351ee33dd571b076cc01508ea73266ddd21f
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b8532deb19d1f909a2c73bff728509bbe8a6abf01b3935bd2f6319aaa3e02eb
+size 1140618
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_0.png b/sample_images/precomputed/venice1_grow_ivy/image_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2e800062b2716c2b15f53327e9277f00b7fa5a3
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e5e259da0133b3c8e96e23e11473e80ab7f3ec5ee16a70b2e6c24f182f78fe8
+size 1315746
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_1.png b/sample_images/precomputed/venice1_grow_ivy/image_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b74f6ffc44b10e8f3335612063e756f5fad08a6
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acf86f4e17ac1671e3ed79a09d25666c2d2c2214f42624d9613c44e0dcb2102c
+size 1236434
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_10.png b/sample_images/precomputed/venice1_grow_ivy/image_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ebd264aa022fbe526c8820bab44f217e7889764
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ca26013c799433209eea4e28ea0a8d10390950a47ee0cbdb6ad1b00e77094e3
+size 1410222
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_2.png b/sample_images/precomputed/venice1_grow_ivy/image_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..cce160064298065a21b64f07bf3ad9a87ef3d7ea
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b99e2f0761cb20dbc3191d942b9165dfc621af5e77ebad391f225f7f9c8d962
+size 1248216
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_3.png b/sample_images/precomputed/venice1_grow_ivy/image_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..e1092c9ca8c42358b413015555bb5164d4be8e40
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e410231d43d792b6f7ff044da4d20f3fc58b2e88b1e7c4967ece76691a78ee40
+size 1252077
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_4.png b/sample_images/precomputed/venice1_grow_ivy/image_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..7246425c6dfedd0fa360518c018a739a4f7e3c41
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b91e348526fc87f6fa3af9d5bca5d55c6af35e2b8ff812006628ba39173446d
+size 1258831
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_5.png b/sample_images/precomputed/venice1_grow_ivy/image_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..742c8796956edfa76ec11c2204212cc876a48c79
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb4da7687c5005de54f573ff51482d67d026da25a8bfcfae0afd5e5f115acc99
+size 1263482
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_6.png b/sample_images/precomputed/venice1_grow_ivy/image_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..43d7bcb1d43a96bc898fb3fdc080906fd29ba398
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_6.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed88828a09c92b7536149dbe899a10ab98d069d4d53ff9e10e5e48351540a32c
+size 1275114
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_7.png b/sample_images/precomputed/venice1_grow_ivy/image_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..89eaa9d869b3ca5b0f4cfe6abd40ef03be460792
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_7.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8027759f7974871c9766ba833a8c90bad2610be2a9c7d7e7500633763ad7066a
+size 1298054
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_8.png b/sample_images/precomputed/venice1_grow_ivy/image_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b783a95fb5a5712f010e9d8b8879545fe0602b3
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a78309f8a8429bbce65fb94fdea36d81c23af7fac32faad050e28e3296544f20
+size 1333925
diff --git a/sample_images/precomputed/venice1_grow_ivy/image_9.png b/sample_images/precomputed/venice1_grow_ivy/image_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..56c3bf925563200f1e1955d65fa31bb17d9bd0e2
--- /dev/null
+++ b/sample_images/precomputed/venice1_grow_ivy/image_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da8d2f1feb15ac34bef20988e39ccef7ef39478fbeea09ce72090fe1678bb24b
+size 1372347