SahilCarterr committed
Commit 4b28759 · verified · 1 Parent(s): cb0e352

Upload 14 files

.gitattributes CHANGED
@@ -42,3 +42,9 @@ docs/static/images/multilingual_samples.png filter=lfs diff=lfs merge=lfs -text
  docs/static/images/non-text.jpg filter=lfs diff=lfs merge=lfs -text
  docs/static/images/self_custom.jpg filter=lfs diff=lfs merge=lfs -text
  docs/static/images/teaser.jpg filter=lfs diff=lfs merge=lfs -text
+ samples/fire.jpg filter=lfs diff=lfs merge=lfs -text
+ samples/test11_ref.png filter=lfs diff=lfs merge=lfs -text
+ samples/test11_source.png filter=lfs diff=lfs merge=lfs -text
+ samples/test17_source.png filter=lfs diff=lfs merge=lfs -text
+ samples/test50_ref.png filter=lfs diff=lfs merge=lfs -text
+ samples/test50_source.png filter=lfs diff=lfs merge=lfs -text
models/attention_processor.py ADDED
@@ -0,0 +1,124 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from diffusers.models.normalization import RMSNorm
+ from typing import Optional
+
+
+ class FluxAttnProcessor(nn.Module):
+     def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0, num_tokens=4):
+         super().__init__()
+
+         self.hidden_size = hidden_size
+         self.cross_attention_dim = cross_attention_dim
+         self.scale = scale
+         self.num_tokens = num_tokens
+
+         self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False)
+         self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False)
+
+         self.norm_added_k = RMSNorm(128, eps=1e-5, elementwise_affine=False)
+
+     def __call__(
+         self,
+         attn,
+         hidden_states: torch.FloatTensor,
+         image_emb: torch.FloatTensor,
+         encoder_hidden_states: torch.FloatTensor = None,
+         attention_mask: Optional[torch.FloatTensor] = None,
+         image_rotary_emb: Optional[torch.Tensor] = None,
+     ) -> torch.FloatTensor:
+         batch_size, _, _ = hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
+
+         query = attn.to_q(hidden_states)
+         key = attn.to_k(hidden_states)
+         value = attn.to_v(hidden_states)
+
+         inner_dim = key.shape[-1]
+         head_dim = inner_dim // attn.heads
+
+         query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+         key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+         value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+
+         if attn.norm_q is not None:
+             query = attn.norm_q(query)
+         if attn.norm_k is not None:
+             key = attn.norm_k(key)
+
+         if image_emb is not None:
+             ip_hidden_states = image_emb
+             ip_hidden_states_key_proj = self.to_k_ip(ip_hidden_states)
+             ip_hidden_states_value_proj = self.to_v_ip(ip_hidden_states)
+
+             ip_hidden_states_key_proj = ip_hidden_states_key_proj.view(
+                 batch_size, -1, attn.heads, head_dim
+             ).transpose(1, 2)
+             ip_hidden_states_value_proj = ip_hidden_states_value_proj.view(
+                 batch_size, -1, attn.heads, head_dim
+             ).transpose(1, 2)
+
+             ip_hidden_states_key_proj = self.norm_added_k(ip_hidden_states_key_proj)
+
+             ip_hidden_states = F.scaled_dot_product_attention(query,
+                                                               ip_hidden_states_key_proj,
+                                                               ip_hidden_states_value_proj,
+                                                               dropout_p=0.0, is_causal=False)
+
+             ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
+             ip_hidden_states = ip_hidden_states.to(query.dtype)
+
+         if encoder_hidden_states is not None:
+             encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states)
+             encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states)
+             encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states)
+
+             encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view(
+                 batch_size, -1, attn.heads, head_dim
+             ).transpose(1, 2)
+             encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(
+                 batch_size, -1, attn.heads, head_dim
+             ).transpose(1, 2)
+             encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view(
+                 batch_size, -1, attn.heads, head_dim
+             ).transpose(1, 2)
+
+             if attn.norm_added_q is not None:
+                 encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj)
+             if attn.norm_added_k is not None:
+                 encoder_hidden_states_key_proj = attn.norm_added_k(encoder_hidden_states_key_proj)
+
+             query = torch.cat([encoder_hidden_states_query_proj, query], dim=2)
+             key = torch.cat([encoder_hidden_states_key_proj, key], dim=2)
+             value = torch.cat([encoder_hidden_states_value_proj, value], dim=2)
+
+         if image_rotary_emb is not None:
+             from diffusers.models.embeddings import apply_rotary_emb
+
+             query = apply_rotary_emb(query, image_rotary_emb)
+             key = apply_rotary_emb(key, image_rotary_emb)
+
+         hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False)
+
+         hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
+         hidden_states = hidden_states.to(query.dtype)
+
+         if encoder_hidden_states is not None:
+
+             encoder_hidden_states, hidden_states = (
+                 hidden_states[:, : encoder_hidden_states.shape[1]],
+                 hidden_states[:, encoder_hidden_states.shape[1]:],
+             )
+             if image_emb is not None:
+                 hidden_states = hidden_states + self.scale * ip_hidden_states
+
+             hidden_states = attn.to_out[0](hidden_states)
+             hidden_states = attn.to_out[1](hidden_states)
+             encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
+
+             return hidden_states, encoder_hidden_states
+         else:
+             if image_emb is not None:
+                 hidden_states = hidden_states + self.scale * ip_hidden_states
+
+             return hidden_states
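
The processor keeps the standard Flux joint text-image attention path and adds a decoupled branch for the reference image: image_emb is projected by to_k_ip / to_v_ip, the keys are RMS-normalized, the latent-token queries attend to those projected tokens, and the result is blended back into hidden_states weighted by scale. Below is a minimal shape sketch of just that branch with dummy tensors; the sizes (24 heads x 128 head dim = 3072 hidden size, 4096-d joint attention space, 32 image tokens) are assumptions for illustration, not values taken from this commit.

import torch
import torch.nn.functional as F

from models.attention_processor import FluxAttnProcessor

proc = FluxAttnProcessor(hidden_size=3072, cross_attention_dim=4096, scale=1.0)

batch, heads, head_dim = 2, 24, 128
query = torch.randn(batch, heads, 1024, head_dim)  # latent-token queries, already split into heads
image_emb = torch.randn(batch, 32, 4096)           # projected reference-image tokens

# Decoupled branch: project image tokens to K/V, normalize K, attend, then blend with scale.
ip_key = proc.to_k_ip(image_emb).view(batch, -1, heads, head_dim).transpose(1, 2)
ip_value = proc.to_v_ip(image_emb).view(batch, -1, heads, head_dim).transpose(1, 2)
ip_key = proc.norm_added_k(ip_key)
ip_out = F.scaled_dot_product_attention(query, ip_key, ip_value)
ip_out = ip_out.transpose(1, 2).reshape(batch, -1, heads * head_dim)
print(ip_out.shape)  # torch.Size([2, 1024, 3072]); added to hidden_states as scale * ip_out
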
models/calligrapher.py ADDED
@@ -0,0 +1,115 @@
+ from PIL import Image
+ import torch
+
+ from transformers import AutoProcessor, SiglipVisionModel
+ from models.projection_models import MLPProjModel, QFormerProjModel
+ from models.attention_processor import FluxAttnProcessor
+
+
+ class Calligrapher:
+     def __init__(self, sd_pipe, image_encoder_path, calligrapher_path, device, num_tokens):
+         self.device = device
+         self.image_encoder_path = image_encoder_path
+         self.calligrapher_path = calligrapher_path
+         self.num_tokens = num_tokens
+
+         self.pipe = sd_pipe.to(self.device)
+         self.set_attn_adapter()
+
+         self.image_encoder = SiglipVisionModel.from_pretrained(image_encoder_path).to(self.device, dtype=torch.bfloat16)
+         self.clip_image_processor = AutoProcessor.from_pretrained(self.image_encoder_path)
+         self.image_proj_mlp, self.image_proj_qformer = self.init_proj()
+
+         self.load_models()
+
+     def init_proj(self):
+         image_proj_mlp = MLPProjModel(
+             cross_attention_dim=self.pipe.transformer.config.joint_attention_dim,
+             id_embeddings_dim=1152,
+             num_tokens=self.num_tokens,
+         ).to(self.device, dtype=torch.bfloat16)
+
+         image_proj_qformer = QFormerProjModel(
+             cross_attention_dim=4096,
+             id_embeddings_dim=1152,
+             num_tokens=self.num_tokens,
+             num_heads=8,
+             num_query_tokens=32
+         ).to(self.device, dtype=torch.bfloat16)
+         return image_proj_mlp, image_proj_qformer
+
+     def set_attn_adapter(self):
+         transformer = self.pipe.transformer
+         attn_procs = {}
+         for name in transformer.attn_processors.keys():
+             if name.startswith("transformer_blocks.") or name.startswith("single_transformer_blocks"):
+                 attn_procs[name] = FluxAttnProcessor(
+                     hidden_size=transformer.config.num_attention_heads * transformer.config.attention_head_dim,
+                     cross_attention_dim=transformer.config.joint_attention_dim,
+                     num_tokens=self.num_tokens,
+                 ).to(self.device, dtype=torch.bfloat16)
+             else:
+                 attn_procs[name] = transformer.attn_processors[name]
+         transformer.set_attn_processor(attn_procs)
+
+     def load_models(self):
+         state_dict = torch.load(self.calligrapher_path, map_location="cpu")
+         self.image_proj_mlp.load_state_dict(state_dict["image_proj_mlp"], strict=True)
+         self.image_proj_qformer.load_state_dict(state_dict["image_proj_qformer"], strict=True)
+         target_layers = torch.nn.ModuleList(self.pipe.transformer.attn_processors.values())
+         target_layers.load_state_dict(state_dict["attn_adapter"], strict=False)
+
+     @torch.inference_mode()
+     def get_image_embeds(self, pil_image=None, clip_image_embeds=None):
+         if pil_image is not None:
+             if isinstance(pil_image, Image.Image):
+                 pil_image = [pil_image]
+             clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
+             clip_image_embeds = self.image_encoder(
+                 clip_image.to(self.device, dtype=self.image_encoder.dtype)).pooler_output
+             clip_image_embeds = clip_image_embeds.to(dtype=torch.bfloat16)
+         else:
+             clip_image_embeds = clip_image_embeds.to(self.device, dtype=torch.bfloat16)
+         image_prompt_embeds = self.image_proj_mlp(clip_image_embeds) \
+             + self.image_proj_qformer(clip_image_embeds)
+         return image_prompt_embeds
+
+     def set_scale(self, scale):
+         for attn_processor in self.pipe.transformer.attn_processors.values():
+             if isinstance(attn_processor, FluxAttnProcessor):
+                 attn_processor.scale = scale
+
+     def generate(
+         self,
+         image=None,
+         mask_image=None,
+         ref_image=None,
+         clip_image_embeds=None,
+         prompt=None,
+         scale=1.0,
+         seed=None,
+         num_inference_steps=30,
+         **kwargs,
+     ):
+         self.set_scale(scale)
+
+         image_prompt_embeds = self.get_image_embeds(
+             pil_image=ref_image, clip_image_embeds=clip_image_embeds
+         )
+
+         if seed is None:
+             generator = None
+         else:
+             generator = torch.Generator(self.device).manual_seed(seed)
+
+         images = self.pipe(
+             image=image,
+             mask_image=mask_image,
+             prompt=prompt,
+             image_emb=image_prompt_embeds,
+             num_inference_steps=num_inference_steps,
+             generator=generator,
+             **kwargs,
+         ).images
+
+         return images
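
A wiring sketch for the class above. The pipeline class and all paths below are placeholders or assumptions (the Flux inpainting pipeline that actually forwards image_emb into the patched transformer is not part of this commit); only the sample images come from this upload.

import torch
from PIL import Image
from diffusers import FluxFillPipeline  # assumed: a Flux fill pipeline patched to accept image_emb

from models.calligrapher import Calligrapher

pipe = FluxFillPipeline.from_pretrained("path/to/flux-fill-checkpoint", torch_dtype=torch.bfloat16)
cali = Calligrapher(
    sd_pipe=pipe,
    image_encoder_path="path/to/siglip-vision-encoder",
    calligrapher_path="path/to/calligrapher.bin",  # checkpoint with image_proj_mlp / image_proj_qformer / attn_adapter
    device="cuda",
    num_tokens=128,  # assumed value
)

result = cali.generate(
    image=Image.open("samples/test11_source.png"),
    mask_image=Image.open("samples/test11_mask.png"),
    ref_image=Image.open("samples/test11_ref.png"),
    prompt="Rewrite the masked region in the reference style.",
    scale=1.0,
    seed=42,
)[0]
result.save("output.png")
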
models/projection_models.py ADDED
@@ -0,0 +1,71 @@
+ import torch
+ import torch.nn as nn
+
+
+ class QFormerProjModel(nn.Module):
+     def __init__(self,
+                  cross_attention_dim=4096,
+                  id_embeddings_dim=1152,
+                  num_tokens=128,
+                  num_heads=8,
+                  num_query_tokens=32):
+         super().__init__()
+         self.cross_attention_dim = cross_attention_dim
+         self.num_tokens = num_tokens
+
+         self.query_embeds = nn.Parameter(torch.randn(num_tokens, cross_attention_dim))
+
+         self.id_proj = nn.Sequential(
+             nn.Linear(id_embeddings_dim, id_embeddings_dim * 2),
+             nn.GELU(),
+             nn.Linear(id_embeddings_dim * 2, cross_attention_dim * num_query_tokens)
+         )
+
+         self.cross_attn = nn.MultiheadAttention(
+             embed_dim=cross_attention_dim,
+             num_heads=num_heads,
+             batch_first=True
+         )
+         self.cross_attn_norm = nn.LayerNorm(cross_attention_dim)
+
+         self.norm = nn.LayerNorm(cross_attention_dim)
+
+     def forward(self, id_embeds):
+         batch_size = id_embeds.size(0)
+
+         projected = self.id_proj(id_embeds)
+         kv = projected.view(batch_size, -1, self.cross_attention_dim)
+
+         queries = self.query_embeds.unsqueeze(0).expand(batch_size, -1, -1)
+
+         attn_output, _ = self.cross_attn(
+             query=queries,
+             key=kv,
+             value=kv
+         )
+         attn_output = self.cross_attn_norm(attn_output + queries)
+
+         return self.norm(attn_output)
+
+
+ class MLPProjModel(torch.nn.Module):
+     def __init__(self,
+                  cross_attention_dim=768,
+                  id_embeddings_dim=512,
+                  num_tokens=4):
+         super().__init__()
+         self.cross_attention_dim = cross_attention_dim
+         self.num_tokens = num_tokens
+
+         self.proj = torch.nn.Sequential(
+             torch.nn.Linear(id_embeddings_dim, id_embeddings_dim * 2),
+             torch.nn.GELU(),
+             torch.nn.Linear(id_embeddings_dim * 2, cross_attention_dim * num_tokens),
+         )
+         self.norm = torch.nn.LayerNorm(cross_attention_dim)
+
+     def forward(self, id_embeds):
+         x = self.proj(id_embeds)
+         x = x.reshape(-1, self.num_tokens, self.cross_attention_dim)
+         x = self.norm(x)
+         return x
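
Both heads map a pooled image embedding to a sequence of num_tokens vectors in the transformer's joint attention space, and Calligrapher.get_image_embeds sums the two outputs into a single image-token sequence. A small shape sketch with dummy inputs follows; the 1152/4096 dimensions match what Calligrapher passes in, while num_tokens=128 is an assumed value (note the MLP head's final Linear is very large at these sizes).

import torch

from models.projection_models import MLPProjModel, QFormerProjModel

id_embeds = torch.randn(2, 1152)  # pooled vision-encoder output for a batch of 2

mlp = MLPProjModel(cross_attention_dim=4096, id_embeddings_dim=1152, num_tokens=128)
qformer = QFormerProjModel(cross_attention_dim=4096, id_embeddings_dim=1152,
                           num_tokens=128, num_heads=8, num_query_tokens=32)

print(mlp(id_embeds).shape)      # torch.Size([2, 128, 4096])
print(qformer(id_embeds).shape)  # torch.Size([2, 128, 4096])

# Summed into one sequence, as in Calligrapher.get_image_embeds.
image_emb = mlp(id_embeds) + qformer(id_embeds)
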
models/transformer_flux_inpainting.py ADDED
@@ -0,0 +1,624 @@
1
+ # Copyright 2024 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from typing import Any, Dict, Optional, Tuple, Union
17
+
18
+ import numpy as np
19
+ import torch
20
+ import torch.nn as nn
21
+ import torch.nn.functional as F
22
+
23
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
24
+ from diffusers.loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin
25
+ from diffusers.models.attention import FeedForward
26
+ from diffusers.models.attention_processor import (
27
+ Attention,
28
+ AttentionProcessor,
29
+ FluxAttnProcessor2_0,
30
+ FluxAttnProcessor2_0_NPU,
31
+ FusedFluxAttnProcessor2_0,
32
+ )
33
+ from diffusers.models.modeling_utils import ModelMixin
34
+ from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle
35
+ from diffusers.utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
36
+ from diffusers.utils.import_utils import is_torch_npu_available
37
+ from diffusers.utils.torch_utils import maybe_allow_in_graph
38
+ from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed
39
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
40
+
41
+
42
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
43
+
44
+
45
+ @maybe_allow_in_graph
46
+ class FluxSingleTransformerBlock(nn.Module):
47
+ r"""
48
+ A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3.
49
+
50
+ Reference: https://arxiv.org/abs/2403.03206
51
+
52
+ Parameters:
53
+ dim (`int`): The number of channels in the input and output.
54
+ num_attention_heads (`int`): The number of heads to use for multi-head attention.
55
+ attention_head_dim (`int`): The number of channels in each head.
56
+ context_pre_only (`bool`): Boolean to determine if we should add some blocks associated with the
57
+ processing of `context` conditions.
58
+ """
59
+
60
+ def __init__(self, dim, num_attention_heads, attention_head_dim, mlp_ratio=4.0):
61
+ super().__init__()
62
+ self.mlp_hidden_dim = int(dim * mlp_ratio)
63
+
64
+ self.norm = AdaLayerNormZeroSingle(dim)
65
+ self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim)
66
+ self.act_mlp = nn.GELU(approximate="tanh")
67
+ self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim)
68
+
69
+ if is_torch_npu_available():
70
+ processor = FluxAttnProcessor2_0_NPU()
71
+ else:
72
+ processor = FluxAttnProcessor2_0()
73
+ self.attn = Attention(
74
+ query_dim=dim,
75
+ cross_attention_dim=None,
76
+ dim_head=attention_head_dim,
77
+ heads=num_attention_heads,
78
+ out_dim=dim,
79
+ bias=True,
80
+ processor=processor,
81
+ qk_norm="rms_norm",
82
+ eps=1e-6,
83
+ pre_only=True,
84
+ )
85
+
86
+ def forward(
87
+ self,
88
+ hidden_states: torch.Tensor,
89
+ temb: torch.Tensor,
90
+ image_emb=None,
91
+ image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
92
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
93
+ ) -> torch.Tensor:
94
+ residual = hidden_states
95
+ norm_hidden_states, gate = self.norm(hidden_states, emb=temb)
96
+ mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states))
97
+ joint_attention_kwargs = joint_attention_kwargs or {}
98
+ attn_output = self.attn(
99
+ hidden_states=norm_hidden_states,
100
+ image_rotary_emb=image_rotary_emb,
101
+ image_emb=image_emb,
102
+ **joint_attention_kwargs,
103
+ )
104
+
105
+ hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2)
106
+ gate = gate.unsqueeze(1)
107
+ hidden_states = gate * self.proj_out(hidden_states)
108
+ hidden_states = residual + hidden_states
109
+ if hidden_states.dtype == torch.float16:
110
+ hidden_states = hidden_states.clip(-65504, 65504)
111
+
112
+ return hidden_states
113
+
114
+
115
+ @maybe_allow_in_graph
116
+ class FluxTransformerBlock(nn.Module):
117
+ r"""
118
+ A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3.
119
+
120
+ Reference: https://arxiv.org/abs/2403.03206
121
+
122
+ Args:
123
+ dim (`int`):
124
+ The embedding dimension of the block.
125
+ num_attention_heads (`int`):
126
+ The number of attention heads to use.
127
+ attention_head_dim (`int`):
128
+ The number of dimensions to use for each attention head.
129
+ qk_norm (`str`, defaults to `"rms_norm"`):
130
+ The normalization to use for the query and key tensors.
131
+ eps (`float`, defaults to `1e-6`):
132
+ The epsilon value to use for the normalization.
133
+ """
134
+
135
+ def __init__(
136
+ self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6
137
+ ):
138
+ super().__init__()
139
+
140
+ self.norm1 = AdaLayerNormZero(dim)
141
+
142
+ self.norm1_context = AdaLayerNormZero(dim)
143
+
144
+ if hasattr(F, "scaled_dot_product_attention"):
145
+ processor = FluxAttnProcessor2_0()
146
+ else:
147
+ raise ValueError(
148
+ "The current PyTorch version does not support the `scaled_dot_product_attention` function."
149
+ )
150
+ self.attn = Attention(
151
+ query_dim=dim,
152
+ cross_attention_dim=None,
153
+ added_kv_proj_dim=dim,
154
+ dim_head=attention_head_dim,
155
+ heads=num_attention_heads,
156
+ out_dim=dim,
157
+ context_pre_only=False,
158
+ bias=True,
159
+ processor=processor,
160
+ qk_norm=qk_norm,
161
+ eps=eps,
162
+ )
163
+
164
+ self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
165
+ self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
166
+
167
+ self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
168
+ self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
169
+
170
+ # let chunk size default to None
171
+ self._chunk_size = None
172
+ self._chunk_dim = 0
173
+
174
+ def forward(
175
+ self,
176
+ hidden_states: torch.Tensor,
177
+ encoder_hidden_states: torch.Tensor,
178
+ temb: torch.Tensor,
179
+ image_emb=None,
180
+ image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
181
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
182
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
183
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb)
184
+
185
+ norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context(
186
+ encoder_hidden_states, emb=temb
187
+ )
188
+ joint_attention_kwargs = joint_attention_kwargs or {}
189
+ # Attention.
190
+ attention_outputs = self.attn(
191
+ hidden_states=norm_hidden_states,
192
+ encoder_hidden_states=norm_encoder_hidden_states,
193
+ image_rotary_emb=image_rotary_emb,
194
+ image_emb=image_emb,
195
+ **joint_attention_kwargs,
196
+ )
197
+
198
+ if len(attention_outputs) == 2:
199
+ attn_output, context_attn_output = attention_outputs
200
+ elif len(attention_outputs) == 3:
201
+ attn_output, context_attn_output, ip_attn_output = attention_outputs
202
+
203
+ # Process attention outputs for the `hidden_states`.
204
+ attn_output = gate_msa.unsqueeze(1) * attn_output
205
+ hidden_states = hidden_states + attn_output
206
+
207
+ norm_hidden_states = self.norm2(hidden_states)
208
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
209
+
210
+ ff_output = self.ff(norm_hidden_states)
211
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
212
+
213
+ hidden_states = hidden_states + ff_output
214
+ if len(attention_outputs) == 3:
215
+ hidden_states = hidden_states + ip_attn_output
216
+
217
+ # Process attention outputs for the `encoder_hidden_states`.
218
+
219
+ context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
220
+ encoder_hidden_states = encoder_hidden_states + context_attn_output
221
+
222
+ norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
223
+ norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
224
+
225
+ context_ff_output = self.ff_context(norm_encoder_hidden_states)
226
+ encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output
227
+ if encoder_hidden_states.dtype == torch.float16:
228
+ encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504)
229
+
230
+ return encoder_hidden_states, hidden_states
231
+
232
+
233
+ class FluxTransformer2DModel(
234
+ ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, FluxTransformer2DLoadersMixin
235
+ ):
236
+ """
237
+ The Transformer model introduced in Flux.
238
+
239
+ Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
240
+
241
+ Args:
242
+ patch_size (`int`, defaults to `1`):
243
+ Patch size to turn the input data into small patches.
244
+ in_channels (`int`, defaults to `64`):
245
+ The number of channels in the input.
246
+ out_channels (`int`, *optional*, defaults to `None`):
247
+ The number of channels in the output. If not specified, it defaults to `in_channels`.
248
+ num_layers (`int`, defaults to `19`):
249
+ The number of layers of dual stream DiT blocks to use.
250
+ num_single_layers (`int`, defaults to `38`):
251
+ The number of layers of single stream DiT blocks to use.
252
+ attention_head_dim (`int`, defaults to `128`):
253
+ The number of dimensions to use for each attention head.
254
+ num_attention_heads (`int`, defaults to `24`):
255
+ The number of attention heads to use.
256
+ joint_attention_dim (`int`, defaults to `4096`):
257
+ The number of dimensions to use for the joint attention (embedding/channel dimension of
258
+ `encoder_hidden_states`).
259
+ pooled_projection_dim (`int`, defaults to `768`):
260
+ The number of dimensions to use for the pooled projection.
261
+ guidance_embeds (`bool`, defaults to `False`):
262
+ Whether to use guidance embeddings for guidance-distilled variant of the model.
263
+ axes_dims_rope (`Tuple[int]`, defaults to `(16, 56, 56)`):
264
+ The dimensions to use for the rotary positional embeddings.
265
+ """
266
+
267
+ _supports_gradient_checkpointing = True
268
+ _no_split_modules = ["FluxTransformerBlock", "FluxSingleTransformerBlock"]
269
+
270
+ @register_to_config
271
+ def __init__(
272
+ self,
273
+ patch_size: int = 1,
274
+ in_channels: int = 64,
275
+ out_channels: Optional[int] = None,
276
+ num_layers: int = 19,
277
+ num_single_layers: int = 38,
278
+ attention_head_dim: int = 128,
279
+ num_attention_heads: int = 24,
280
+ joint_attention_dim: int = 4096,
281
+ pooled_projection_dim: int = 768,
282
+ guidance_embeds: bool = False,
283
+ axes_dims_rope: Tuple[int] = (16, 56, 56),
284
+ ):
285
+ super().__init__()
286
+ self.out_channels = out_channels or in_channels
287
+ self.inner_dim = num_attention_heads * attention_head_dim
288
+
289
+ self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope)
290
+
291
+ text_time_guidance_cls = (
292
+ CombinedTimestepGuidanceTextProjEmbeddings if guidance_embeds else CombinedTimestepTextProjEmbeddings
293
+ )
294
+ self.time_text_embed = text_time_guidance_cls(
295
+ embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim
296
+ )
297
+
298
+ self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim)
299
+ self.x_embedder = nn.Linear(in_channels, self.inner_dim)
300
+
301
+ self.transformer_blocks = nn.ModuleList(
302
+ [
303
+ FluxTransformerBlock(
304
+ dim=self.inner_dim,
305
+ num_attention_heads=num_attention_heads,
306
+ attention_head_dim=attention_head_dim,
307
+ )
308
+ for _ in range(num_layers)
309
+ ]
310
+ )
311
+
312
+ self.single_transformer_blocks = nn.ModuleList(
313
+ [
314
+ FluxSingleTransformerBlock(
315
+ dim=self.inner_dim,
316
+ num_attention_heads=num_attention_heads,
317
+ attention_head_dim=attention_head_dim,
318
+ )
319
+ for _ in range(num_single_layers)
320
+ ]
321
+ )
322
+
323
+ self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6)
324
+ self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)
325
+
326
+ self.gradient_checkpointing = False
327
+
328
+ @property
329
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.attn_processors
330
+ def attn_processors(self) -> Dict[str, AttentionProcessor]:
331
+ r"""
332
+ Returns:
333
+ `dict` of attention processors: A dictionary containing all attention processors used in the model with
334
+ indexed by its weight name.
335
+ """
336
+ # set recursively
337
+ processors = {}
338
+
339
+ def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
340
+ if hasattr(module, "get_processor"):
341
+ processors[f"{name}.processor"] = module.get_processor()
342
+
343
+ for sub_name, child in module.named_children():
344
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
345
+
346
+ return processors
347
+
348
+ for name, module in self.named_children():
349
+ fn_recursive_add_processors(name, module, processors)
350
+
351
+ return processors
352
+
353
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attn_processor
354
+ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
355
+ r"""
356
+ Sets the attention processor to use to compute attention.
357
+
358
+ Parameters:
359
+ processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
360
+ The instantiated processor class or a dictionary of processor classes that will be set as the processor
361
+ for **all** `Attention` layers.
362
+
363
+ If `processor` is a dict, the key needs to define the path to the corresponding cross attention
364
+ processor. This is strongly recommended when setting trainable attention processors.
365
+
366
+ """
367
+ count = len(self.attn_processors.keys())
368
+
369
+ if isinstance(processor, dict) and len(processor) != count:
370
+ raise ValueError(
371
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
372
+ f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
373
+ )
374
+
375
+ def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
376
+ if hasattr(module, "set_processor"):
377
+ if not isinstance(processor, dict):
378
+ module.set_processor(processor)
379
+ else:
380
+ module.set_processor(processor.pop(f"{name}.processor"))
381
+
382
+ for sub_name, child in module.named_children():
383
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
384
+
385
+ for name, module in self.named_children():
386
+ fn_recursive_attn_processor(name, module, processor)
387
+
388
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedFluxAttnProcessor2_0
389
+ def fuse_qkv_projections(self):
390
+ """
391
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
392
+ are fused. For cross-attention modules, key and value projection matrices are fused.
393
+
394
+ <Tip warning={true}>
395
+
396
+ This API is 🧪 experimental.
397
+
398
+ </Tip>
399
+ """
400
+ self.original_attn_processors = None
401
+
402
+ for _, attn_processor in self.attn_processors.items():
403
+ if "Added" in str(attn_processor.__class__.__name__):
404
+ raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
405
+
406
+ self.original_attn_processors = self.attn_processors
407
+
408
+ for module in self.modules():
409
+ if isinstance(module, Attention):
410
+ module.fuse_projections(fuse=True)
411
+
412
+ self.set_attn_processor(FusedFluxAttnProcessor2_0())
413
+
414
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
415
+ def unfuse_qkv_projections(self):
416
+ """Disables the fused QKV projection if enabled.
417
+
418
+ <Tip warning={true}>
419
+
420
+ This API is 🧪 experimental.
421
+
422
+ </Tip>
423
+
424
+ """
425
+ if self.original_attn_processors is not None:
426
+ self.set_attn_processor(self.original_attn_processors)
427
+
428
+ def _set_gradient_checkpointing(self, module, value=False):
429
+ if hasattr(module, "gradient_checkpointing"):
430
+ module.gradient_checkpointing = value
431
+
432
+ def forward(
433
+ self,
434
+ hidden_states: torch.Tensor,
435
+ encoder_hidden_states: torch.Tensor = None,
436
+ image_emb: torch.FloatTensor = None,
437
+ pooled_projections: torch.Tensor = None,
438
+ timestep: torch.LongTensor = None,
439
+ img_ids: torch.Tensor = None,
440
+ txt_ids: torch.Tensor = None,
441
+ guidance: torch.Tensor = None,
442
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
443
+ controlnet_block_samples=None,
444
+ controlnet_single_block_samples=None,
445
+ return_dict: bool = True,
446
+ controlnet_blocks_repeat: bool = False,
447
+ ) -> Union[torch.Tensor, Transformer2DModelOutput]:
448
+ """
449
+ The [`FluxTransformer2DModel`] forward method.
450
+
451
+ Args:
452
+ hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`):
453
+ Input `hidden_states`.
454
+ encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`):
455
+ Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
456
+ pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`): Embeddings projected
457
+ from the embeddings of input conditions.
458
+ timestep ( `torch.LongTensor`):
459
+ Used to indicate denoising step.
460
+ block_controlnet_hidden_states: (`list` of `torch.Tensor`):
461
+ A list of tensors that if specified are added to the residuals of transformer blocks.
462
+ joint_attention_kwargs (`dict`, *optional*):
463
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
464
+ `self.processor` in
465
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
466
+ return_dict (`bool`, *optional*, defaults to `True`):
467
+ Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
468
+ tuple.
469
+
470
+ Returns:
471
+ If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
472
+ `tuple` where the first element is the sample tensor.
473
+ """
474
+
475
+ if joint_attention_kwargs is not None:
476
+ joint_attention_kwargs = joint_attention_kwargs.copy()
477
+ lora_scale = joint_attention_kwargs.pop("scale", 1.0)
478
+ else:
479
+ lora_scale = 1.0
480
+
481
+ if USE_PEFT_BACKEND:
482
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
483
+ scale_lora_layers(self, lora_scale)
484
+ else:
485
+ if joint_attention_kwargs is not None and joint_attention_kwargs.get("scale", None) is not None:
486
+ logger.warning(
487
+ "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
488
+ )
489
+
490
+ hidden_states = self.x_embedder(hidden_states)
491
+
492
+ timestep = timestep.to(hidden_states.dtype) * 1000
493
+ if guidance is not None:
494
+ guidance = guidance.to(hidden_states.dtype) * 1000
495
+ else:
496
+ guidance = None
497
+
498
+ temb = (
499
+ self.time_text_embed(timestep, pooled_projections)
500
+ if guidance is None
501
+ else self.time_text_embed(timestep, guidance, pooled_projections)
502
+ )
503
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
504
+
505
+ if txt_ids.ndim == 3:
506
+ logger.warning(
507
+ "Passing `txt_ids` 3d torch.Tensor is deprecated."
508
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
509
+ )
510
+ txt_ids = txt_ids[0]
511
+ if img_ids.ndim == 3:
512
+ logger.warning(
513
+ "Passing `img_ids` 3d torch.Tensor is deprecated."
514
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
515
+ )
516
+ img_ids = img_ids[0]
517
+
518
+ ids = torch.cat((txt_ids, img_ids), dim=0)
519
+ image_rotary_emb = self.pos_embed(ids)
520
+
521
+ if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs:
522
+ ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds")
523
+ ip_hidden_states = self.encoder_hid_proj(ip_adapter_image_embeds)
524
+ joint_attention_kwargs.update({"ip_hidden_states": ip_hidden_states})
525
+
526
+ for index_block, block in enumerate(self.transformer_blocks):
527
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
528
+
529
+ def create_custom_forward(module, return_dict=None):
530
+ def custom_forward(*inputs):
531
+ if return_dict is not None:
532
+ return module(*inputs, return_dict=return_dict)
533
+ else:
534
+ return module(*inputs)
535
+
536
+ return custom_forward
537
+
538
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
539
+ encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
540
+ create_custom_forward(block),
541
+ hidden_states,
542
+ encoder_hidden_states,
543
+ temb,
544
+ image_emb,
545
+ image_rotary_emb,
546
+ **ckpt_kwargs,
547
+ )
548
+
549
+ else:
550
+ encoder_hidden_states, hidden_states = block(
551
+ hidden_states=hidden_states,
552
+ encoder_hidden_states=encoder_hidden_states,
553
+ temb=temb,
554
+ image_emb=image_emb,
555
+ image_rotary_emb=image_rotary_emb,
556
+ joint_attention_kwargs=joint_attention_kwargs,
557
+ )
558
+
559
+ # controlnet residual
560
+ if controlnet_block_samples is not None:
561
+ interval_control = len(self.transformer_blocks) / len(controlnet_block_samples)
562
+ interval_control = int(np.ceil(interval_control))
563
+ # For Xlabs ControlNet.
564
+ if controlnet_blocks_repeat:
565
+ hidden_states = (
566
+ hidden_states + controlnet_block_samples[index_block % len(controlnet_block_samples)]
567
+ )
568
+ else:
569
+ hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control]
570
+ hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
571
+
572
+ for index_block, block in enumerate(self.single_transformer_blocks):
573
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
574
+
575
+ def create_custom_forward(module, return_dict=None):
576
+ def custom_forward(*inputs):
577
+ if return_dict is not None:
578
+ return module(*inputs, return_dict=return_dict)
579
+ else:
580
+ return module(*inputs)
581
+
582
+ return custom_forward
583
+
584
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
585
+ hidden_states = torch.utils.checkpoint.checkpoint(
586
+ create_custom_forward(block),
587
+ hidden_states,
588
+ temb,
589
+ image_emb,
590
+ image_rotary_emb,
591
+ **ckpt_kwargs,
592
+ )
593
+
594
+ else:
595
+ hidden_states = block(
596
+ hidden_states=hidden_states,
597
+ temb=temb,
598
+ image_emb=image_emb,
599
+ image_rotary_emb=image_rotary_emb,
600
+ joint_attention_kwargs=joint_attention_kwargs,
601
+ )
602
+
603
+ # controlnet residual
604
+ if controlnet_single_block_samples is not None:
605
+ interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples)
606
+ interval_control = int(np.ceil(interval_control))
607
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
608
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...]
609
+ + controlnet_single_block_samples[index_block // interval_control]
610
+ )
611
+
612
+ hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
613
+
614
+ hidden_states = self.norm_out(hidden_states, temb)
615
+ output = self.proj_out(hidden_states)
616
+
617
+ if USE_PEFT_BACKEND:
618
+ # remove `lora_scale` from each PEFT layer
619
+ unscale_lora_layers(self, lora_scale)
620
+
621
+ if not return_dict:
622
+ return (output,)
623
+
624
+ return Transformer2DModelOutput(sample=output)
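
This file tracks the stock diffusers Flux transformer closely; the functional change is the extra image_emb argument, which forward threads into every dual-stream and single-stream block so the FluxAttnProcessor adapters can attend to the reference-image tokens at each denoising step. A loading sketch (the checkpoint path is a placeholder; because the module layout matches the stock model, standard Flux transformer weights are expected to load into this class, though that is an assumption rather than something verified in this commit):

import torch

from models.transformer_flux_inpainting import FluxTransformer2DModel

transformer = FluxTransformer2DModel.from_pretrained(
    "path/to/flux-fill-checkpoint", subfolder="transformer", torch_dtype=torch.bfloat16
)

# A pipeline built around this transformer is expected to call it with the extra tensor, e.g.
# transformer(hidden_states=..., encoder_hidden_states=..., image_emb=image_prompt_embeds, ...).
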
samples/fire.jpg ADDED

Git LFS Details

  • SHA256: 929575080c872917f41e44a19a1be7ab8df014c365b4f5ef522ab2a9c9195bf6
  • Pointer size: 131 Bytes
  • Size of remote file: 289 kB
samples/rainbow.jpg ADDED
samples/test11_mask.png ADDED
samples/test11_ref.png ADDED

Git LFS Details

  • SHA256: ebee924ebe7410553bec8407de5ffab3b48ca87d41a8218571e49c423dff83ef
  • Pointer size: 131 Bytes
  • Size of remote file: 284 kB
samples/test11_source.png ADDED

Git LFS Details

  • SHA256: 8d3aa1c3d75b18e2dbb6a775bffd370ab95c2e7ba95aec19961965dd082126d4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
samples/test17_mask.png ADDED
samples/test17_source.png ADDED

Git LFS Details

  • SHA256: a3a6821cda62b24c161f228c9070ae2a7253fd438dcc0a773df8854dd72983b3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.12 MB
samples/test50_mask.png ADDED
samples/test50_ref.png ADDED

Git LFS Details

  • SHA256: 5a11e6c201ed71e837c79377d8f753a4eb7e0ea010803b147d6a872cdd6bc23a
  • Pointer size: 131 Bytes
  • Size of remote file: 185 kB
samples/test50_source.png ADDED

Git LFS Details

  • SHA256: 4c476c140ac37f4b88a3a461e4cd5a7eb8ff7ac71003a94cfc9d1989b9106bb5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB