Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
a194a28
1
Parent(s):
f40a554
modified: src/flux/model.py
Browse files
- src/flux/__pycache__/__init__.cpython-310.pyc +0 -0
- src/flux/__pycache__/_version.cpython-310.pyc +0 -0
- src/flux/__pycache__/math.cpython-310.pyc +0 -0
- src/flux/__pycache__/model.cpython-310.pyc +0 -0
- src/flux/__pycache__/sampling.cpython-310.pyc +0 -0
- src/flux/__pycache__/util.cpython-310.pyc +0 -0
- src/flux/model.py +8 -0
- src/flux/modules/__pycache__/autoencoder.cpython-310.pyc +0 -0
- src/flux/modules/__pycache__/conditioner.cpython-310.pyc +0 -0
- src/flux/modules/__pycache__/layers.cpython-310.pyc +0 -0
- src/flux/modules/layers.py +0 -11
src/flux/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/__init__.cpython-310.pyc and b/src/flux/__pycache__/__init__.cpython-310.pyc differ
|
|
|
src/flux/__pycache__/_version.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/_version.cpython-310.pyc and b/src/flux/__pycache__/_version.cpython-310.pyc differ
|
|
|
src/flux/__pycache__/math.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/math.cpython-310.pyc and b/src/flux/__pycache__/math.cpython-310.pyc differ
|
|
|
src/flux/__pycache__/model.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/model.cpython-310.pyc and b/src/flux/__pycache__/model.cpython-310.pyc differ
|
|
|
src/flux/__pycache__/sampling.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/sampling.cpython-310.pyc and b/src/flux/__pycache__/sampling.cpython-310.pyc differ
|
|
|
src/flux/__pycache__/util.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/__pycache__/util.cpython-310.pyc and b/src/flux/__pycache__/util.cpython-310.pyc differ
|
|
|
src/flux/model.py
CHANGED
|
@@ -90,6 +90,10 @@ class Flux(nn.Module):
|
|
| 90 |
if img.ndim != 3 or txt.ndim != 3:
|
| 91 |
raise ValueError("Input img and txt tensors must have 3 dimensions.")
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# --- CRITICAL DEBUG: Check the device of self.img_in's parameters ---
|
| 94 |
weight_device = self.img_in.weight.device
|
| 95 |
bias_device = self.img_in.bias.device if self.img_in.bias is not None else "N/A (None)"
|
|
@@ -114,6 +118,10 @@ class Flux(nn.Module):
|
|
| 114 |
# running on sequences img
|
| 115 |
img = self.img_in(img)
|
| 116 |
vec = self.time_in(timestep_embedding(timesteps, 256))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
if self.params.guidance_embed:
|
| 118 |
if guidance is None:
|
| 119 |
raise ValueError("Didn't get guidance strength for guidance distilled model.")
|
|
|
|
| 90 |
if img.ndim != 3 or txt.ndim != 3:
|
| 91 |
raise ValueError("Input img and txt tensors must have 3 dimensions.")
|
| 92 |
|
| 93 |
+
print(f"img_{cur_step}:{img}")
|
| 94 |
+
print(f"img_ids_{cur_step}:{img_ids}")
|
| 95 |
+
print(f"txt_{cur_step}:{txt}")
|
| 96 |
+
print(f"txt_ids_{cur_step}:{txt_ids}")
|
| 97 |
# --- CRITICAL DEBUG: Check the device of self.img_in's parameters ---
|
| 98 |
weight_device = self.img_in.weight.device
|
| 99 |
bias_device = self.img_in.bias.device if self.img_in.bias is not None else "N/A (None)"
|
|
|
|
| 118 |
# running on sequences img
|
| 119 |
img = self.img_in(img)
|
| 120 |
vec = self.time_in(timestep_embedding(timesteps, 256))
|
| 121 |
+
|
| 122 |
+
print(f"self.img_in(img)_{cur_step}:{img}")
|
| 123 |
+
print(f"self.time_in(timestep_embedding(timesteps, 256))_{cur_step}:{vec}")
|
| 124 |
+
|
| 125 |
if self.params.guidance_embed:
|
| 126 |
if guidance is None:
|
| 127 |
raise ValueError("Didn't get guidance strength for guidance distilled model.")
|
src/flux/modules/__pycache__/autoencoder.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/modules/__pycache__/autoencoder.cpython-310.pyc and b/src/flux/modules/__pycache__/autoencoder.cpython-310.pyc differ
|
|
|
src/flux/modules/__pycache__/conditioner.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/modules/__pycache__/conditioner.cpython-310.pyc and b/src/flux/modules/__pycache__/conditioner.cpython-310.pyc differ
|
|
|
src/flux/modules/__pycache__/layers.cpython-310.pyc
CHANGED
|
Binary files a/src/flux/modules/__pycache__/layers.cpython-310.pyc and b/src/flux/modules/__pycache__/layers.cpython-310.pyc differ
|
|
|
src/flux/modules/layers.py
CHANGED
|
@@ -158,11 +158,6 @@ class DoubleStreamBlock(nn.Module):
|
|
| 158 |
self.cur_block = cur_block
|
| 159 |
|
| 160 |
def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, cur_step: int, info) -> tuple[Tensor, Tensor]:
|
| 161 |
-
|
| 162 |
-
print(f"img_{cur_step}:{img}")
|
| 163 |
-
print(f"txt_{cur_step}:{txt}")
|
| 164 |
-
print(f"vec_{cur_step}:{vec}")
|
| 165 |
-
print(f"pe_{cur_step}:{pe}")
|
| 166 |
|
| 167 |
img_mod1, img_mod2 = self.img_mod(vec)
|
| 168 |
txt_mod1, txt_mod2 = self.txt_mod(vec)
|
|
@@ -175,18 +170,12 @@ class DoubleStreamBlock(nn.Module):
|
|
| 175 |
|
| 176 |
img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
|
| 177 |
|
| 178 |
-
print(f"img_modulated_{cur_step}:{img_modulated}")
|
| 179 |
-
print(f"img_qkv_{cur_step}:{img_qkv}")
|
| 180 |
-
print(f"img_q_{cur_step}:{img_q}")
|
| 181 |
-
print(f"img_k_{cur_step}:{img_k}")
|
| 182 |
# prepare txt for attention
|
| 183 |
txt_modulated = self.txt_norm1(txt)
|
| 184 |
txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
|
| 185 |
txt_qkv = self.txt_attn.qkv(txt_modulated)
|
| 186 |
txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
|
| 187 |
txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
|
| 188 |
-
print(f"txt_q_{cur_step}:{txt_q}")
|
| 189 |
-
print(f"txt_k_{cur_step}:{txt_k}")
|
| 190 |
# run actual attention
|
| 191 |
q = torch.cat((txt_q, img_q), dim=2) #[8, 24, 512, 128] + [8, 24, 900, 128] -> [8, 24, 1412, 128]
|
| 192 |
k = torch.cat((txt_k, img_k), dim=2)
|
|
|
|
| 158 |
self.cur_block = cur_block
|
| 159 |
|
| 160 |
def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, cur_step: int, info) -> tuple[Tensor, Tensor]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
img_mod1, img_mod2 = self.img_mod(vec)
|
| 163 |
txt_mod1, txt_mod2 = self.txt_mod(vec)
|
|
|
|
| 170 |
|
| 171 |
img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
# prepare txt for attention
|
| 174 |
txt_modulated = self.txt_norm1(txt)
|
| 175 |
txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
|
| 176 |
txt_qkv = self.txt_attn.qkv(txt_modulated)
|
| 177 |
txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
|
| 178 |
txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
|
|
|
|
|
|
|
| 179 |
# run actual attention
|
| 180 |
q = torch.cat((txt_q, img_q), dim=2) #[8, 24, 512, 128] + [8, 24, 900, 128] -> [8, 24, 1412, 128]
|
| 181 |
k = torch.cat((txt_k, img_k), dim=2)
|