TSXu committed on
Commit
aecc9f1
·
1 Parent(s): 8af673c

Disable autocast for MLPEmbedder and Modulation to fix CUBLAS errors

Browse files
Files changed (1) hide show
  1. src/flux/modules/layers.py +15 -7
src/flux/modules/layers.py CHANGED
@@ -59,13 +59,14 @@ class MLPEmbedder(nn.Module):
59
  self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
60
 
61
  def forward(self, x: Tensor) -> Tensor:
62
- # Use fp32 for computation to avoid CUBLAS errors, then convert back
63
  orig_dtype = x.dtype
64
- x = x.float()
65
- # Compute with fp32 weights
66
- x = F.linear(x, self.in_layer.weight.float(), self.in_layer.bias.float() if self.in_layer.bias is not None else None)
67
- x = self.silu(x)
68
- x = F.linear(x, self.out_layer.weight.float(), self.out_layer.bias.float() if self.out_layer.bias is not None else None)
 
69
  return x.to(orig_dtype)
70
 
71
 
@@ -176,7 +177,14 @@ class Modulation(nn.Module):
176
  self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
177
 
178
  def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]:
179
- out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
 
 
 
 
 
 
 
180
 
181
  return (
182
  ModulationOut(*out[:3]),
 
59
  self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
60
 
61
  def forward(self, x: Tensor) -> Tensor:
62
+ # Disable autocast and use fp32 for computation to avoid CUBLAS errors
63
  orig_dtype = x.dtype
64
+ with torch.autocast(device_type='cuda', enabled=False):
65
+ x = x.float()
66
+ # Compute with fp32 weights
67
+ x = F.linear(x, self.in_layer.weight.float(), self.in_layer.bias.float() if self.in_layer.bias is not None else None)
68
+ x = self.silu(x)
69
+ x = F.linear(x, self.out_layer.weight.float(), self.out_layer.bias.float() if self.out_layer.bias is not None else None)
70
  return x.to(orig_dtype)
71
 
72
 
 
177
  self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
178
 
179
  def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]:
180
+ # Disable autocast and use fp32 for computation to avoid CUBLAS errors
181
+ orig_dtype = vec.dtype
182
+ with torch.autocast(device_type='cuda', enabled=False):
183
+ vec = vec.float()
184
+ out = F.linear(F.silu(vec), self.lin.weight.float(), self.lin.bias.float() if self.lin.bias is not None else None)
185
+ out = out[:, None, :].chunk(self.multiplier, dim=-1)
186
+ # Convert back to original dtype
187
+ out = tuple(o.to(orig_dtype) for o in out)
188
 
189
  return (
190
  ModulationOut(*out[:3]),