BiliSakura
/

BitDance-14B-64x-diffusers

@@ -111,7 +111,7 @@ def euler_maruyama(
     dt = t_all[1:] - t_all[:-1]
     t = torch.tensor(0.0, device=c.device, dtype=torch.float32)
-    t_batch = torch.zeros(c.shape[0], device=c.device)
     for i in range(num_sampling_steps):
         t_batch[:] = t
         combined = torch.cat([x] * cfg_mult, dim=0)
@@ -152,6 +152,7 @@ class TimestepEmbedder(nn.Module):
     def forward(self, t: torch.Tensor) -> torch.Tensor:
         """Embed the timestep tensor ``t``.

         Builds frequency features with ``timestep_embedding`` using
         ``self.frequency_embedding_size`` and returns the result of passing
         them through ``self.mlp``.
         """
         # NOTE(review): the dtype of t_freq is whatever timestep_embedding
         # returns; it is not aligned with the MLP weights here — confirm this
         # is safe when the module runs in reduced precision.
         t_freq = timestep_embedding(t, self.frequency_embedding_size)
         return self.mlp(t_freq)
@@ -301,6 +302,10 @@ class TransEncoder(nn.Module):
         nn.init.constant_(self.final_layer.linear.bias, 0)
     def forward(self, x: torch.Tensor, t: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
         x = self.input_proj(x)
         t = self.time_embed(t).unsqueeze(1)
         c = self.cond_embed(c)

     dt = t_all[1:] - t_all[:-1]
     t = torch.tensor(0.0, device=c.device, dtype=torch.float32)
+    t_batch = torch.zeros(c.shape[0], device=c.device, dtype=c.dtype)
     for i in range(num_sampling_steps):
         t_batch[:] = t
         combined = torch.cat([x] * cfg_mult, dim=0)
     # Embed the timestep tensor `t`: build frequency features with
     # timestep_embedding (sized by self.frequency_embedding_size), cast them
     # to the dtype of the first MLP layer's weight, and return self.mlp's output.
     def forward(self, t: torch.Tensor) -> torch.Tensor:
         t_freq = timestep_embedding(t, self.frequency_embedding_size)
         # Match the features to the dtype of self.mlp[0].weight before the MLP
         # call; presumably this avoids an input/weight dtype mismatch when the
         # module is loaded in half precision — TODO confirm.
+        t_freq = t_freq.to(self.mlp[0].weight.dtype)
         return self.mlp(t_freq)
         nn.init.constant_(self.final_layer.linear.bias, 0)
     def forward(self, x: torch.Tensor, t: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
+        dtype = next(self.parameters()).dtype
+        x = x.to(dtype)
+        t = t.to(dtype)
+        c = c.to(dtype)
         x = self.input_proj(x)
         t = self.time_embed(t).unsqueeze(1)
         c = self.cond_embed(c)