TuKoResearch
/

AuriStreamDistillLarge_100M40PredTeacher_bad

@@ -80,7 +80,9 @@ class ConvLayer(nn.Module):
         )
         if norm == "group":
-            self.norm = GroupNorm1D(num_groups=out_channels, num_channels=out_channels)
         elif norm == "layer":
             self.norm = nn.LayerNorm(out_channels)
         else:

         )
         if norm == "group":
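+            # Use num_groups=1 (one group spanning all channels) for stability with short sequences.
+            # NOTE(review): wav2vec2/HuBERT "group" feature-extractor norm uses num_groups == num_channels,
+            # not 1 -- confirm this deviation from those models is intended.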
+            self.norm = GroupNorm1D(num_groups=1, num_channels=out_channels)
         elif norm == "layer":
             self.norm = nn.LayerNorm(out_channels)
         else: