TuKoResearch
/

AuriStream7BWide_librispeech

AuriStream.AuriStream

Model card Files Files and versions

klemenk committed on May 7, 2025

Commit

d533175

·

verified ·

1 Parent(s): 2141c91

Update modeling_auristream.py

Files changed (1) hide show

modeling_auristream.py +22 -0

modeling_auristream.py CHANGED Viewed

@@ -577,3 +577,25 @@ class RMSNorm(nn.Module):
         if self.weight is not None:
             return output * self.weight
         return output

         if self.weight is not None:
             return output * self.weight
         return output
class DWA(nn.Module):
    """Depth Weighted Average (DWA) layer.

    Averages representations across the layers of a transformer: every call
    to ``forward`` returns a learned weighted sum of all representations
    accumulated so far, including the one just passed in.

    From: https://arxiv.org/pdf/2402.02622

    Usage: call ``init_accumulators`` once with the first representation,
    then call the module once per subsequent representation. Because
    ``alphas`` is ``n_layers x n_layers``, at most ``n_layers``
    representations can be mixed (the initial one plus ``n_layers - 1``
    ``forward`` calls) before ``init_accumulators`` must be called again.
    """

    def __init__(self, n_layers: int):
        """Create the mixing weights.

        Args:
            n_layers: Number of rows/columns of the square weight matrix,
                i.e. the maximum number of representations that can be mixed.
        """
        super().__init__()
        # alphas[i, j] weights the i-th stored representation in the j-th
        # output. Identity initialisation means every output initially equals
        # the representation that was just passed in.
        self.alphas = nn.Parameter(torch.eye(n_layers))
        # Plain Python list (not a registered buffer) of the representations
        # seen since the last init_accumulators() call. The tensors stay
        # referenced here until the list is reset.
        self.accumulators = []

    def init_accumulators(self, x: torch.Tensor) -> torch.Tensor:
        """Reset the stored representations to ``[x]``.

        Args:
            x: The first representation of a new pass.

        Returns:
            ``x`` scaled by ``alphas[0, 0]``.
        """
        self.accumulators = [x]
        return x * self.alphas[0, 0]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Store ``x`` and return the weighted sum of all stored representations.

        Args:
            x: The newest representation; it is appended to the accumulators.

        Returns:
            ``sum_i alphas[i, k] * accumulators[i]`` where ``k`` is the index
            of ``x`` in the accumulators.

        Raises:
            IndexError: If more representations are supplied than ``alphas``
                has columns. The accumulators are left untouched in that case.
        """
        column = len(self.accumulators)  # index that x will occupy
        if column >= self.alphas.shape[1]:
            raise IndexError(
                f"DWA was built for {self.alphas.shape[1]} representations but "
                f"received representation number {column + 1}; call "
                "init_accumulators() to start a new pass"
            )
        self.accumulators.append(x)
        mixed = 0.0
        for row, state in enumerate(self.accumulators):
            mixed = mixed + self.alphas[row, column] * state
        return mixed