TuKoResearch
/

AuriStream100M_librilight

AuriStream.AuriStream

Model card Files Files and versions

klemenk committed on May 10, 2025

Commit

b56f0a4

·

verified ·

1 Parent(s): 0afa744

Update modeling_auristream.py

Files changed (1) hide show

modeling_auristream.py +21 -1

modeling_auristream.py CHANGED Viewed

@@ -587,4 +587,24 @@ class RMSNorm(nn.Module):
         output = self._norm(x.float()).type_as(x)
         if self.weight is not None:
             return output * self.weight
-        return output

         output = self._norm(x.float()).type_as(x)
         if self.weight is not None:
             return output * self.weight
+        return output
class DWA(nn.Module):
    """Depth Weighted Average layer.

    Averages representations across the layers of a transformer using a
    learnable ``(n_layers, n_layers)`` weight matrix ``alphas``.

    From: https://arxiv.org/pdf/2402.02622

    Call protocol (as implemented by the methods below):

    1. ``init_accumulators(x)`` once per pass, with the first representation.
       This resets the stored history.
    2. ``forward(x)`` once for every subsequent representation.

    The module is stateful: every tensor passed in is kept in
    ``self.accumulators`` until the next ``init_accumulators`` call.
    Because ``alphas`` has ``n_layers`` columns, at most ``n_layers - 1``
    ``forward`` calls can follow one ``init_accumulators`` call; a further
    call indexes past the last column of ``alphas``.
    """

    def __init__(self, n_layers: int):
        """Create the mixing weights.

        Args:
            n_layers: Number of representations that can be accumulated;
                ``alphas`` is created with shape ``(n_layers, n_layers)``.
        """
        super().__init__()
        # Registered as zeros, then overwritten with the identity matrix so
        # that initially only the diagonal (current-representation) weights
        # are non-zero.
        self.alphas = nn.Parameter(torch.zeros(n_layers, n_layers))
        self.alphas.data = torch.eye(n_layers)
        # History of representations seen since the last init_accumulators().
        self.accumulators = []

    def init_accumulators(self, x: torch.Tensor) -> torch.Tensor:
        """Reset the history to ``[x]`` and return ``x * alphas[0, 0]``.

        Args:
            x: The first representation of the pass.

        Returns:
            ``x`` scaled by the scalar weight ``alphas[0, 0]``.
        """
        self.accumulators = [x]
        return x * self.alphas[0, 0]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Record ``x`` and mix it with all stored representations.

        ``x`` is appended to the history at index ``k``; the result is
        ``x + sum_i alphas[i, k] * accumulators[i]`` for ``i = 0..k``.

        Args:
            x: The newest representation.

        Returns:
            The weighted combination described above.
        """
        self.accumulators.append(x)
        # Column of ``alphas`` used for this step == index of ``x`` itself.
        col = len(self.accumulators) - 1
        # NOTE(review): the sum includes accumulators[col], which is ``x``, so
        # ``x`` contributes both directly and via alphas[col, col]. With the
        # identity initialisation this returns 2 * x. Left unchanged because
        # trained weights depend on it -- confirm against the paper before
        # altering.
        for row, acc in enumerate(self.accumulators):
            x = x + self.alphas[row, col] * acc
        return x