PeteBleackley committed · Commit a1e9f64 · Parent(s): 776f717 · Build error

Factorized the weight matrix in the GlobalAttentionPoolingHead, thus reducing the number of parameters in this layer by a factor of 48
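For scale: a dense size × size projection stores size² parameters, while the rank-8 factorization below stores a size × 8 and an 8 × size matrix, i.e. 16·size parameters, a reduction by a factor of size/16. The quoted factor of 48 therefore implies config.hidden_size = 768 for this model.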
qarac/models/layers/FactorizedMatrixMultiplication.py ADDED

@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Mar 8 08:08:03 2024
+
+@author: peter
+"""
+
+import torch
+
+class FactorizedMatrixMultiplication(torch.nn.Module):
+
+    def __init__(self,size):
+        super(FactorizedMatrixMultiplication,self).__init__()
+        self.left = torch.nn.parameter.Parameter(torch.empty((size,8)))
+        self.right = torch.nn.parameter.Parameter(torch.empty((8,size)))
+        sigma = (3.0/(4.0*size))**0.25
+        torch.nn.init.normal_(self.left,0.0,sigma)
+        torch.nn.init.normal_(self.right,0.0,sigma)
+        self.matrix = torch.tensordot(self.left,self.right,1)
+
+    def forward(self,X):
+        return torch.einsum('ij,klj->kli',self.matrix,X)
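Note that the committed code computes self.matrix once in __init__ rather than in forward, so the low-rank product is built into the autograd graph a single time: a second backward pass through it will fail, and optimizer updates to left and right are never reflected in the product. A minimal sketch of the intended behavior, recomputing the product on every call (the rank of 8 and the initialization scale are taken from the diff; the rank keyword is my addition):

import torch

class FactorizedMatrixMultiplication(torch.nn.Module):
    """A square weight matrix stored as a low-rank product W = left @ right."""

    def __init__(self, size, rank=8):
        super().__init__()
        self.left = torch.nn.Parameter(torch.empty((size, rank)))
        self.right = torch.nn.Parameter(torch.empty((rank, size)))
        # Same scale as the committed code: entries of W then have
        # variance rank * sigma**4 = 8 * 3/(4*size) = 6/size.
        sigma = (3.0 / (4.0 * size)) ** 0.25
        torch.nn.init.normal_(self.left, 0.0, sigma)
        torch.nn.init.normal_(self.right, 0.0, sigma)

    def forward(self, X):
        # Rebuild the product each call so gradient updates to left and
        # right take effect; X is (batch, seq, size), W acts on the last axis.
        matrix = self.left @ self.right
        return torch.einsum('ij,klj->kli', matrix, X)

As a design note, forming left @ right materializes the full size × size matrix each call; applying the factors in sequence, return (X @ self.right.T) @ self.left.T, computes the same result with the same parameters while avoiding the size × size intermediate.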
qarac/models/layers/GlobalAttentionPoolingHead.py CHANGED

@@ -7,6 +7,7 @@ Created on Tue Sep 5 07:32:55 2023
 """

 import torch
+import FactorizedMatrixMultiplication


 class GlobalAttentionPoolingHead(torch.nn.Module):
@@ -26,8 +27,8 @@ class GlobalAttentionPoolingHead(torch.nn.Module):
         """
         size = config.hidden_size
         super(GlobalAttentionPoolingHead,self).__init__()
-        self.global_projection =
-        self.local_projection =
+        self.global_projection = FactorizedMatrixMultiplication.FactorizedMatrixMultiplication(size)
+        self.local_projection = FactorizedMatrixMultiplication.FactorizedMatrixMultiplication(size)
         self.cosine = torch.nn.CosineSimilarity(dim=2,eps=1.0e-12)

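A likely cause of the Build error status above: import FactorizedMatrixMultiplication is a bare top-level import, but the module lives at qarac/models/layers/FactorizedMatrixMultiplication.py, so it will raise ModuleNotFoundError unless that directory happens to be on sys.path. A sketch of an import and usage that resolve from the package root (assuming qarac/models/layers is a package with __init__.py files):

from qarac.models.layers.FactorizedMatrixMultiplication import FactorizedMatrixMultiplication

# ... inside GlobalAttentionPoolingHead.__init__ ...
self.global_projection = FactorizedMatrixMultiplication(size)
self.local_projection = FactorizedMatrixMultiplication(size)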