Update model.py
Browse files
model.py
CHANGED
|
@@ -30,6 +30,9 @@ class minGRU(nn.Module):
|
|
| 30 |
def __init__(self, d_model, d_inner):
|
| 31 |
super().__init__()
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
self.hidden_proj = nn.Linear(d_model, d_inner, bias=False)
|
| 34 |
self.gate_proj = nn.Linear(d_model, d_inner, bias=False)
|
| 35 |
self.out_proj = nn.Linear(d_inner, d_model, bias=False)
|
|
@@ -100,6 +103,12 @@ class RMSNorm(nn.Module):
|
|
| 100 |
class minGRULM(nn.Module):
|
| 101 |
def __init__(self, vocab_size, d_model, d_inner, n_layers):
|
| 102 |
super().__init__()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
self.embed = nn.Embedding(vocab_size, d_model)
|
| 104 |
|
| 105 |
self.layers = nn.ModuleList([])
|
|
|
|
| 30 |
def __init__(self, d_model, d_inner):
|
| 31 |
super().__init__()
|
| 32 |
|
| 33 |
+
self.d_model = d_model
|
| 34 |
+
self.d_inner = d_inner
|
| 35 |
+
|
| 36 |
self.hidden_proj = nn.Linear(d_model, d_inner, bias=False)
|
| 37 |
self.gate_proj = nn.Linear(d_model, d_inner, bias=False)
|
| 38 |
self.out_proj = nn.Linear(d_inner, d_model, bias=False)
|
|
|
|
| 103 |
class minGRULM(nn.Module):
|
| 104 |
def __init__(self, vocab_size, d_model, d_inner, n_layers):
|
| 105 |
super().__init__()
|
| 106 |
+
|
| 107 |
+
self.vocab_size = vocab_size
|
| 108 |
+
self.d_model = d_model
|
| 109 |
+
self.d_inner = d_inner
|
| 110 |
+
self.n_layers = n_layers
|
| 111 |
+
|
| 112 |
self.embed = nn.Embedding(vocab_size, d_model)
|
| 113 |
|
| 114 |
self.layers = nn.ModuleList([])
|