File size: 993 Bytes
17c0b30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import torch
import torch.nn as nn

from .usta_multi_head_attention import UstaMultiHeadAttention
from .usta_layer_norm import UstaLayerNorm
from .usta_mlp import UstaMLP


class UstaDecoderBlock(nn.Module):
    """Decoder block: multi-head self-attention followed by an MLP.

    The block owns four sub-modules: ``self_attention``
    (``UstaMultiHeadAttention``), ``mlp`` (``UstaMLP``) and two
    ``UstaLayerNorm`` instances, ``norm1`` (used around the attention
    sub-layer) and ``norm2`` (used around the MLP sub-layer).

    Args:
        embedding_dim: Passed as the first two positional arguments of
            ``UstaMultiHeadAttention``, as both positional arguments of
            ``UstaMLP``, and to each ``UstaLayerNorm``.
        num_heads: Forwarded to ``UstaMultiHeadAttention``.
        context_length: Forwarded to ``UstaMultiHeadAttention``.
        device: Forwarded to every sub-module.
        dropout_rate: Forwarded to ``UstaMultiHeadAttention``. Defaults to
            0.5, the value that used to be hard-coded here, so existing
            callers are unaffected.
    """

    def __init__(self, embedding_dim, num_heads, context_length, device,
                 dropout_rate=0.5):
        super().__init__()
        self.self_attention = UstaMultiHeadAttention(
            embedding_dim,
            embedding_dim,
            context_length,
            num_heads,
            dropout_rate=dropout_rate,
            device=device,
        )

        self.norm1 = UstaLayerNorm(embedding_dim, device=device)
        # presumably (embedding_dim, hidden_dim) -- verify against UstaMLP.
        self.mlp = UstaMLP(embedding_dim, embedding_dim, device=device)
        self.norm2 = UstaLayerNorm(embedding_dim, device=device)

    def forward(self, x):
        """Run ``x`` through the attention and MLP sub-layers.

        Computes ``h = norm1(self_attention(x)) + norm1(x)`` and returns
        ``norm2(mlp(h)) + norm2(h)``.

        NOTE(review): the skip branch of each sub-layer is the *normalized*
        input rather than the raw input, and the same norm is applied again
        to the sub-layer output. This differs from the usual pre-norm /
        post-norm layouts; it is kept unchanged here to preserve the
        current numerical behaviour -- confirm it is intended.

        Args:
            x: Input tensor; presumably (batch, seq, embedding_dim) --
                TODO confirm against the attention layer.

        Returns:
            The tensor produced by the two sub-layers as described above.
        """
        # Attention sub-layer: skip branch is norm1 of the block input.
        skip = self.norm1(x)
        x = self.norm1(self.self_attention(x))
        x = x + skip

        # MLP sub-layer: skip branch is norm2 of the attention result.
        skip = self.norm2(x)
        x = self.norm2(self.mlp(x))
        return x + skip