Transformer_ALiBi shares most of its modules with [Transformer-RPB](https://huggingface.co/Abner0803/Transformer-RPB), except for the modules below.
## TransformerComp
Add `TransformerComp` to your current script:
```python
import math

import torch
import torch.nn as nn

# BaseTransformerComp and PositionalEncoding are shared modules from the
# Transformer-RPB repository referenced above.


class TransformerComp(BaseTransformerComp):
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        num_layers: int,
        num_heads: int,
        dropout: float = 0.1,
        mask_type: str = "none",
    ) -> None:
        """
        mask_type: "none", "alibi", "calibi", "causal"
        """
        super().__init__(input_dim, hidden_dim, num_layers, num_heads, dropout)
        self.feature_layer = nn.Linear(input_dim, hidden_dim)
        self.pe = PositionalEncoding(hidden_dim, dropout)
        self.mask_type = mask_type
        if self.mask_type in ["alibi", "calibi"]:
            # ALiBi head slopes: a geometric sequence 2^(-8/n), 2^(-16/n), ...
            # for the closest power of two n <= num_heads, plus interpolated
            # slopes when num_heads is not a power of two.
            closest_power_of_2 = 2 ** int(math.log2(num_heads))
            base_slopes = torch.pow(
                2,
                -torch.arange(1, closest_power_of_2 + 1, dtype=torch.float32)
                * 8
                / closest_power_of_2,
            )
            if closest_power_of_2 != num_heads:
                extra_slopes = torch.pow(
                    2,
                    -torch.arange(
                        1,
                        2 * (num_heads - closest_power_of_2) + 1,
                        2,
                        dtype=torch.float32,
                    )
                    * 8
                    / closest_power_of_2,
                )
                base_slopes = torch.cat([base_slopes, extra_slopes])
            self.register_buffer(
                "slopes", base_slopes.view(-1, 1, 1)
            )  # [n_heads, 1, 1]
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim * 4,
            dropout=dropout,
            activation="relu",
            batch_first=False,
        )
        self.encoder_norm = nn.LayerNorm(hidden_dim)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers
        )

    def _generate_alibi_mask(self, seq_len: int, device: torch.device) -> torch.Tensor:
        """
        Creates a relative (ALiBi) bias mask.
        Returns: [num_heads, seq_len, seq_len]
        """
        context_pos = torch.arange(seq_len, device=device).unsqueeze(1)
        memory_pos = torch.arange(seq_len, device=device).unsqueeze(0)
        distance = torch.abs(context_pos - memory_pos)
        # Attention is penalized linearly with distance, scaled per head.
        alibi_bias = distance * -1.0 * self.slopes
        return alibi_bias

    def _generate_causal_alibi_mask(
        self, seq_len: int, device: torch.device
    ) -> torch.Tensor:
        """
        Creates a mask that is both relative (ALiBi) and causal (mask wall).
        Returns: [num_heads, seq_len, seq_len]
        """
        context_pos = torch.arange(seq_len, device=device).unsqueeze(1)
        memory_pos = torch.arange(seq_len, device=device).unsqueeze(0)
        distance = torch.abs(context_pos - memory_pos)
        alibi_bias = distance * -1.0 * self.slopes
        # Upper-triangular wall blocks attention to future positions.
        causal_mask = torch.triu(
            torch.ones(seq_len, seq_len, device=device, dtype=torch.bool), diagonal=1
        )
        alibi_bias.masked_fill_(causal_mask, float("-inf"))
        return alibi_bias

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """x.shape: [batch, seq_len, n_stocks, n_feats]"""
        x, batch, n_stocks = self._reshape_input(x)
        seq_len = x.shape[0]
        x = self.encoder_norm(self.pe(self.feature_layer(x)))  # [t, b * s, d_model]
        if self.mask_type == "causal":
            mask = self._generate_causal_mask(seq_len, x.device).permute(1, 0)
        elif self.mask_type == "alibi":
            mask = self._generate_alibi_mask(seq_len, x.device).repeat(
                x.shape[1], 1, 1
            )  # [b * s * n_heads, t, t]
        elif self.mask_type == "calibi":
            mask = self._generate_causal_alibi_mask(seq_len, x.device).repeat(
                x.shape[1], 1, 1
            )
        else:
            mask = None
        x = self.transformer_encoder(x, mask=mask)
        return self._reshape_output(x, batch, n_stocks)
```
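A minimal smoke test, assuming `BaseTransformerComp` and `PositionalEncoding` are already imported from the shared Transformer-RPB modules; the dimensions below (2 samples, 16 timesteps, 5 stocks, 8 features) are illustrative only:

```python
import torch

# Hypothetical dimensions chosen only for illustration.
batch, seq_len, n_stocks, n_feats = 2, 16, 5, 8

model = TransformerComp(
    input_dim=n_feats,
    hidden_dim=64,
    num_layers=2,
    num_heads=4,
    dropout=0.0,
    mask_type="alibi",
)
model.eval()

# Input follows the docstring layout: [batch, seq_len, n_stocks, n_feats].
x = torch.randn(batch, seq_len, n_stocks, n_feats)
with torch.no_grad():
    out = model(x)
print(out.shape)

# The ALiBi bias can also be inspected directly:
bias = model._generate_alibi_mask(seq_len, x.device)
print(bias.shape)  # [num_heads, seq_len, seq_len]
```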
## Model Config
```yaml
input_dim: 8
output_dim: 1
hidden_dim: 64
num_layers: 2
num_heads: 4
dropout: 0.0
tfm_type: "base"
mask_type: "alibi"
```
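A sketch of wiring this config into the module, assuming the YAML above is saved as `config.yaml` (a hypothetical filename) and that `output_dim` and `tfm_type` are consumed by the surrounding model wrapper rather than by `TransformerComp` itself:

```python
import yaml

with open("config.yaml") as f:  # hypothetical path to the YAML above
    cfg = yaml.safe_load(f)

# Only the keys TransformerComp accepts are passed here; output_dim and
# tfm_type are assumed to be handled by the outer model.
encoder = TransformerComp(
    input_dim=cfg["input_dim"],
    hidden_dim=cfg["hidden_dim"],
    num_layers=cfg["num_layers"],
    num_heads=cfg["num_heads"],
    dropout=cfg["dropout"],
    mask_type=cfg["mask_type"],
)
```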