Transformer_ALiBi shares most of its modules with [Transformer-RPB](https://huggingface.co/Abner0803/Transformer-RPB); only the modules below differ.

## TransformerComp

Add `TransformerComp` to your current script:

```python
import math

import torch
import torch.nn as nn

# `BaseTransformerComp` and `PositionalEncoding` are shared with Transformer-RPB.


class TransformerComp(BaseTransformerComp):
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        num_layers: int,
        num_heads: int,
        dropout: float = 0.1,
        mask_type: str = "none",
    ) -> None:
        """
        mask_type: one of "none", "alibi", "calibi" (causal ALiBi), or "causal".
        """
        super().__init__(input_dim, hidden_dim, num_layers, num_heads, dropout)
        self.feature_layer = nn.Linear(input_dim, hidden_dim)
        self.pe = PositionalEncoding(hidden_dim, dropout)
        self.mask_type = mask_type
        if self.mask_type in ["alibi", "calibi"]:
            # ALiBi head slopes: the geometric sequence 2^(-8i / n), defined for
            # the largest power of two n <= num_heads.
            closest_power_of_2 = 2 ** int(math.log2(num_heads))
            base_slopes = torch.pow(
                2,
                -torch.arange(1, closest_power_of_2 + 1, dtype=torch.float32)
                * 8
                / closest_power_of_2,
            )
            if closest_power_of_2 != num_heads:
                # For non-power-of-two head counts, fill the remaining heads with
                # the odd-indexed slopes of the next power of two
                # (2 * closest_power_of_2), as in the ALiBi paper.
                extra_slopes = torch.pow(
                    2,
                    -torch.arange(
                        1,
                        2 * (num_heads - closest_power_of_2) + 1,
                        2,
                        dtype=torch.float32,
                    )
                    * 8
                    / (2 * closest_power_of_2),
                )
                base_slopes = torch.cat([base_slopes, extra_slopes])
            self.register_buffer(
                "slopes", base_slopes.view(-1, 1, 1)
            )  # [num_heads, 1, 1]
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim * 4,
            dropout=dropout,
            activation="relu",
            batch_first=False,
        )
        self.encoder_norm = nn.LayerNorm(hidden_dim)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers
        )

    def _generate_alibi_mask(self, seq_len: int, device: torch.device) -> torch.Tensor:
        """
        Create a relative (ALiBi) attention bias: each head penalises attention
        in proportion to the distance between query and key positions.

        Returns: [num_heads, seq_len, seq_len]
        """
        context_pos = torch.arange(seq_len, device=device).unsqueeze(1)
        memory_pos = torch.arange(seq_len, device=device).unsqueeze(0)
        distance = torch.abs(context_pos - memory_pos)  # [seq_len, seq_len]
        alibi_bias = distance * -1.0 * self.slopes  # broadcast over heads
        return alibi_bias

    def _generate_causal_alibi_mask(
        self, seq_len: int, device: torch.device
    ) -> torch.Tensor:
        """
        Create a relative (ALiBi) attention bias that is also causal: entries
        above the diagonal are masked out with -inf.

        Returns: [num_heads, seq_len, seq_len]
        """
        context_pos = torch.arange(seq_len, device=device).unsqueeze(1)
        memory_pos = torch.arange(seq_len, device=device).unsqueeze(0)
        distance = torch.abs(context_pos - memory_pos)
        alibi_bias = distance * -1.0 * self.slopes
        causal_mask = torch.triu(
            torch.ones(seq_len, seq_len, device=device, dtype=torch.bool), diagonal=1
        )
        alibi_bias.masked_fill_(causal_mask, float("-inf"))
        return alibi_bias

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """x.shape: [batch, seq_len, n_stocks, n_feats]"""
        x, batch, n_stocks = self._reshape_input(x)  # [t, b * s, n_feats]
        seq_len = x.shape[0]
        x = self.encoder_norm(self.pe(self.feature_layer(x)))  # [t, b * s, d_model]
        if self.mask_type == "causal":
            mask = self._generate_causal_mask(seq_len, x.device).permute(1, 0)
        elif self.mask_type == "alibi":
            # Tile the per-head bias across the flattened batch; PyTorch expects
            # a 3-D attention mask of shape [b * s * n_heads, t, t].
            mask = self._generate_alibi_mask(seq_len, x.device).repeat(
                x.shape[1], 1, 1
            )
        elif self.mask_type == "calibi":
            mask = self._generate_causal_alibi_mask(seq_len, x.device).repeat(
                x.shape[1], 1, 1
            )
        else:
            mask = None
        x = self.transformer_encoder(x, mask=mask)
        return self._reshape_output(x, batch, n_stocks)
```

## Model Config

```yaml
input_dim: 8
output_dim: 1
hidden_dim: 64
num_layers: 2
num_heads: 4
dropout: 0.0
tfm_type: "base"
mask_type: "alibi"
```
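As a sanity check on the slope schedule: with the configured `num_heads: 4` (a power of two, so only the base formula applies), the constructor produces the geometric sequence `2**-2, 2**-4, 2**-6, 2**-8`. The snippet below re-derives it standalone; it is not part of the model code:

```python
import torch

num_heads = 4  # matches the config above; already a power of two
slopes = torch.pow(
    2, -torch.arange(1, num_heads + 1, dtype=torch.float32) * 8 / num_heads
)
print(slopes)  # tensor([0.2500, 0.0625, 0.0156, 0.0039])
```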
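For intuition about the `"calibi"` mask, here is a standalone illustration of the bias one head (slope `0.25`) receives over a four-step sequence; it mirrors `_generate_causal_alibi_mask` without the class machinery. Entries above the diagonal are `-inf`, so each position attends only to itself and the past, with a penalty that grows linearly with distance:

```python
import torch

seq_len, slope = 4, 0.25
pos = torch.arange(seq_len)
bias = -slope * (pos.unsqueeze(1) - pos.unsqueeze(0)).abs().float()
bias.masked_fill_(
    torch.triu(torch.ones(seq_len, seq_len, dtype=torch.bool), diagonal=1),
    float("-inf"),
)
print(bias)
# [[ 0.00,  -inf,  -inf,  -inf],
#  [-0.25,  0.00,  -inf,  -inf],
#  [-0.50, -0.25,  0.00,  -inf],
#  [-0.75, -0.50, -0.25,  0.00]]
```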
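Finally, a minimal end-to-end sketch. It assumes the shared Transformer-RPB modules (`BaseTransformerComp`, `PositionalEncoding`) are importable, that the config above is saved as `config.yaml` (the filename is an assumption), and that `_reshape_input` flattens `[batch, seq_len, n_stocks, n_feats]` into `[seq_len, batch * n_stocks, n_feats]` as the shape comments suggest; `output_dim` and `tfm_type` are presumably consumed elsewhere in the pipeline:

```python
import torch
import yaml

with open("config.yaml") as f:  # hypothetical path to the config above
    cfg = yaml.safe_load(f)

model = TransformerComp(
    input_dim=cfg["input_dim"],
    hidden_dim=cfg["hidden_dim"],
    num_layers=cfg["num_layers"],
    num_heads=cfg["num_heads"],
    dropout=cfg["dropout"],
    mask_type=cfg["mask_type"],
)

x = torch.randn(16, 32, 10, cfg["input_dim"])  # [batch, seq_len, n_stocks, n_feats]
out = model(x)  # output shape depends on `_reshape_output` in the base class
```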