Abner0803
/

Transformer-RPB

Model card Files Files and versions

xet

Community

Abner0803 commited on Jan 9

Commit

011fa3c

verified ·

1 Parent(s): 0c2f092

Create README.md

Browse files

Files changed (1) hide show

README.md +268 -0

README.md ADDED Viewed

	@@ -0,0 +1,268 @@

+## To use these checkpoints, you need to use the following model structure for Transformer
+### Import used packages
+```python
+import math
+import torch
+from torch import nn
+```
+### PositionalEncoding
+```python
+class PositionalEncoding(nn.Module):
+    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000) -> None:
+        super().__init__()
+        self.dropout = nn.Dropout(p=dropout)
+        pe = torch.zeros(max_len, d_model)
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(
+            1
+        )  # (max_len, 1)
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
+        )
+        pe[:, 0::2] = torch.sin(position * div_term)  # (max_len, d_model // 2)
+        truncated_div_term = div_term[: d_model // 2]
+        pe[:, 1::2] = torch.cos(position * truncated_div_term)  #
+        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
+        self.register_buffer("pe", pe)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x + self.pe[: x.size(0), :, :]
+        return self.dropout(x)
+```
+### RPBClass
+```python
+class RelativePositionBiasV2(nn.Module):
+    def __init__(self, n_heads, num_buckets=32, max_distance=128, bidirectional=True):
+        super().__init__()
+        assert num_buckets % 2 == 0, "num_buckets should be even for bidirectional"
+        self.n_heads = n_heads
+        self.num_buckets = num_buckets
+        self.max_distance = max_distance
+        self.bidirectional = bidirectional
+        self.emb = nn.Embedding(num_buckets, n_heads)
+    def _relative_position_bucket(self, relative_position):
+        """
+        relative_position: [Tq, Tk] = k - q
+        returns bucket ids in [0, num_buckets-1]
+        """
+        num_buckets = self.num_buckets
+        max_distance = self.max_distance
+        ret = torch.zeros_like(relative_position, dtype=torch.long)
+        n = -relative_position  # want smaller buckets for n > 0 (keys before queries)
+        if self.bidirectional:
+            half = num_buckets // 2
+            ret += (n < 0).long() * half
+            n = n.abs()
+            num_buckets = half  # remaining buckets for non-negative distances
+        else:
+            n = torch.clamp(n, min=0)
+        # Now n >= 0
+        max_exact = num_buckets // 2
+        is_small = n < max_exact
+        # Avoid log(0) and division by zero; also ensure max_distance > max_exact
+        denom = max(1.0, math.log(max(max_distance, max_exact + 1) / max(1, max_exact)))
+        val_if_large = (
+            max_exact
+            + (
+                (torch.log(n.float() / max(1, max_exact) + 1e-6) / denom)
+                * (num_buckets - max_exact)
+            ).long()
+        )
+        val_if_large = torch.clamp(val_if_large, max=num_buckets - 1)
+        ret += torch.where(is_small, n.long(), val_if_large)
+        # Final clamp for absolute safety when bidirectional half-split was applied
+        return torch.clamp(ret, min=0, max=self.num_buckets - 1)
+    def forward(self, Tq, Tk, device=None):
+        device = device or torch.device("cpu")
+        qpos = torch.arange(Tq, device=device)[:, None]
+        kpos = torch.arange(Tk, device=device)[None, :]
+        buckets = self._relative_position_bucket(kpos - qpos)  # [Tq, Tk]
+        bias = self.emb(buckets)  # [Tq, Tk, H]
+        return bias.permute(2, 0, 1)  # [H, Tq, Tk]
+```
+### Transformer Base Class
+```python
+class BaseTransformerComp(nn.Module):
+    """Base class for transformer-based intra-stock components."""
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        num_layers: int,
+        num_heads: int,
+        dropout: float = 0.1,
+        mask_type: str = "none",
+    ) -> None:
+        super().__init__()
+        self.input_dim = input_dim
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.num_heads = num_heads
+        self.dropout_rate = dropout
+        self.mask_type = mask_type
+    def _reshape_input(self, x: torch.Tensor) -> tuple[torch.Tensor, int, int]:
+        """
+        Reshape input from [batch, seq_len, n_stocks, n_feats] to [seq_len, batch*n_stocks, n_feats].
+        Returns reshaped tensor and original batch/n_stocks sizes for later reconstruction.
+        """
+        batch, seq_len, n_stocks, n_feats = x.shape
+        if batch == 0 or seq_len == 0 or n_stocks == 0:
+            raise ValueError(
+                f"Invalid input dimensions: batch={batch}, seq_len={seq_len}, "
+                f"n_stocks={n_stocks}, n_feats={n_feats}"
+            )
+        x = x.permute(0, 2, 1, 3).contiguous()
+        x = x.reshape(batch * n_stocks, seq_len, n_feats)  # [b * s, t, f]
+        x = x.permute(1, 0, 2).contiguous()  # [t, b * s, f]
+        return x, batch, n_stocks
+    def _reshape_output(
+        self, x: torch.Tensor, batch: int, n_stocks: int
+    ) -> torch.Tensor:
+        """Reshape output from [seq_len, batch*n_stocks, hidden_dim] to [batch, n_stocks, hidden_dim]."""
+        output = x[-1]  # Take last time step: [b * s, hidden_dim]
+        output = output.reshape(batch, n_stocks, -1)  # [b, s, hidden_dim]
+        return output
+    def _generate_causal_mask(self, seq_len: int, device: torch.device) -> torch.Tensor:
+        """Generate causal attention mask."""
+        mask = torch.triu(
+            torch.ones(seq_len, seq_len, device=device) * float("-inf"), diagonal=1
+        )
+        return mask
+```
+### RPB Components
+```python
+class TransformerRPBComp(BaseTransformerComp):
+    """TransformerComp with Relative Bias Pooling."""
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        num_layers: int,
+        num_heads: int,
+        dropout: float = 0.1,
+        mask_type: str = "none",
+    ) -> None:
+        super().__init__(input_dim, hidden_dim, num_layers, num_heads, dropout)
+        self.feature_layer = nn.Linear(input_dim, hidden_dim)
+        self.pe = PositionalEncoding(hidden_dim, dropout)
+        self.encoder_norm = nn.LayerNorm(hidden_dim)
+        self.mask_type = mask_type
+        self.rbp = RelativePositionBiasV2(n_heads=num_heads)
+        self.encoder_layers = nn.ModuleList(
+            [
+                TransformerEncoderLayerWithRPB(
+                    d_model=hidden_dim,
+                    nhead=num_heads,
+                    dim_feedforward=hidden_dim * 4,
+                    dropout=dropout,
+                    rbp=self.rbp,
+                )
+                for _ in range(num_layers)
+            ]
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """x.shape [batch, seq_len, n_stocks, n_feats]"""
+        x, batch, n_stocks = self._reshape_input(x)
+        seq_len = x.shape[0]
+        x = self.encoder_norm(self.pe(self.feature_layer(x)))  # [t, b * s, d_model]
+        if self.mask_type == "causal":
+            mask = self._generate_causal_mask(seq_len, x.device).permute(1, 0)
+        else:
+            mask = None
+        for layer in self.encoder_layers:
+            x = layer(x, src_mask=mask)
+        return self._reshape_output(x, batch, n_stocks)
+```
+### Transformer Module
+```python
+class Transformer(nn.Module):
+    def __init__(
+        self,
+        input_dim: int,
+        output_dim: int = 1,
+        hidden_dim: int = 256,
+        num_layers: int = 2,
+        num_heads: int = 4,
+        dropout: float = 0.1,
+        tfm_type: str = "base",
+        mask_type: str = "none",
+    ) -> None:
+        """
+        tfm_type: "base", "rope", "rpb"
+        mask_type: "none", "alibi", "causal"
+        """
+        super().__init__()
+        self.tfm_type = tfm_type
+        self.mask_type = mask_type
+        tfm_type_mapper = {
+            "base": TransformerComp,
+            "alibi": TransformerComp,
+            "rope": TransformerRoPEComp,
+            "rpb": TransformerRPBComp,
+        }
+        self.transformer_encoder = tfm_type_mapper[self.tfm_type](
+            input_dim=input_dim,
+            hidden_dim=hidden_dim,
+            num_layers=num_layers,
+            num_heads=num_heads,
+            dropout=dropout,
+            mask_type=mask_type,
+        )
+        self.fc_out = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim, bias=True),
+            nn.GELU(),
+            nn.Linear(hidden_dim, output_dim, bias=True),
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        tfm_out = self.transformer_encoder(x)  # [b, s, d_model]
+        final_out = self.fc_out(tfm_out).squeeze(-1)  # [b, s]
+        return final_out
+```
+### Model Configuration
+```yaml
+input_dim: 8,
+output_dim: 1,
+hidden_dim: 64,
+num_layers: 2,
+num_heads: 4,
+dropout: 0.0,
+tfm_type: "rpb",
+mask_type: "causal",
+```