## Model Structure

```python
class MambaTransformerSimple(nn.Module):
    """Hybrid sequence model: a Mamba SSM block followed by a Transformer encoder.

    Expects 4-D input of shape [b, t, s, f]; each of the b*s length-t
    sequences is encoded independently, and the representation at the final
    timestep is projected to a single scalar, giving a [b, s] output.
    (Presumably b=batch, t=time, s=instruments, f=features — TODO confirm
    against the caller.)
    """

    def __init__(
        self,
        d_feat: int = 8,
        hidden_size: int = 64,
        num_layers: int = 1,
        dropout: float = 0.0,
        noise_level: float = 0.0,
        d_state: int = 16,
        d_conv: int = 4,
        expand: int = 2,
        mask_type: str = "none",
    ) -> None:
        """Build the Mamba + Transformer stack.

        Args:
            d_feat: Number of input features per timestep (last input dim).
            hidden_size: Shared model width (d_model) for both the Mamba
                block and the Transformer encoder.
            num_layers: Number of Transformer encoder layers.
            dropout: Dropout probability inside the Transformer layers.
            noise_level: NOTE(review): accepted but never stored or read in
                this class — presumably consumed by an external training
                loop; confirm before relying on it.
            d_state: Mamba SSM state dimension.
            d_conv: Mamba local convolution width.
            expand: Mamba block expansion factor.
            mask_type: "causal" applies a causal attention mask in
                forward(); any other value disables masking.
        """
        super().__init__()
        self.mask_type = mask_type
        # Encoder runs in time-major [t, batch, h] layout (batch_first=False).
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            nhead=4,  # fixed head count; hidden_size must be divisible by 4
            dim_feedforward=hidden_size * 4,
            dropout=dropout,
            activation="relu",
            batch_first=False,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=num_layers
        )
        # Project raw features up to the shared model width.
        self.input_proj = nn.Linear(d_feat, hidden_size)
        self.mamba = Mamba(
            d_model=hidden_size, d_state=d_state, d_conv=d_conv, expand=expand
        )
        # LayerNorm between the Mamba block and the Transformer encoder.
        self.mid_norm = nn.LayerNorm(hidden_size)
        # Two-layer MLP head projecting to one scalar per sequence.
        self.out = nn.Sequential(
            nn.Linear(hidden_size, hidden_size), nn.GELU(), nn.Linear(hidden_size, 1)
        )

    def _generate_causal_mask(self, seq_len: int, device: torch.device) -> torch.Tensor:
        """Generate causal attention mask.

        Returns an additive [seq_len, seq_len] mask: -inf strictly above
        the diagonal (future positions), 0 on and below it, suitable for
        the encoder's ``mask=`` argument.
        """
        mask = torch.triu(
            torch.ones(seq_len, seq_len, device=device) * float("-inf"), diagonal=1
        )
        return mask

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Score each of the s series: [b, t, s, f] -> [b, s]."""
        b, t, s, f = x.shape
        # Fold the s axis into the batch so each series is encoded alone.
        x = x.permute(0, 2, 1, 3).reshape(b * s, t, f)
        x = self.input_proj(x)  # [b * s, t, h]
        mamba_out = self.mamba(x)  # [b * s, t, h]
        # Transformer expects time-major input (batch_first=False above).
        mamba_out = mamba_out.permute(1, 0, 2).contiguous()  # [t, b * s, h]
        mamba_out = self.mid_norm(mamba_out)
        if self.mask_type == "causal":
            mask = self._generate_causal_mask(t, x.device)
        else:
            mask = None
        tfm_out = self.transformer_encoder(mamba_out, mask=mask)  # [t, b * s, h]
        # Keep only the final timestep's representation per series.
        tfm_out = tfm_out[-1].reshape(b, s, -1)
        final_out = self.out(tfm_out).squeeze(-1)  # [b, s]
        return final_out
```

## Model Config

```yaml
num_layers: 1
d_feat: 8
hidden_size: 64
d_state: 16
d_conv: 4
expand: 2
dropout: 0.1
noise_level: 0.0
mask_type: "none"
```