| from dataclasses import dataclass |
| from typing import Tuple |
|
|
| from transformers import PretrainedConfig |
|
|
|
|
# eq/repr generation is disabled on purpose: this class declares no dataclass
# fields of its own, so the generated methods would compare an empty field
# tuple (every instance equal) and print ``sCTConfig()``, shadowing whatever
# ``PretrainedConfig`` provides for ``__eq__`` / ``__repr__``.
@dataclass(eq=False, repr=False)
class sCTConfig(PretrainedConfig):
    """Configuration object for the ``"sCT"`` model type.

    All options are supplied as keyword arguments; any option that is not
    given falls back to the default shown below.

    Keyword Args:
        alphabet_size: default ``7``.
        pad_token_id: default ``5``.
        mask_token_id: default ``6``.
        cell_len: default ``19968``.
        num_downsamples: default ``8``.
        attention_heads: default ``16``.
        key_size: default ``None``; when ``None`` it is derived as
            ``embed_dim // attention_heads``.
        token_embed_dim: default ``16``.
        embed_dim: default ``1024``.
        ffn_embed_dim: default ``2048``.
        num_layers: default ``4``.
        layer_norm_eps: default ``1e-5``.
        interpolation_method: default ``"nearest"``.
        max_positions: default ``20480``.
        num_cells: default ``50``.
        num_hidden_layers_head: default ``1``.
        use_skip_connection: default ``True``.
        embeddings_layers_to_save: default ``()``.
        attention_maps_to_save: default ``[]``.
        use_spatial_information: default ``False``.
        num_scales: default ``10``.
        sigma_min: default ``1.0``.
        sigma_max: default ``10.0``.

    Raises:
        ValueError: if ``key_size`` is ``None`` and ``embed_dim`` is not
            divisible by ``attention_heads``.
    """

    model_type = "sCT"

    def __init__(self, **kwargs):
        """Read every option from ``kwargs``, then initialise the base class.

        Remaining keyword arguments are forwarded unchanged to
        ``PretrainedConfig.__init__``.
        """
        # ``pad_token_id`` is also an argument of the base initializer, which
        # would reset it to its own default if it were not forwarded
        # explicitly. Pop it here and pass the resolved value along below.
        pad_token_id = kwargs.pop("pad_token_id", 5)

        self.alphabet_size = kwargs.get("alphabet_size", 7)
        self.pad_token_id = pad_token_id
        self.mask_token_id = kwargs.get("mask_token_id", 6)
        self.cell_len = kwargs.get("cell_len", 19968)

        self.num_downsamples = kwargs.get("num_downsamples", 8)
        self.attention_heads = kwargs.get("attention_heads", 16)
        self.key_size = kwargs.get("key_size", None)
        self.token_embed_dim = kwargs.get("token_embed_dim", 16)

        self.embed_dim = kwargs.get("embed_dim", 1024)
        self.ffn_embed_dim = kwargs.get("ffn_embed_dim", 2048)
        self.num_layers = kwargs.get("num_layers", 4)
        self.layer_norm_eps = kwargs.get("layer_norm_eps", 1e-5)
        self.interpolation_method = kwargs.get("interpolation_method", "nearest")

        self.max_positions: int = kwargs.get("max_positions", 20480)
        self.num_cells: int = kwargs.get("num_cells", 50)
        self.num_hidden_layers_head: int = kwargs.get("num_hidden_layers_head", 1)

        self.use_skip_connection: bool = kwargs.get("use_skip_connection", True)

        # Not read from kwargs here; a caller-supplied value, if any, is only
        # seen by the base initializer that receives ``**kwargs`` below.
        self.use_gradient_checkpointing: bool = False

        self.embeddings_layers_to_save: Tuple[int, ...] = kwargs.get(
            "embeddings_layers_to_save", ()
        )
        self.attention_maps_to_save: list[tuple[int, int]] = kwargs.get(
            "attention_maps_to_save", []
        )

        self.use_spatial_information: bool = kwargs.get(
            "use_spatial_information", False
        )
        self.num_scales: int = kwargs.get("num_scales", 10)
        self.sigma_min: float = kwargs.get("sigma_min", 1.0)
        self.sigma_max: float = kwargs.get("sigma_max", 10.0)

        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # ``dataclass`` does not generate an ``__init__`` when the class
        # defines one, so nothing would call ``__post_init__`` automatically.
        # Run it last so that values re-applied by the base initializer cannot
        # undo the derived ``key_size``.
        self.__post_init__()

    def __post_init__(self) -> None:
        """Check that the given values are compatible.

        When ``key_size`` is ``None`` it is set to
        ``embed_dim // attention_heads``. Calling this again afterwards is a
        no-op because ``key_size`` is then already set.

        Raises:
            ValueError: if ``key_size`` is ``None`` and ``embed_dim`` is not a
                multiple of ``attention_heads``.
        """
        if self.key_size is not None:
            return

        if self.embed_dim % self.attention_heads != 0:
            raise ValueError(
                "When no key size is provided, the embedding dimension "
                "should be divisible by the number of heads, however "
                f"provided embedding dimension is {self.embed_dim} and "
                f"the number of heads is {self.attention_heads}."
            )
        self.key_size = self.embed_dim // self.attention_heads
|
|