| """ |
| ================================================ |
| @author: Jaron |
| @time: 2024/07/10 19:43:31 |
| @email: fjjth98@163.com |
| @description: Causal Cross-Attention Mask (CCAM) |
| ================================================ |
| """ |
|
|
| from transformers import PretrainedConfig |
|
|
|
|
| class CCAMConfig(PretrainedConfig): |
| model_type = 'ccam' |
| _auto_class = 'AutoConfig' |
|
|
| def __init__( |
| self, |
| num_query: int = 1024, |
| num_heads: int = 16, |
| hidden_size: int = 1024, |
| intermediate_size: int = 4096, |
| num_key_value_heads: int = 16, |
| dropout: float = 0.1, |
| mlp_bias: bool = True, |
| hidden_act: str = 'swiglu', |
| output_size: int = None, |
| attention_bias: bool = True, |
| layer_norm_eps: float = 1e-5, |
| cross_hidden_size: int = None, |
| attention_dropout: float = 0.1, |
| _attn_implementation: str = 'sdpa', |
| **kwargs |
| ): |
        super().__init__(**kwargs)
        self.dropout = dropout
        self.mlp_bias = mlp_bias
        self.num_query = num_query
        self.num_heads = num_heads
        self.hidden_act = hidden_act
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.layer_norm_eps = layer_norm_eps
        self.attention_bias = attention_bias
        self.intermediate_size = intermediate_size
        self.cross_hidden_size = cross_hidden_size
        self.attention_dropout = attention_dropout
        self.num_key_value_heads = num_key_value_heads
        self._attn_implementation = _attn_implementation
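

if __name__ == '__main__':
    # Minimal usage sketch (illustrative, not part of the original module): build
    # a config with the defaults, overriding cross_hidden_size to stand in for a
    # hypothetical vision-encoder width, then inspect the serialized form via the
    # standard PretrainedConfig API.
    config = CCAMConfig(cross_hidden_size=1408)  # 1408 is an illustrative value
    print(config.to_dict())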
|