UniRNA-L16 / config.py
EscheWang's picture
Upload folder using huggingface_hub
0867146 verified
import os
from transformers import PretrainedConfig
class UniRNAConfig(PretrainedConfig):
    """Hyperparameter container for UniRNA models.

    The special-token ids, ``tie_word_embeddings`` and ``is_decoder`` are
    handed to the ``PretrainedConfig`` constructor together with any extra
    keyword arguments; every other argument is stored unchanged as an
    instance attribute of the same name.

    Args:
        vocab_size: Stored as ``vocab_size``.
        hidden_size: Stored as ``hidden_size``.
        num_hidden_layers: Stored as ``num_hidden_layers``.
        num_attention_heads: Stored as ``num_attention_heads``.
        intermediate_size: Stored as ``intermediate_size``.
        hidden_dropout_prob: Stored as ``hidden_dropout_prob``.
        attention_probs_dropout_prob: Stored as
            ``attention_probs_dropout_prob``.
        max_position_embeddings: Stored as ``max_position_embeddings``.
        layer_norm_eps: Stored as ``layer_norm_eps``.
        pad_token_id: Forwarded to the parent constructor.
        sep_token_id: Forwarded to the parent constructor.
        cls_token_id: Forwarded to the parent constructor.
        mask_token_id: Forwarded to the parent constructor.
        emb_layer_norm_before: Stored as ``emb_layer_norm_before``.
        token_dropout: Stored as ``token_dropout``.
        position_embedding_type: Stored as ``position_embedding_type``.
        use_flash_attention: Stored as ``use_flash_attention``.
        tie_word_embeddings: Forwarded to the parent constructor.
        is_decoder: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor. An ``architectures``
            entry, when present, is also read here (see below).
    """

    model_type: str = "unirna"

    def __init__(
        self,
        vocab_size: int = 10,
        hidden_size: int = 768,
        num_hidden_layers: int = 12,
        num_attention_heads: int = 12,
        intermediate_size: int = 3072,
        hidden_dropout_prob: float = 0.0,
        attention_probs_dropout_prob: float = 0.0,
        max_position_embeddings: int = 1026,
        layer_norm_eps: float = 1e-5,
        pad_token_id: int = 0,
        sep_token_id: int = 1,
        cls_token_id: int = 3,
        mask_token_id: int = 4,
        emb_layer_norm_before: bool = True,
        token_dropout: bool = True,
        position_embedding_type: str = "rotary",
        use_flash_attention: bool = False,
        tie_word_embeddings: bool = False,
        is_decoder: bool = False,
        **kwargs,
    ):
        # The attribute is seeded before the parent constructor runs so that
        # it already exists on the instance if anything reads it early. The
        # key is left in ``kwargs`` and therefore still reaches the parent.
        self.architectures = kwargs.get("architectures")

        super().__init__(
            pad_token_id=pad_token_id,
            sep_token_id=sep_token_id,
            cls_token_id=cls_token_id,
            mask_token_id=mask_token_id,
            tie_word_embeddings=tie_word_embeddings,
            is_decoder=is_decoder,
            **kwargs,
        )

        # Size-related settings.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Regularisation and normalisation settings.
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.token_dropout = token_dropout
        self.layer_norm_eps = layer_norm_eps
        self.emb_layer_norm_before = emb_layer_norm_before

        # Positional-encoding and attention-kernel switches.
        self.position_embedding_type = position_embedding_type
        self.use_flash_attention = use_flash_attention

        # Fall back to the masked-LM head when no architecture was supplied.
        if self.architectures is None:
            self.architectures = ["UniRNAForMaskedLM"]
def build_config(path):
    """Build a :class:`UniRNAConfig` from a checkpoint file name.

    The base name of ``path`` (directory and final extension removed) is
    split on underscores and must look like
    ``<model_type>_<c><num_layers>_<c><hidden_size>[_...]``, where ``<c>`` is
    a single prefix character that is skipped before the integer is parsed
    (for instance ``L16``). Fields after the third are ignored.

    Args:
        path: Path or file name of the checkpoint.

    Returns:
        A ``UniRNAConfig`` whose ``_name_or_path`` is the parsed base name.

    Raises:
        ValueError: If the base name has fewer than three underscore-separated
            fields, or the layer/hidden fields do not contain an integer after
            their first character.
    """
    stem = os.path.splitext(path)[0]
    name = os.path.basename(stem)

    # Only the first three fields are used, so a fourth one is not required;
    # fail with a message that names the offending file instead of an opaque
    # tuple-unpacking error.
    fields = name.split("_")
    if len(fields) < 3:
        raise ValueError(
            f"Cannot derive a UniRNA config from {name!r}: expected at least "
            "three '_'-separated fields "
            "(<model_type>_<c><num_layers>_<c><hidden_size>)."
        )
    model_type, layers_field, hidden_field = fields[:3]

    try:
        # Each field carries a one-character prefix in front of the number.
        num_hidden_layers = int(layers_field[1:])
        hidden_size = int(hidden_field[1:])
    except ValueError as err:
        raise ValueError(
            f"Cannot derive a UniRNA config from {name!r}: fields "
            f"{layers_field!r} and {hidden_field!r} must each be a single "
            "prefix character followed by an integer."
        ) from err

    num_attention_heads = hidden_size // 64
    # NOTE(review): 3x here, while build_config_GENE uses 4x and the class
    # defaults are 768 -> 3072 — confirm this ratio is intended.
    intermediate_size = hidden_size * 3

    config = UniRNAConfig(
        model_type=model_type,
        num_hidden_layers=num_hidden_layers,
        hidden_size=hidden_size,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        pad_token_id=0,
        sep_token_id=1,
        mask_token_id=4,
        cls_token_id=3,
        vocab_size=10,
        emb_layer_norm_before=True,
        layer_norm_eps=1e-5,
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        token_dropout=True,
        initializer_range=0.02,
        use_flash_attention=True,
        max_position_embeddings=1026,
        position_embedding_type="rotary",
        tie_word_embeddings=False,
    )
    config._name_or_path = name
    return config
def build_config_GENE(path, num_hidden_layers: int, hidden_size: int, vocab_size: int, model_type="GENE"):
    """Build a :class:`UniRNAConfig` from explicitly supplied dimensions.

    Unlike a name-parsing factory, nothing is read out of the file name here;
    ``path`` only provides the value stored in ``_name_or_path``.

    Args:
        path: Path or file name of the checkpoint; its base name without the
            final extension becomes ``_name_or_path``.
        num_hidden_layers: Number of layers; coerced with ``int()``.
        hidden_size: Hidden width; coerced with ``int()``. The head count is
            ``hidden_size // 64`` and the intermediate size is
            ``hidden_size * 4``.
        vocab_size: Passed through to the config unchanged.
        model_type: Passed to the config as the ``model_type`` keyword.

    Returns:
        The populated ``UniRNAConfig``.
    """
    checkpoint_name = os.path.basename(os.path.splitext(path)[0])

    depth = int(num_hidden_layers)
    width = int(hidden_size)

    cfg = UniRNAConfig(
        model_type=model_type,
        # Caller-supplied or derived dimensions.
        vocab_size=vocab_size,
        num_hidden_layers=depth,
        hidden_size=width,
        num_attention_heads=width // 64,
        intermediate_size=width * 4,
        max_position_embeddings=1026,
        # Special-token ids.
        pad_token_id=0,
        sep_token_id=1,
        cls_token_id=3,
        mask_token_id=4,
        # Fixed settings.
        emb_layer_norm_before=True,
        layer_norm_eps=1e-5,
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        token_dropout=True,
        initializer_range=0.02,
        use_flash_attention=True,
        position_embedding_type="rotary",
        tie_word_embeddings=False,
    )
    cfg._name_or_path = checkpoint_name
    return cfg