Taykhoom
/

UTR-LM-MLM

Model card Files Files and versions

UTR-LM-MLM / configuration_utrlm.py

Taykhoom's picture

Upload folder using huggingface_hub

0a535de verified about 18 hours ago

history blame contribute delete

1.72 kB

	from transformers import PretrainedConfig


	class UtrLmConfig(PretrainedConfig):
	    """Hyperparameter container for UTR-LM (ESM2-based RNA language model).

	    Vocabulary (10 tokens), token -> index:

	        <pad>:0  <eos>:1  <unk>:2  A:3  G:4  C:5  T:6  <cls>:7  <mask>:8  <sep>:9

	    The default special-token indices accepted by ``__init__`` match this table.
	    """

	    model_type = "utrlm"

	    def __init__(
	        self,
	        num_layers: int = 6,
	        embed_dim: int = 128,
	        attention_heads: int = 16,
	        alphabet_size: int = 10,
	        padding_idx: int = 0,
	        mask_idx: int = 8,
	        cls_idx: int = 7,
	        eos_idx: int = 1,
	        prepend_bos: bool = True,
	        append_eos: bool = True,
	        token_dropout: bool = True,
	        **kwargs,
	    ):
	        """Store the UTR-LM hyperparameters on the config instance.

	        Args:
	            num_layers: Layer count, stored as ``self.num_layers``.
	            embed_dim: Embedding width; also returned by ``hidden_size``.
	            attention_heads: Head count, stored as ``self.attention_heads``.
	            alphabet_size: Vocabulary size (10 tokens in the table above).
	            padding_idx: Index of ``<pad>``; also used as the fallback for
	                ``pad_token_id`` when the caller does not pass one.
	            mask_idx: Index of ``<mask>``.
	            cls_idx: Index of ``<cls>``.
	            eos_idx: Index of ``<eos>``.
	            prepend_bos: Stored verbatim; presumably tells downstream code to
	                add a leading token -- confirm against the model/tokenizer.
	            append_eos: Stored verbatim; presumably tells downstream code to
	                add a trailing ``<eos>`` -- confirm against the model/tokenizer.
	            token_dropout: Stored verbatim; consumed outside this file.
	            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
	        """
	        # Only fill in pad_token_id when the caller has not chosen one.
	        if "pad_token_id" not in kwargs:
	            kwargs["pad_token_id"] = padding_idx
	        super().__init__(**kwargs)

	        # Written into config.json so AutoModel / AutoModelForMaskedLM resolve
	        # the correct classes when loading from the Hub with
	        # trust_remote_code=True. Assigned after the base initialiser, so it
	        # replaces any ``auto_map`` that arrived through kwargs.
	        remote_classes = {
	            "AutoConfig": "configuration_utrlm.UtrLmConfig",
	            "AutoTokenizer": "tokenization_utrlm.UtrLmTokenizer",
	            "AutoModel": "modeling_utrlm.UtrLmModel",
	            "AutoModelForMaskedLM": "modeling_utrlm.UtrLmForMaskedLM",
	        }
	        self.auto_map = remote_classes

	        # Network shape.
	        self.num_layers = num_layers
	        self.embed_dim = embed_dim
	        self.attention_heads = attention_heads

	        # Vocabulary size and special-token indices.
	        self.alphabet_size = alphabet_size
	        self.padding_idx = padding_idx
	        self.mask_idx = mask_idx
	        self.cls_idx = cls_idx
	        self.eos_idx = eos_idx

	        # Behaviour flags read by code outside this file.
	        self.prepend_bos = prepend_bos
	        self.append_eos = append_eos
	        self.token_dropout = token_dropout

	    @property
	    def hidden_size(self) -> int:
	        """Read-only alias that returns ``embed_dim``."""
	        width = self.embed_dim
	        return width