# CloverLM / configuration_cloverlm.py
# Uploaded by mansaripo via huggingface_hub (commit 05b36fb, verified)
from transformers import PretrainedConfig
class CloverLMConfig(PretrainedConfig):
    """Configuration class for CloverLM models.

    The core architecture is described by a small set of shape knobs
    (``num_blocks``, ``heads``, ``d_head``, ``ratio``, ...).  The
    HuggingFace/vLLM-facing fields (``hidden_size``,
    ``intermediate_size``, ``max_position_embeddings``,
    ``num_attention_heads``, ``num_key_value_heads``, ``head_dim``) may
    be passed explicitly; when left as ``None`` they are derived from
    the core shape parameters.

    Args:
        vocab_size: Size of the token vocabulary.
        num_blocks: Number of transformer blocks (also exposed as the
            HF-standard ``num_hidden_layers``).
        heads: Number of attention heads.
        d_head: Dimension of each attention head.
        ratio: Query-to-key/value head ratio; the derived
            ``num_key_value_heads`` default is ``heads // ratio``.
        scale_type: Attention-scale formula identifier.
        max_context: Maximum context length (default for
            ``max_position_embeddings``).
        quartet_2_impl: Implementation selector for the quartet-2 path.
        weight_tying: Whether input/output embeddings share weights;
            forwarded to HF as ``tie_word_embeddings``.
        attn_backend: Attention backend identifier.
        hidden_size, intermediate_size, max_position_embeddings,
        num_attention_heads, num_key_value_heads, head_dim: Optional
            explicit overrides for the derived HF/vLLM fields.
        **kwargs: Forwarded to :class:`PretrainedConfig`.
    """

    model_type = "cloverlm"

    def __init__(
        self,
        vocab_size=32000,
        num_blocks=4,
        heads=6,
        d_head=128,
        ratio=3,
        scale_type="1/sqrt(d)",
        max_context=1024,
        quartet_2_impl="pseudoquant",
        weight_tying=True,
        attn_backend="pytorch",
        # Optional: HuggingFace / vLLM tooling (defaults derived from shape)
        hidden_size=None,
        intermediate_size=None,
        max_position_embeddings=None,
        num_attention_heads=None,
        num_key_value_heads=None,
        head_dim=None,
        **kwargs,
    ):
        # Core shape / behavior knobs, stored verbatim.
        self.num_blocks = num_blocks
        self.num_hidden_layers = num_blocks  # HF-standard alias of num_blocks
        self.heads = heads
        self.d_head = d_head
        self.ratio = ratio
        self.scale_type = scale_type
        self.max_context = max_context
        self.quartet_2_impl = quartet_2_impl
        self.weight_tying = weight_tying
        self.attn_backend = attn_backend

        model_dim = heads * d_head

        def _default(value, fallback):
            # Explicit `is None` test so that falsy-but-meaningful
            # overrides (e.g. 0) are still honored.
            return fallback if value is None else value

        # HF / vLLM tooling fields: explicit value wins, otherwise
        # derive from the core shape parameters above.
        self.hidden_size = _default(hidden_size, model_dim)
        self.intermediate_size = _default(intermediate_size, 4 * model_dim)
        self.max_position_embeddings = _default(max_position_embeddings, max_context)
        self.num_attention_heads = _default(num_attention_heads, heads)
        self.num_key_value_heads = _default(num_key_value_heads, heads // ratio)
        self.head_dim = _default(head_dim, d_head)

        # weight_tying is authoritative here: discard any caller-supplied
        # tie_word_embeddings so super().__init__ doesn't see a duplicate.
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(
            vocab_size=vocab_size,
            tie_word_embeddings=weight_tying,
            **kwargs,
        )