# C2LLM-7B / configuration_c2llm.py
from transformers import PretrainedConfig


class C2LLMConfig(PretrainedConfig):
    """Configuration for the C2LLM decoder-only language model.

    The defaults mirror a Qwen2-7B-style architecture: grouped-query
    attention with 4 KV heads, a SwiGLU MLP, RMSNorm, and rotary position
    embeddings with a large base (`rope_theta`).
    """

    model_type = "c2llm"
    keys_to_ignore_at_inference = ["past_key_values"]
    # Tensor-parallel plan: how each linear layer is sharded across devices
    # ("colwise" splits output features, "rowwise" splits input features).
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    # Pipeline-parallel plan: per-module (input names, output names) used to
    # split the model into pipeline stages.
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }
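
    # Usage sketch for the plans above (assumption: a transformers release
    # with built-in tensor-parallel loading, roughly v4.47+, launched on
    # multiple GPUs via `torchrun`):
    #
    #     model = AutoModelForCausalLM.from_pretrained(checkpoint, tp_plan="auto")
    #
    # where `checkpoint` is a placeholder for the model path; the TP plan is
    # then applied per layer according to `base_model_tp_plan`.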

    # `model_type` is a class attribute and `transformers_version` is managed
    # by `PretrainedConfig` itself, so neither is an __init__ parameter.
    def __init__(
        self,
        attention_dropout=0.0,
        bos_token_id=151643,
        eos_token_id=151645,
        hidden_act="silu",
        hidden_size=3584,
        initializer_range=0.02,
        intermediate_size=18944,
        max_position_embeddings=32768,
        max_window_layers=28,
        num_attention_heads=28,
        num_hidden_layers=28,
        num_key_value_heads=4,
        rms_norm_eps=1e-6,
        rope_theta=1000000.0,
        sliding_window=131072,
        tie_word_embeddings=False,
        torch_dtype="bfloat16",
        use_cache=True,
        use_sliding_window=False,
        vocab_size=152064,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.use_sliding_window = use_sliding_window
        # Record the window size only when sliding-window attention is on.
        self.sliding_window = sliding_window if use_sliding_window else None
        self.max_window_layers = max_window_layers
        # For backward compatibility: default to standard multi-head attention
        # when no KV-head count is given.
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_dropout = attention_dropout
        # Token ids and dtype are handled by `PretrainedConfig`, so forward
        # them explicitly rather than letting them be silently dropped.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            torch_dtype=torch_dtype,
            **kwargs,
        )

    def to_dict(self):
        """Serialize the config, stripping keys that should not be saved."""
        output = super().to_dict()
        # Keep runtime-only attributes such as `base_model` out of config.json.
        for key in ("base_model",):
            output.pop(key, None)
        return output

__all__ = ["C2LLMConfig"]
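

# Minimal smoke test, a sketch for local verification (assumes only that the
# `transformers` package is installed; no checkpoint is needed).
if __name__ == "__main__":
    config = C2LLMConfig()

    # Derived attention geometry: 3584 / 28 = 128-dim heads, and 28 query
    # heads sharing 4 KV heads gives 7-way grouped-query attention.
    head_dim = config.hidden_size // config.num_attention_heads
    gqa_groups = config.num_attention_heads // config.num_key_value_heads
    print(f"head_dim={head_dim}, gqa_groups={gqa_groups}")

    # `to_dict` strips `base_model`, so the serialized form never carries it.
    serialized = config.to_dict()
    assert "base_model" not in serialized
    assert serialized["model_type"] == "c2llm"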