from transformers import PretrainedConfig


class TinyLLMConfig(PretrainedConfig):
    model_type = "tinyllm"

    def __init__(self, model_cfg=None, **kwargs):
        super().__init__(**kwargs)
        self.model_cfg = model_cfg or {}
        # Map fields from the nested model_cfg dict onto the standard
        # Hugging Face attribute names, falling back first to any value
        # passed directly as a keyword argument, then to 0.
        self.vocab_size = self.model_cfg.get(
            "vocab_size", kwargs.get("vocab_size", 0)
        )
        self.hidden_size = self.model_cfg.get(
            "hidden_dim", kwargs.get("hidden_size", 0)
        )
        self.num_hidden_layers = self.model_cfg.get("core_model", {}).get(
            "num_layers", kwargs.get("num_hidden_layers", 0)
        )
        self.num_attention_heads = (
            self.model_cfg.get("core_model", {})
            .get("attn", {})
            .get("num_heads", kwargs.get("num_attention_heads", 0))
        )
        self.max_position_embeddings = self.model_cfg.get(
            "context_window", kwargs.get("max_position_embeddings", 0)
        )
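
# A minimal usage sketch. The nested keys ("vocab_size", "hidden_dim",
# "context_window", "core_model.num_layers", "core_model.attn.num_heads")
# match those read by the class above; the sample values and the
# "tinyllm-config" directory name are illustrative assumptions.
if __name__ == "__main__":
    model_cfg = {
        "vocab_size": 32000,
        "hidden_dim": 512,
        "context_window": 2048,
        "core_model": {
            "num_layers": 8,
            "attn": {"num_heads": 8},
        },
    }
    config = TinyLLMConfig(model_cfg=model_cfg)
    print(config.hidden_size)           # 512
    print(config.num_attention_heads)   # 8

    # Round-trip through save_pretrained / from_pretrained to check that
    # the config serializes like any other Hugging Face config: model_cfg
    # is stored in config.json and passed back into __init__ on load.
    config.save_pretrained("tinyllm-config")
    reloaded = TinyLLMConfig.from_pretrained("tinyllm-config")
    print(reloaded.num_hidden_layers)   # 8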