from transformers import PretrainedConfig


class TinyLLMConfig(PretrainedConfig):
    """Maps TinyLLM's nested model_cfg onto the flat attribute names that
    transformers expects (hidden_size, num_hidden_layers, ...)."""

    model_type = "tinyllm"

    def __init__(self, model_cfg=None, **kwargs):
        super().__init__(**kwargs)
        # Keep the raw nested config so it round-trips through config.json.
        self.model_cfg = model_cfg or {}
        # Prefer the nested TinyLLM config, fall back to the standard HF
        # kwarg, and finally to 0 so the attribute always exists.
        self.vocab_size = self.model_cfg.get("vocab_size", kwargs.get("vocab_size", 0))
        self.hidden_size = self.model_cfg.get("hidden_dim", kwargs.get("hidden_size", 0))
        self.num_hidden_layers = self.model_cfg.get("core_model", {}).get(
            "num_layers", kwargs.get("num_hidden_layers", 0)
        )
        self.num_attention_heads = (
            self.model_cfg.get("core_model", {})
            .get("attn", {})
            .get("num_heads", kwargs.get("num_attention_heads", 0))
        )
        self.max_position_embeddings = self.model_cfg.get(
            "context_window", kwargs.get("max_position_embeddings", 0)
        )
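
# --- Usage sketch (illustrative, not from the original source). The nested
# keys mirror the lookups above; the concrete values and the
# "tinyllm-checkpoint" directory name are hypothetical placeholders. ---
if __name__ == "__main__":
    cfg = TinyLLMConfig(
        model_cfg={
            "vocab_size": 32000,
            "hidden_dim": 512,
            "context_window": 2048,
            "core_model": {"num_layers": 8, "attn": {"num_heads": 8}},
        }
    )
    print(cfg.num_attention_heads)  # -> 8, read from model_cfg["core_model"]["attn"]
    cfg.save_pretrained("tinyllm-checkpoint")  # writes config.json via PretrainedConfig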