# ====================================================================
# configuration_unified.py
# ====================================================================
"""
Configuration class for Unified Language Model

HuggingFace Transformers compatible configuration with AutoClass support.
"""

from typing import Optional

from transformers import PretrainedConfig


class UnifiedModelConfig(PretrainedConfig):
    """
    Configuration class for UnifiedModel.
    Inherits from PretrainedConfig for full HuggingFace compatibility.
    """

    model_type = "unified_model"

    def __init__(
        self,
        vocab_size: Optional[int] = None,
        hidden_size: int = 256,
        intermediate_size: int = 1024,
        num_hidden_layers: int = 6,
        num_attention_heads: int = 8,
        num_key_value_heads: int = 4,
        max_position_embeddings: int = 2048,
        rms_norm_eps: float = 1e-6,
        rope_theta: float = 10000.0,
        attention_dropout: float = 0.1,
        mlp_dropout: float = 0.1,
        embedding_dropout: float = 0.1,
        xielu_alpha_p_init: float = 0.8,
        xielu_alpha_n_init: float = 0.8,
        xielu_beta: float = 0.5,
        tie_word_embeddings: bool = True,  # HuggingFace standard parameter name
        # LaX configuration (linear only)
        lax_enabled: bool = True,
        lax_gate_type: str = "linear",  # only "linear" is supported now
        # Canon Layers configuration (A + C only)
        canon_enabled: bool = True,
        canon_kernel_size: int = 4,
        canon_a_enabled: bool = True,  # before attention
        canon_c_enabled: bool = True,  # before MLP
        # Canon B and D are permanently disabled
        # FANFormer configuration
        fanformer_p: float = 0.15,
        # HuggingFace standard parameters
        pad_token_id: Optional[int] = None,
        bos_token_id: Optional[int] = None,
        eos_token_id: Optional[int] = None,
        **kwargs,
    ):
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.rope_theta = rope_theta
        self.attention_dropout = attention_dropout
        self.mlp_dropout = mlp_dropout
        self.embedding_dropout = embedding_dropout
        self.xielu_alpha_p_init = xielu_alpha_p_init
        self.xielu_alpha_n_init = xielu_alpha_n_init
        self.xielu_beta = xielu_beta
        self.tie_word_embeddings = tie_word_embeddings

        # LaX configuration
        self.lax_enabled = lax_enabled
        self.lax_gate_type = lax_gate_type

        # Canon Layers configuration
        self.canon_enabled = canon_enabled
        self.canon_kernel_size = canon_kernel_size
        self.canon_a_enabled = canon_a_enabled
        self.canon_c_enabled = canon_c_enabled

        # FANFormer
        self.fanformer_p = fanformer_p

        # ✅ FIXED: Force a complete auto_map into config.json so the
        # AutoClasses can locate the custom config and model code.
        self.auto_map = {
            "AutoConfig": "configuration_unified.UnifiedModelConfig",
            "AutoModel": "modeling_unified.UnifiedModel",
            "AutoModelForCausalLM": "modeling_unified.UnifiedModel",
        }

    def to_diff_dict(self):
        """
        ✅ FIXED: Force serialization of tie_word_embeddings into config.json.

        Overrides to_diff_dict() to ensure that tie_word_embeddings always
        appears in config.json, avoiding loading issues where HuggingFace
        does not recognize the weight tying.

        Returns:
            Dict: Serialized configuration with tie_word_embeddings forced in.
        """
        # Get the normal serialization (differences from defaults only)
        output = super().to_diff_dict()

        # ✅ FORCE the inclusion of tie_word_embeddings. This ensures it
        # appears in config.json regardless of whether HF considers it a
        # "default" value or not.
        output["tie_word_embeddings"] = self.tie_word_embeddings

        return output
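

# --------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal example of
# registering this config for AutoClass loading and round-tripping it
# through config.json. The vocab_size value and output directory name
# are illustrative assumptions.
# --------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import AutoConfig

    # Make AutoConfig resolve model_type "unified_model" to this class
    # locally, without needing trust_remote_code.
    AutoConfig.register("unified_model", UnifiedModelConfig)

    config = UnifiedModelConfig(vocab_size=32000)  # illustrative size
    config.save_pretrained("./unified_model")  # writes config.json

    # Thanks to the to_diff_dict() override, tie_word_embeddings is
    # guaranteed to be present in the serialized config.json.
    reloaded = AutoConfig.from_pretrained("./unified_model")
    assert reloaded.tie_word_embeddings is True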