File size: 1,113 Bytes
7e1eb73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# -*- coding: utf-8 -*-
"""
Shared Subspace Decoder Models

This module exposes the Shared Subspace Decoder architecture, which includes
Multi-Head Latent Attention (MLA) and decomposed MLP layers, and registers its
configuration and model classes with the transformers Auto* classes.
"""
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
from .configuration_shared_subspace_decoder import SharedSpaceDecoderConfig
from .modeling_shared_subspace_decoder import (
SharedSpaceDecoderPreTrainedModel,
SharedSpaceDecoderModel,
)
# Import from task_heads in layers directory
from ..layers.task_heads import SharedSpaceDecoderForCausalLM
# Wire the custom architecture into the transformers Auto* registries.
# The config is registered first under the "shared_subspace_decoder" key;
# each Auto model class is then mapped from that config to its model class.
AutoConfig.register("shared_subspace_decoder", SharedSpaceDecoderConfig)
for _auto_cls, _model_cls in (
    (AutoModel, SharedSpaceDecoderModel),
    (AutoModelForCausalLM, SharedSpaceDecoderForCausalLM),
):
    _auto_cls.register(SharedSpaceDecoderConfig, _model_cls)
# Drop the loop temporaries so the module namespace is unchanged.
del _auto_cls, _model_cls
# Public API: the names exported by a star-import of this module. All four
# are imported above from the configuration, modeling and task_heads modules.
__all__ = [
"SharedSpaceDecoderConfig",
"SharedSpaceDecoderPreTrainedModel",
"SharedSpaceDecoderModel",
"SharedSpaceDecoderForCausalLM",
]
|