"""
Shared Subspace Decoder Models

This module contains the implementation of the Shared Subspace Decoder architecture,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers.

Importing this module has a side effect: the configuration and model classes
are registered with the ``transformers`` ``AutoConfig``, ``AutoModel`` and
``AutoModelForCausalLM`` factories.
"""

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from .configuration_shared_subspace_decoder import SharedSpaceDecoderConfig
from .modeling_shared_subspace_decoder import (
    SharedSpaceDecoderPreTrainedModel,
    SharedSpaceDecoderModel,
)

from ..layers.task_heads import SharedSpaceDecoderForCausalLM

# Associate the "shared_subspace_decoder" model-type string with its config class.
# NOTE(review): presumably this string must equal SharedSpaceDecoderConfig.model_type
# -- confirm against the configuration module.
AutoConfig.register("shared_subspace_decoder", SharedSpaceDecoderConfig)

# Associate the config class with the base model and the causal-LM model class.
AutoModel.register(SharedSpaceDecoderConfig, SharedSpaceDecoderModel)
AutoModelForCausalLM.register(SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM)

# Public names re-exported by this package.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]