"""
Shared Subspace Decoder Models

This module contains the implementation of the Shared Subspace Decoder architecture,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers.

Importing this package has side effects: it prints a short banner and registers
``SharedSpaceDecoderConfig`` (model type ``"shared_space_decoder"``) together with
its model classes on the Hugging Face ``AutoConfig``, ``AutoModel`` and
``AutoModelForCausalLM`` factories.
"""

# NOTE(review): this banner is written to stdout when the package is imported.
# It looks like a leftover import diagnostic -- confirm with the owner whether
# it can be moved to `logging` or removed. Kept as-is to preserve behavior.
print("\n========================================\n")
print(" models/__init__.py: Is this being run?")
print("\n========================================\n")

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from .shared_space_config import SharedSpaceDecoderConfig
from .shared_space_decoder import (
    SharedSpaceDecoderPreTrainedModel,
    SharedSpaceDecoderModel,
)

# The causal-LM head is imported from the sibling ``layers`` package (one level
# up from this package), not from this package itself.
from ..layers.task_heads import SharedSpaceDecoderForCausalLM

# Associate the "shared_space_decoder" model type string with its config class
# so the Auto* factories can resolve it.
AutoConfig.register("shared_space_decoder", SharedSpaceDecoderConfig)

# Associate the config class with the base model and the causal-LM model.
# These run after the config registration above, matching the original order.
AutoModel.register(SharedSpaceDecoderConfig, SharedSpaceDecoderModel)
AutoModelForCausalLM.register(SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM)

# Public API re-exported by this package.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]