File size: 1,205 Bytes
d1ef951 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# -*- coding: utf-8 -*-
"""
Shared Subspace Decoder Models
This module contains the implementation of the Shared Subspace Decoder architecture,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers.
"""
import logging

# Trace package initialisation through the logging system instead of printing.
# Importing a library package should not write to stdout: unconditional prints
# leak into the output of every program that imports it. The record is emitted
# at DEBUG level, so it stays silent unless the application opts in, e.g. with
# logging.basicConfig(level=logging.DEBUG).
logger = logging.getLogger(__name__)
logger.debug("models/__init__.py: package initialisation is running")
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
from .shared_space_config import SharedSpaceDecoderConfig
from .shared_space_decoder import (
SharedSpaceDecoderPreTrainedModel,
SharedSpaceDecoderModel,
)
# Import from task_heads in layers directory
from ..layers.task_heads import SharedSpaceDecoderForCausalLM
# Hook the architecture into the transformers Auto* factories.
# First bind the "shared_space_decoder" model type to its configuration class.
AutoConfig.register("shared_space_decoder", SharedSpaceDecoderConfig)

# Then bind the configuration class to the model class each factory should
# use, in the same order as before: base model first, causal-LM head second.
for _auto_factory, _model_class in (
    (AutoModel, SharedSpaceDecoderModel),
    (AutoModelForCausalLM, SharedSpaceDecoderForCausalLM),
):
    _auto_factory.register(SharedSpaceDecoderConfig, _model_class)

# Drop the loop temporaries so the package namespace is left unchanged.
del _auto_factory, _model_class
# Public names re-exported by this package: the configuration class, the
# pretrained base class, the bare decoder model, and the causal-LM head model.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]
|