ChrisMcCormick's picture
Adding source code
cd2099c verified
raw
history blame contribute delete
555 Bytes
# -*- coding: utf-8 -*-
"""
Subspace Decoder Layers.

This module contains the layer implementations for the Shared Subspace Decoder,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers. It also
re-exports SharedSpaceDecoderForCausalLM from the task_heads submodule.
"""
# Import the main layer classes
from .mla import MultiheadLatentAttention, RotaryEmbedding
from .feedforward import SubspaceFeedForward
from .task_heads import SharedSpaceDecoderForCausalLM
# Public API of this package: exactly the names re-exported by the relative
# imports above, so `from <package> import *` exposes nothing else.
__all__ = [
    # from .mla
    "MultiheadLatentAttention",
    "RotaryEmbedding",
    # from .feedforward
    "SubspaceFeedForward",
    # from .task_heads
    "SharedSpaceDecoderForCausalLM",
]