"""nanoKimi - the simplest, fastest repository for training/finetuning Kimi-K2 models.

This package implements the Kimi-K2 architecture with key innovations:

- Muon Optimizer: Advanced optimization for faster convergence
- Mixture of Experts (MoE): Efficient scaling with expert routing
- Latent Attention: Memory-efficient attention mechanism
"""

# Re-export the public names from the submodules so callers can import them
# directly from the package root. The import order is kept as it was, in case
# any submodule relies on a sibling having been imported first.
from .model import KimiK2
from .optimizer import Muon, create_muon_optimizer
from .attention import LatentAttention, MultiHeadAttention
from .moe import MoELayer, StandardFFN

# Package metadata.
__version__ = "0.1.0"
__author__ = "nanoKimi Team"

# Names exported by `from <package> import *`; one entry per re-exported name
# above, in the same order as the imports.
__all__ = [
    "KimiK2",
    "Muon",
    "create_muon_optimizer",
    "LatentAttention",
    "MultiHeadAttention",
    "MoELayer",
    "StandardFFN",
]