Upload src/__init__.py
Browse files - src/__init__.py +26 -0
src/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
nanoKimi - The simplest, fastest repository for training/fine-tuning Kimi-K2 models
|
| 3 |
+
|
| 4 |
+
This package implements the Kimi-K2 architecture with key innovations:
|
| 5 |
+
- Muon Optimizer: Advanced optimization for faster convergence
|
| 6 |
+
- Mixture of Experts (MoE): Efficient scaling with expert routing
|
| 7 |
+
- Latent Attention: Memory-efficient attention mechanism
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .model import KimiK2
|
| 11 |
+
from .optimizer import Muon, create_muon_optimizer
|
| 12 |
+
from .attention import LatentAttention, MultiHeadAttention
|
| 13 |
+
from .moe import MoELayer, StandardFFN
|
| 14 |
+
|
| 15 |
+
__version__ = "0.1.0"
|
| 16 |
+
__author__ = "nanoKimi Team"
|
| 17 |
+
|
| 18 |
+
__all__ = [
|
| 19 |
+
"KimiK2",
|
| 20 |
+
"Muon",
|
| 21 |
+
"create_muon_optimizer",
|
| 22 |
+
"LatentAttention",
|
| 23 |
+
"MultiHeadAttention",
|
| 24 |
+
"MoELayer",
|
| 25 |
+
"StandardFFN"
|
| 26 |
+
]
|