"""
BTLM_Extensions: Extensions Package for BitTransformerLM
=======================================================
This package provides advanced optimizers and compression techniques
as extensions for BitTransformerLM, allowing easy experimentation with
different training configurations.
Available Extensions:
Optimizers:
- Muon: Orthogonal momentum optimizer with Newton-Schulz iterations
- Lion: EvoLved Sign Momentum optimizer for memory efficiency
- Adafactor: Memory-efficient factorized optimizer
Compression:
- RLE: Advanced Run-Length Encoding with multiple schemes
Usage:
from BTLM_Extensions import configure_muon_optimizer, RLEEncoder
# Use Muon optimizer
optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3)
# Use RLE compression
encoder = RLEEncoder(scheme="adaptive")
compressed, metadata = encoder.encode(data)
"""
__version__ = "1.0.0"
__author__ = "BitTransformerLM Extensions"
__email__ = "extensions@bittransformerlm.ai"

# Import all optimizers
from .muon_optimizer import (
    Muon,
    configure_muon_optimizer,
    create_muon_training_config,
)
from .lion_optimizer import (
    Lion,
    AdaptiveLion,
    configure_lion_optimizer,
    configure_adaptive_lion_optimizer,
    create_lion_training_config,
)
from .adafactor_optimizer import (
    Adafactor,
    AdafactorScheduler,
    configure_adafactor_optimizer,
    configure_adafactor_with_scheduler,
    create_adafactor_training_config,
    analyze_memory_usage,
)

# Import compression utilities
from .rle_compression import (
    RLEEncoder,
    CompressedBitDataset,
    create_compression_aware_loss,
    integrate_rle_with_training,
    benchmark_compression_schemes,
    create_rle_training_config,
)

# Convenience functions for easy optimizer swapping
def get_optimizer_config(optimizer_type: str, **kwargs):
    """
    Get the training configuration for the specified optimizer type.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        **kwargs: Optimizer-specific parameters

    Returns:
        Dictionary with the optimizer configuration
    """
    optimizer_type = optimizer_type.lower()
    if optimizer_type == "muon":
        return create_muon_training_config(**kwargs)
    elif optimizer_type == "lion":
        return create_lion_training_config(**kwargs)
    elif optimizer_type == "adafactor":
        return create_adafactor_training_config(**kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")

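# Usage sketch (hedged: which keyword arguments each backend accepts, and the
# layout of the returned dict, are defined by the create_*_training_config
# helpers in the submodules, not by this wrapper):
#
#     muon_cfg = get_optimizer_config("muon", lr=1e-3)
#     adafactor_cfg = get_optimizer_config("adafactor")
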
def configure_optimizer(optimizer_type: str, model, **kwargs):
    """
    Configure an optimizer based on a type string.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        model: PyTorch model to optimize
        **kwargs: Optimizer-specific parameters

    Returns:
        Tuple of (optimizer, scheduler)
    """
    optimizer_type = optimizer_type.lower()
    if optimizer_type == "muon":
        return configure_muon_optimizer(model, **kwargs)
    elif optimizer_type == "lion":
        return configure_lion_optimizer(model, **kwargs)
    elif optimizer_type == "adafactor":
        return configure_adafactor_optimizer(model, **kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")

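# Usage sketch (hedged: `model` is any torch.nn.Module; `loader` and
# `compute_loss` are hypothetical placeholders for your training setup):
#
#     optimizer, scheduler = configure_optimizer("lion", model, lr=1e-4)
#     for batch in loader:
#         loss = compute_loss(model, batch)  # hypothetical helper
#         loss.backward()
#         optimizer.step()
#         if scheduler is not None:
#             scheduler.step()
#         optimizer.zero_grad()
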
# Integration helpers for BitTransformerLM
class ExtensionManager:
    """
    Manager class for easy integration with BitTransformerLM.

    Provides a unified interface for switching between optimizers
    and compression schemes.
    """

    SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"]
    SUPPORTED_COMPRESSION = ["rle"]

    def __init__(self):
        self.current_optimizer = None
        self.current_compression = None

    def setup_optimizer(self, optimizer_type: str, model, **kwargs):
        """Set up an optimizer for training."""
        if optimizer_type not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(f"Unsupported optimizer: {optimizer_type}")
        optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs)
        self.current_optimizer = optimizer_type
        return optimizer, scheduler

    def setup_compression(self, compression_type: str, **kwargs):
        """Set up a compression scheme."""
        if compression_type not in self.SUPPORTED_COMPRESSION:
            raise ValueError(f"Unsupported compression: {compression_type}")
        # Only RLE is supported for now; kwargs are forwarded to RLEEncoder.
        encoder = RLEEncoder(**kwargs)
        self.current_compression = compression_type
        return encoder

    def create_training_config(
        self,
        optimizer_type: str = "muon",
        compression_type: str = "rle",
        **kwargs,
    ):
        """Create a comprehensive training configuration."""
        # Note: **kwargs is forwarded to both the optimizer and compression
        # config builders, so any keys passed must be valid for both.
        config = {
            "optimizer": get_optimizer_config(optimizer_type, **kwargs),
            "compression": (
                create_rle_training_config(**kwargs)
                if compression_type == "rle"
                else None
            ),
            "extensions": {
                "optimizer_type": optimizer_type,
                "compression_type": compression_type,
                "version": __version__,
            },
        }
        return config

    def benchmark_optimizers(self, model, test_data, epochs: int = 5):
        """Benchmark all available optimizers on test data."""
        import time

        import torch.nn.functional as F

        results = {}
        for opt_type in self.SUPPORTED_OPTIMIZERS:
            print(f"Benchmarking {opt_type} optimizer...")

            # Create a fresh copy of the model so every optimizer starts from
            # identical weights (assumes the model exposes a
            # `_current_params()` helper that returns its constructor kwargs).
            model_copy = type(model)(**model._current_params())
            model_copy.load_state_dict(model.state_dict())

            try:
                # Set up the optimizer under test
                optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3)

                # Short training loop: next-bit prediction with shift-by-one
                # targets over the 2-symbol bit vocabulary.
                start_time = time.time()
                losses = []
                for epoch in range(epochs):
                    optimizer.zero_grad()
                    logits, _ = model_copy(test_data)
                    pred = logits[:, :-1, :].reshape(-1, 2)
                    target = test_data[:, 1:].reshape(-1)
                    loss = F.cross_entropy(pred, target)
                    loss.backward()
                    optimizer.step()
                    if scheduler:
                        scheduler.step()
                    losses.append(loss.item())
                end_time = time.time()

                results[opt_type] = {
                    "final_loss": losses[-1],
                    "avg_loss": sum(losses) / len(losses),
                    "training_time": end_time - start_time,
                    "convergence": losses[0] - losses[-1],
                    "success": True,
                }
            except Exception as e:
                results[opt_type] = {
                    "final_loss": float("inf"),
                    "avg_loss": float("inf"),
                    "training_time": 0,
                    "convergence": 0,
                    "success": False,
                    "error": str(e),
                }
        return results

# Create global extension manager instance
extension_manager = ExtensionManager()
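
# Usage sketch for the manager (hedged: the `scheme` kwarg is forwarded to
# RLEEncoder as in the module docstring; `model` is a placeholder):
#
#     optimizer, scheduler = extension_manager.setup_optimizer("lion", model, lr=1e-4)
#     encoder = extension_manager.setup_compression("rle", scheme="adaptive")
#     config = extension_manager.create_training_config(optimizer_type="lion")
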
# Export all important symbols
__all__ = [
    # Optimizers
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",
    # Optimizer configuration functions
    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",
    # Training configuration creators
    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",
    # Compression
    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",
    # Convenience functions
    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
]

# Package information
def get_version():
    """Get the package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }

# Optional welcome message shown on import
def _welcome_message():
    """Print a welcome message listing the available extensions."""
    print(f"BTLM_Extensions v{__version__} loaded!")
    print(f"Available optimizers: {', '.join(list_optimizers())}")
    print(f"Available compression: {', '.join(list_compression_schemes())}")
    print("Use help(BTLM_Extensions) for detailed documentation")


# Uncomment the line below to show the welcome message on import
# _welcome_message()

# Usage demonstration, kept in a docstring so importing stays side-effect free
def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

        # Quick optimizer swap
        from BTLM_Extensions import configure_optimizer

        # Try different optimizers
        muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
        lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
        adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

        # Use with BitTransformerLM training
        from bit_transformer.training import train_loop
        train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

        # Advanced compression
        from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

        # Set up compression-aware training
        dataset, loss_fn = integrate_rle_with_training(model, data)

        # Benchmark optimizers
        from BTLM_Extensions import extension_manager
        results = extension_manager.benchmark_optimizers(model, test_data)
        print("Benchmark results:", results)
    """
    pass
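
# End-to-end sketch (hedged: `model` and `test_data` are placeholders for a
# BitTransformerLM instance and a (batch, seq) LongTensor of bits; the report
# keys below are the ones benchmark_optimizers actually emits):
#
#     from BTLM_Extensions import configure_optimizer, extension_manager
#
#     optimizer, scheduler = configure_optimizer("muon", model, lr=1e-3)
#     report = extension_manager.benchmark_optimizers(model, test_data, epochs=5)
#     best = max(report, key=lambda name: report[name]["convergence"])
#     print(f"Best convergence: {best} ({report[best]['convergence']:.4f})")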