"""
BTLM_Extensions: Extensions Package for BitTransformerLM
=========================================================

This package provides advanced optimizers and compression techniques as
extensions for BitTransformerLM, allowing easy experimentation with
different training configurations.

Available Extensions:

Optimizers:
    - Muon: Orthogonal momentum optimizer with Newton-Schulz iterations
    - Lion: EvoLved Sign Momentum optimizer for memory efficiency
    - Adafactor: Memory-efficient factorized optimizer

Compression:
    - RLE: Advanced Run-Length Encoding with multiple schemes

Usage:
    from BTLM_Extensions import configure_muon_optimizer, RLEEncoder

    # Use the Muon optimizer
    optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3)

    # Use RLE compression
    encoder = RLEEncoder(scheme="adaptive")
    compressed, metadata = encoder.encode(data)
"""
__version__ = "1.0.0"
__author__ = "BitTransformerLM Extensions"
__email__ = "extensions@bittransformerlm.ai"

# Import all optimizers
from .muon_optimizer import (
    Muon,
    configure_muon_optimizer,
    create_muon_training_config,
)
from .lion_optimizer import (
    Lion,
    AdaptiveLion,
    configure_lion_optimizer,
    configure_adaptive_lion_optimizer,
    create_lion_training_config,
)
from .adafactor_optimizer import (
    Adafactor,
    AdafactorScheduler,
    configure_adafactor_optimizer,
    configure_adafactor_with_scheduler,
    create_adafactor_training_config,
    analyze_memory_usage,
)

# Import compression utilities
from .rle_compression import (
    RLEEncoder,
    CompressedBitDataset,
    create_compression_aware_loss,
    integrate_rle_with_training,
    benchmark_compression_schemes,
    create_rle_training_config,
)


# Convenience functions for easy optimizer swapping
def get_optimizer_config(optimizer_type: str, **kwargs):
    """
    Get the training configuration for the specified optimizer type.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        **kwargs: Optimizer-specific parameters

    Returns:
        Dictionary with optimizer configuration
    """
    if optimizer_type.lower() == "muon":
        return create_muon_training_config(**kwargs)
    elif optimizer_type.lower() == "lion":
        return create_lion_training_config(**kwargs)
    elif optimizer_type.lower() == "adafactor":
        return create_adafactor_training_config(**kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")
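

# Illustrative call (a sketch; the exact keyword arguments accepted by each
# ``create_*_training_config`` helper are defined in the extension modules,
# so the ``lr`` value here is only an assumption):
#
#     lion_config = get_optimizer_config("lion", lr=1e-4)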


def configure_optimizer(optimizer_type: str, model, **kwargs):
    """
    Configure an optimizer based on a type string.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        model: PyTorch model to optimize
        **kwargs: Optimizer-specific parameters

    Returns:
        Tuple of (optimizer, scheduler)
    """
    if optimizer_type.lower() == "muon":
        return configure_muon_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "lion":
        return configure_lion_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "adafactor":
        return configure_adafactor_optimizer(model, **kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")
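

# Illustrative training step using the returned (optimizer, scheduler) pair
# (a sketch; ``model``, ``batch`` and ``compute_loss`` are hypothetical
# placeholders supplied by the caller, not part of this package):
#
#     optimizer, scheduler = configure_optimizer("lion", model, lr=1e-4)
#     optimizer.zero_grad()
#     loss = compute_loss(model, batch)
#     loss.backward()
#     optimizer.step()
#     if scheduler is not None:
#         scheduler.step()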


# Integration helpers for BitTransformerLM
class ExtensionManager:
    """
    Manager class for easy integration with BitTransformerLM.

    Provides a unified interface for switching between optimizers
    and compression schemes.
    """

    SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"]
    SUPPORTED_COMPRESSION = ["rle"]

    def __init__(self):
        self.current_optimizer = None
        self.current_compression = None

    def setup_optimizer(self, optimizer_type: str, model, **kwargs):
        """Set up an optimizer for training."""
        optimizer_type = optimizer_type.lower()
        if optimizer_type not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(f"Unsupported optimizer: {optimizer_type}")
        optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs)
        self.current_optimizer = optimizer_type
        return optimizer, scheduler

    def setup_compression(self, compression_type: str, **kwargs):
        """Set up a compression scheme."""
        compression_type = compression_type.lower()
        if compression_type not in self.SUPPORTED_COMPRESSION:
            raise ValueError(f"Unsupported compression: {compression_type}")
        # "rle" is currently the only supported scheme.
        encoder = RLEEncoder(**kwargs)
        self.current_compression = compression_type
        return encoder

    def create_training_config(self, optimizer_type: str = "muon", compression_type: str = "rle", **kwargs):
        """Create a comprehensive training configuration."""
        config = {
            "optimizer": get_optimizer_config(optimizer_type, **kwargs),
            "compression": create_rle_training_config(**kwargs) if compression_type == "rle" else None,
            "extensions": {
                "optimizer_type": optimizer_type,
                "compression_type": compression_type,
                "version": __version__,
            },
        }
        return config
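
    # The returned config is a plain dict; a sketch of its shape (the exact
    # keys under "optimizer" and "compression" come from the respective
    # ``create_*_training_config`` helpers and may differ):
    #
    #     {
    #         "optimizer": {...},    # optimizer-specific settings
    #         "compression": {...},  # RLE settings, or None
    #         "extensions": {"optimizer_type": "muon", "compression_type": "rle", "version": "1.0.0"},
    #     }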

    def benchmark_optimizers(self, model, test_data, epochs: int = 5):
        """Benchmark all available optimizers on the given test data."""
        import time

        import torch.nn.functional as F

        results = {}
        for opt_type in self.SUPPORTED_OPTIMIZERS:
            print(f"Benchmarking {opt_type} optimizer...")

            # Create a fresh copy of the model so each optimizer starts
            # from identical weights.
            model_copy = type(model)(**model._current_params())
            model_copy.load_state_dict(model.state_dict())

            try:
                # Set up the optimizer under test.
                optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3)

                # Short training loop.
                start_time = time.time()
                losses = []
                for epoch in range(epochs):
                    optimizer.zero_grad()

                    # Simple forward pass: predict the next bit.
                    logits, _ = model_copy(test_data)
                    pred = logits[:, :-1, :].reshape(-1, 2)
                    target = test_data[:, 1:].reshape(-1)
                    loss = F.cross_entropy(pred, target)

                    loss.backward()
                    optimizer.step()
                    if scheduler:
                        scheduler.step()

                    losses.append(loss.item())
                end_time = time.time()

                results[opt_type] = {
                    "final_loss": losses[-1],
                    "avg_loss": sum(losses) / len(losses),
                    "training_time": end_time - start_time,
                    "convergence": losses[0] - losses[-1],
                    "success": True,
                }
            except Exception as e:
                results[opt_type] = {
                    "final_loss": float('inf'),
                    "avg_loss": float('inf'),
                    "training_time": 0,
                    "convergence": 0,
                    "success": False,
                    "error": str(e),
                }
        return results


# Create global extension manager instance
extension_manager = ExtensionManager()
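
# Illustrative use of the shared manager (a sketch; ``model`` and ``test_data``
# are placeholders for a BitTransformerLM instance and a batch of bit tensors
# supplied by the caller):
#
#     optimizer, scheduler = extension_manager.setup_optimizer("lion", model, lr=1e-4)
#     encoder = extension_manager.setup_compression("rle", scheme="adaptive")
#     results = extension_manager.benchmark_optimizers(model, test_data, epochs=3)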

# Export all important symbols
__all__ = [
    # Optimizers
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",
    # Optimizer configuration functions
    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",
    # Training configuration creators
    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",
    # Compression
    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",
    # Convenience functions
    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
]


# Package information
def get_version():
    """Get package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }


# Optional welcome message (enable by uncommenting the call below)
def _welcome_message():
    """Print a welcome message listing the available extensions."""
    print(f"πŸš€ BTLM_Extensions v{__version__} loaded!")
    print(f"πŸ“Š Available optimizers: {', '.join(list_optimizers())}")
    print(f"πŸ—œοΈ Available compression: {', '.join(list_compression_schemes())}")
    print("πŸ“– Use help(BTLM_Extensions) for detailed documentation")


# Uncomment the line below to show the welcome message on import:
# _welcome_message()


# Usage demonstration kept in a docstring for quick reference
def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

    # Quick optimizer swap
    from BTLM_Extensions import configure_optimizer

    # Try different optimizers
    muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
    lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
    adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

    # Use with BitTransformerLM training
    from bit_transformer.training import train_loop
    train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

    # Advanced compression
    from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

    # Set up compression-aware training
    dataset, loss_fn = integrate_rle_with_training(model, data)

    # Benchmark optimizers
    from BTLM_Extensions import extension_manager
    results = extension_manager.benchmark_optimizers(model, test_data)
    print("Benchmark results:", results)
    """
    pass
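

# Minimal smoke test when the module is executed directly (a sketch; it only
# exercises the lightweight metadata helpers defined above and is skipped on
# normal import):
if __name__ == "__main__":
    _welcome_message()
    print(get_package_info())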