""" BTLM_Extensions: Extensions Package for BitTransformerLM ======================================================= This package provides advanced optimizers and compression techniques as extensions for BitTransformerLM, allowing easy experimentation with different training configurations. Available Extensions: Optimizers: - Muon: Orthogonal momentum optimizer with Newton-Schulz iterations - Lion: EvoLved Sign Momentum optimizer for memory efficiency - Adafactor: Memory-efficient factorized optimizer Compression: - RLE: Advanced Run-Length Encoding with multiple schemes Usage: from BTLM_Extensions import configure_muon_optimizer, RLEEncoder # Use Muon optimizer optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3) # Use RLE compression encoder = RLEEncoder(scheme="adaptive") compressed, metadata = encoder.encode(data) """ __version__ = "1.0.0" __author__ = "BitTransformerLM Extensions" __email__ = "extensions@bittransformerlm.ai" # Import all optimizers from .muon_optimizer import ( Muon, configure_muon_optimizer, create_muon_training_config, ) from .lion_optimizer import ( Lion, AdaptiveLion, configure_lion_optimizer, configure_adaptive_lion_optimizer, create_lion_training_config, ) from .adafactor_optimizer import ( Adafactor, AdafactorScheduler, configure_adafactor_optimizer, configure_adafactor_with_scheduler, create_adafactor_training_config, analyze_memory_usage, ) # Import compression utilities from .rle_compression import ( RLEEncoder, CompressedBitDataset, create_compression_aware_loss, integrate_rle_with_training, benchmark_compression_schemes, create_rle_training_config, ) # Convenience functions for easy optimizer swapping def get_optimizer_config(optimizer_type: str, **kwargs): """ Get configuration for specified optimizer type. Args: optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor') **kwargs: Optimizer-specific parameters Returns: Dictionary with optimizer configuration """ if optimizer_type.lower() == "muon": return create_muon_training_config(**kwargs) elif optimizer_type.lower() == "lion": return create_lion_training_config(**kwargs) elif optimizer_type.lower() == "adafactor": return create_adafactor_training_config(**kwargs) else: raise ValueError(f"Unknown optimizer type: {optimizer_type}") def configure_optimizer(optimizer_type: str, model, **kwargs): """ Configure optimizer based on type string. Args: optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor') model: PyTorch model to optimize **kwargs: Optimizer-specific parameters Returns: Tuple of (optimizer, scheduler) """ if optimizer_type.lower() == "muon": return configure_muon_optimizer(model, **kwargs) elif optimizer_type.lower() == "lion": return configure_lion_optimizer(model, **kwargs) elif optimizer_type.lower() == "adafactor": return configure_adafactor_optimizer(model, **kwargs) else: raise ValueError(f"Unknown optimizer type: {optimizer_type}") # Integration helpers for BitTransformerLM class ExtensionManager: """ Manager class for easy integration with BitTransformerLM. Provides unified interface for switching between optimizers and compression schemes. 
""" SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"] SUPPORTED_COMPRESSION = ["rle"] def __init__(self): self.current_optimizer = None self.current_compression = None def setup_optimizer(self, optimizer_type: str, model, **kwargs): """Setup optimizer for training.""" if optimizer_type not in self.SUPPORTED_OPTIMIZERS: raise ValueError(f"Unsupported optimizer: {optimizer_type}") optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs) self.current_optimizer = optimizer_type return optimizer, scheduler def setup_compression(self, compression_type: str, **kwargs): """Setup compression scheme.""" if compression_type not in self.SUPPORTED_COMPRESSION: raise ValueError(f"Unsupported compression: {compression_type}") if compression_type == "rle": encoder = RLEEncoder(**kwargs) self.current_compression = compression_type return encoder def create_training_config(self, optimizer_type: str = "muon", compression_type: str = "rle", **kwargs): """Create comprehensive training configuration.""" config = { "optimizer": get_optimizer_config(optimizer_type, **kwargs), "compression": create_rle_training_config(**kwargs) if compression_type == "rle" else None, "extensions": { "optimizer_type": optimizer_type, "compression_type": compression_type, "version": __version__, } } return config def benchmark_optimizers(self, model, test_data, epochs: int = 5): """Benchmark all available optimizers on test data.""" import torch import torch.nn.functional as F import time results = {} for opt_type in self.SUPPORTED_OPTIMIZERS: print(f"Benchmarking {opt_type} optimizer...") # Create fresh model copy model_copy = type(model)(**model._current_params()) model_copy.load_state_dict(model.state_dict()) try: # Setup optimizer optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3) # Training loop start_time = time.time() losses = [] for epoch in range(epochs): optimizer.zero_grad() # Simple forward pass logits, _ = model_copy(test_data) pred = logits[:, :-1, :].reshape(-1, 2) target = test_data[:, 1:].reshape(-1) loss = F.cross_entropy(pred, target) loss.backward() optimizer.step() if scheduler: scheduler.step() losses.append(loss.item()) end_time = time.time() results[opt_type] = { "final_loss": losses[-1], "avg_loss": sum(losses) / len(losses), "training_time": end_time - start_time, "convergence": losses[0] - losses[-1], "success": True, } except Exception as e: results[opt_type] = { "final_loss": float('inf'), "avg_loss": float('inf'), "training_time": 0, "convergence": 0, "success": False, "error": str(e), } return results # Create global extension manager instance extension_manager = ExtensionManager() # Export all important symbols __all__ = [ # Optimizers "Muon", "Lion", "AdaptiveLion", "Adafactor", "AdafactorScheduler", # Optimizer configuration functions "configure_muon_optimizer", "configure_lion_optimizer", "configure_adaptive_lion_optimizer", "configure_adafactor_optimizer", "configure_adafactor_with_scheduler", # Training configuration creators "create_muon_training_config", "create_lion_training_config", "create_adafactor_training_config", # Compression "RLEEncoder", "CompressedBitDataset", "create_compression_aware_loss", "integrate_rle_with_training", "benchmark_compression_schemes", "create_rle_training_config", # Convenience functions "get_optimizer_config", "configure_optimizer", "ExtensionManager", "extension_manager", "analyze_memory_usage", ] # Package information def get_version(): """Get package version.""" return __version__ def list_optimizers(): """List 


# Export all important symbols
__all__ = [
    # Optimizers
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",
    # Optimizer configuration functions
    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",
    # Training configuration creators
    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",
    # Compression
    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",
    # Convenience functions
    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
]


# Package information
def get_version():
    """Get package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }


# Print welcome message when imported
def _welcome_message():
    """Print welcome message with available extensions."""
    print(f"🚀 BTLM_Extensions v{__version__} loaded!")
    print(f"📊 Available optimizers: {', '.join(list_optimizers())}")
    print(f"🗜️ Available compression: {', '.join(list_compression_schemes())}")
    print("📖 Use help(BTLM_Extensions) for detailed documentation")


# Uncomment the line below if you want the welcome message on import
# _welcome_message()


# Usage demonstration; examples live in the docstring
def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

        # Quick optimizer swap
        from BTLM_Extensions import configure_optimizer

        # Try different optimizers
        muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
        lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
        adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

        # Use with BitTransformerLM training
        from bit_transformer.training import train_loop
        train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

        # Advanced compression
        from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

        # Setup compression-aware training
        dataset, loss_fn = integrate_rle_with_training(model, data)

        # Benchmark optimizers
        from BTLM_Extensions import extension_manager
        results = extension_manager.benchmark_optimizers(model, test_data)
        print("Benchmark results:", results)
    """
    pass
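

# Example (sketch): one call builds a combined training config; "adaptive" is
# the RLE scheme shown in the module docstring. The keys under "optimizer" and
# "compression" come from the respective create_*_training_config helpers.
#
#     cfg = extension_manager.create_training_config("lion", "rle")
#     encoder = extension_manager.setup_compression("rle", scheme="adaptive")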