"""
BTLM_Extensions: Extensions Package for BitTransformerLM
========================================================

This package provides advanced optimizers and compression techniques
as extensions for BitTransformerLM, allowing easy experimentation with
different training configurations.

Available Extensions:

Optimizers:
- Muon: Orthogonal momentum optimizer with Newton-Schulz iterations
- Lion: EvoLved Sign Momentum optimizer for memory efficiency
- Adafactor: Memory-efficient factorized optimizer

Compression:
- RLE: Advanced Run-Length Encoding with multiple schemes

Usage:
    from BTLM_Extensions import configure_muon_optimizer, RLEEncoder

    # Use the Muon optimizer
    optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3)

    # Use RLE compression
    encoder = RLEEncoder(scheme="adaptive")
    compressed, metadata = encoder.encode(data)
"""

__version__ = "1.0.0"
__author__ = "BitTransformerLM Extensions"
__email__ = "extensions@bittransformerlm.ai"

from .muon_optimizer import (
    Muon,
    configure_muon_optimizer,
    create_muon_training_config,
)

from .lion_optimizer import (
    Lion,
    AdaptiveLion,
    configure_lion_optimizer,
    configure_adaptive_lion_optimizer,
    create_lion_training_config,
)

from .adafactor_optimizer import (
    Adafactor,
    AdafactorScheduler,
    configure_adafactor_optimizer,
    configure_adafactor_with_scheduler,
    create_adafactor_training_config,
    analyze_memory_usage,
)

from .rle_compression import (
    RLEEncoder,
    CompressedBitDataset,
    create_compression_aware_loss,
    integrate_rle_with_training,
    benchmark_compression_schemes,
    create_rle_training_config,
)


def get_optimizer_config(optimizer_type: str, **kwargs):
    """
    Get the training configuration for the specified optimizer type.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        **kwargs: Optimizer-specific parameters, forwarded verbatim to the
            matching create_*_training_config helper

    Returns:
        Dictionary with optimizer configuration
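
    Example (illustrative; accepted kwargs depend on the underlying helper):
        config = get_optimizer_config("lion")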
    """
    opt = optimizer_type.lower()
    if opt == "muon":
        return create_muon_training_config(**kwargs)
    elif opt == "lion":
        return create_lion_training_config(**kwargs)
    elif opt == "adafactor":
        return create_adafactor_training_config(**kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")


def configure_optimizer(optimizer_type: str, model, **kwargs):
    """
    Configure an optimizer based on its type string.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        model: PyTorch model to optimize
        **kwargs: Optimizer-specific parameters

    Returns:
        Tuple of (optimizer, scheduler)
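
    Example (sketch; assumes `model` is a torch.nn.Module):
        optimizer, scheduler = configure_optimizer("muon", model, lr=1e-3)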
    """
    opt = optimizer_type.lower()
    if opt == "muon":
        return configure_muon_optimizer(model, **kwargs)
    elif opt == "lion":
        return configure_lion_optimizer(model, **kwargs)
    elif opt == "adafactor":
        return configure_adafactor_optimizer(model, **kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")


class ExtensionManager:
    """
    Manager class for easy integration with BitTransformerLM.

    Provides a unified interface for switching between optimizers
    and compression schemes.
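
    Example (sketch; assumes `model` is a torch.nn.Module):
        manager = ExtensionManager()
        optimizer, scheduler = manager.setup_optimizer("muon", model, lr=1e-3)
        encoder = manager.setup_compression("rle", scheme="adaptive")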
    """

    SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"]
    SUPPORTED_COMPRESSION = ["rle"]

    def __init__(self):
        self.current_optimizer = None
        self.current_compression = None

    def setup_optimizer(self, optimizer_type: str, model, **kwargs):
        """Set up an optimizer for training."""
        # Normalize case so that "Muon" and "muon" are treated identically,
        # matching the case-insensitive dispatch in configure_optimizer().
        optimizer_type = optimizer_type.lower()
        if optimizer_type not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(f"Unsupported optimizer: {optimizer_type}")

        optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs)
        self.current_optimizer = optimizer_type
        return optimizer, scheduler

    def setup_compression(self, compression_type: str, **kwargs):
        """Set up a compression scheme."""
        compression_type = compression_type.lower()
        if compression_type not in self.SUPPORTED_COMPRESSION:
            raise ValueError(f"Unsupported compression: {compression_type}")

        if compression_type == "rle":
            encoder = RLEEncoder(**kwargs)
            self.current_compression = compression_type
            return encoder

    def create_training_config(self, optimizer_type: str = "muon", compression_type: str = "rle", **kwargs):
        """Create a comprehensive training configuration."""
        # Note: kwargs are forwarded to both the optimizer and compression
        # config helpers, so only pass keys that both can accept or ignore.
        config = {
            "optimizer": get_optimizer_config(optimizer_type, **kwargs),
            "compression": create_rle_training_config(**kwargs) if compression_type == "rle" else None,
            "extensions": {
                "optimizer_type": optimizer_type,
                "compression_type": compression_type,
                "version": __version__,
            },
        }
        return config

    def benchmark_optimizers(self, model, test_data, epochs: int = 5):
        """Benchmark all available optimizers on test data."""
        import time

        import torch.nn.functional as F

        results = {}

        for opt_type in self.SUPPORTED_OPTIMIZERS:
            print(f"Benchmarking {opt_type} optimizer...")

            # Train each optimizer from identical starting weights. Rebuilding
            # the model assumes it exposes BitTransformerLM's _current_params()
            # helper for retrieving its constructor hyperparameters.
            model_copy = type(model)(**model._current_params())
            model_copy.load_state_dict(model.state_dict())

            try:
                optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3)

                start_time = time.time()
                losses = []

                for epoch in range(epochs):
                    optimizer.zero_grad()

                    # Next-bit prediction: logits at position t predict the bit
                    # at t + 1; the class dimension is 2 for the {0, 1} vocabulary.
                    logits, _ = model_copy(test_data)
                    pred = logits[:, :-1, :].reshape(-1, 2)
                    target = test_data[:, 1:].reshape(-1)
                    loss = F.cross_entropy(pred, target)

                    loss.backward()
                    optimizer.step()
                    if scheduler:
                        scheduler.step()

                    losses.append(loss.item())

                end_time = time.time()

                results[opt_type] = {
                    "final_loss": losses[-1],
                    "avg_loss": sum(losses) / len(losses),
                    "training_time": end_time - start_time,
                    "convergence": losses[0] - losses[-1],
                    "success": True,
                }

            except Exception as e:
                results[opt_type] = {
                    "final_loss": float("inf"),
                    "avg_loss": float("inf"),
                    "training_time": 0,
                    "convergence": 0,
                    "success": False,
                    "error": str(e),
                }

        return results


# Global manager instance for convenient, stateful access.
extension_manager = ExtensionManager()
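
# Example (sketch; assumes `model` and `test_data` are defined elsewhere):
#     opt, sched = extension_manager.setup_optimizer("lion", model, lr=1e-4)
#     config = extension_manager.create_training_config("adafactor", "rle")
#     results = extension_manager.benchmark_optimizers(model, test_data, epochs=3)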


__all__ = [
    # Optimizers
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",
    # Configuration helpers
    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",
    # Training-config factories
    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",
    # Compression
    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",
    # Package-level utilities
    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
]


def get_version():
    """Get package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }


def _welcome_message():
    """Print welcome message with available extensions."""
    print(f"BTLM_Extensions v{__version__} loaded!")
    print(f"Available optimizers: {', '.join(list_optimizers())}")
    print(f"Available compression: {', '.join(list_compression_schemes())}")
    print("Use help(BTLM_Extensions) for detailed documentation")


def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

    # Quick optimizer swap
    from BTLM_Extensions import configure_optimizer

    # Try different optimizers
    muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
    lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
    adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

    # Use with BitTransformerLM training
    from bit_transformer.training import train_loop

    train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

    # Advanced compression
    from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

    # Set up compression-aware training
    dataset, loss_fn = integrate_rle_with_training(model, data)

    # Benchmark optimizers
    from BTLM_Extensions import extension_manager

    results = extension_manager.benchmark_optimizers(model, test_data)
    print("Benchmark results:", results)
    """
    pass