import hashlib
import json
import warnings
from dataclasses import dataclass
from enum import Enum
from functools import lru_cache
from typing import Optional, Union, Tuple, List, Dict

import numpy as np

from .core.db_manager import HeliumDBManager
from virtual_gpu_driver.src.ai.tensor_types import Tensor, Device, DType


class NormType(Enum):
    """Supported normalization types"""
    BATCH = "batch"
    LAYER = "layer"
    GROUP = "group"
    INSTANCE = "instance"
    RMS = "rms"

def normalize(input: Tensor,
              mean: Optional[Tensor] = None,
              variance: Optional[Tensor] = None,
              weight: Optional[Tensor] = None,
              bias: Optional[Tensor] = None,
              eps: float = 1e-5) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Normalize the input using mean and variance. If mean/variance are not
    provided, they are computed from the input over all axes except the last.

    Args:
        input: Input tensor
        mean: Optional pre-computed mean
        variance: Optional pre-computed variance
        weight: Optional scale parameter
        bias: Optional bias parameter
        eps: Small constant for numerical stability

    Returns:
        Tuple of (normalized tensor, mean, variance)
    """
    if mean is None or variance is None:
        # Reduce over every axis except the last (feature) axis.
        axes = tuple(range(input.ndim - 1))
        mean = input.mean(axis=axes, keepdims=True)
        variance = input.var(axis=axes, keepdims=True)

    denom = (variance + eps).sqrt()
    normalized = (input - mean) / denom

    if weight is not None:
        normalized = normalized * weight
    if bias is not None:
        normalized = normalized + bias

    return normalized, mean, variance

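# Illustrative usage of normalize() (a sketch, not part of the module API):
# it assumes the Tensor type wraps a NumPy array and exposes the
# .mean/.var/.sqrt methods used above; the constructor call below is a
# hypothetical example, not a documented tensor_types call.
#
#     x = Tensor(np.random.randn(8, 16).astype(np.float32))
#     y, mu, var = normalize(x)                       # stats computed from x
#     y2, _, _ = normalize(x, mean=mu, variance=var)  # reuse pre-computed stats
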
@dataclass
class NormConfig:
    """Configuration for normalization layers"""
    norm_type: NormType
    num_features: int
    eps: float = 1e-5
    momentum: float = 0.1
    affine: bool = True
    num_groups: int = 32
    track_running_stats: bool = True
    dtype: np.dtype = np.float32
    use_cache: bool = True

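# Example configuration (illustrative values only):
#
#     cfg = NormConfig(norm_type=NormType.LAYER, num_features=768,
#                      eps=1e-6, affine=True, track_running_stats=False)
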
class NormalizationCache:
    """Cache manager for normalization computations"""

    def __init__(self):
        self.db = HeliumDBManager.get_instance()
        self.running_means: Dict[str, np.ndarray] = {}
        self.running_vars: Dict[str, np.ndarray] = {}

    def _compute_key(self, x: np.ndarray, norm_type: NormType) -> str:
        """Compute cache key for input"""
        hasher = hashlib.sha256()
        hasher.update(x.tobytes())
        hasher.update(norm_type.value.encode())
        return hasher.hexdigest()

    def get(self, key: str) -> Optional[Dict[str, np.ndarray]]:
        """Get cached computation"""
        return self.db.get_activation(key)

    def set(self, key: str, value: Dict[str, np.ndarray], metadata: Dict):
        """Cache computation"""
        self.db.set_activation(key, value, metadata)

    def update_running_stats(
        self,
        key: str,
        mean: np.ndarray,
        var: np.ndarray,
        momentum: float
    ):
        """Update running statistics"""
        if key in self.running_means:
            self.running_means[key] = (
                (1 - momentum) * self.running_means[key] +
                momentum * mean
            )
            self.running_vars[key] = (
                (1 - momentum) * self.running_vars[key] +
                momentum * var
            )
        else:
            self.running_means[key] = mean
            self.running_vars[key] = var

class Normalization:
    """
    Unified normalization implementation with support for:
    - Multiple normalization types
    - Hardware acceleration
    - Mixed precision
    - Computation caching
    - Running statistics tracking
    """

    def __init__(
        self,
        config: NormConfig,
        driver=None
    ):
        """Initialize normalization layer"""
        self.config = config
        self.driver = driver
        self.cache = NormalizationCache()

        if config.affine:
            self.gamma = np.ones(config.num_features, dtype=config.dtype)
            self.beta = np.zeros(config.num_features, dtype=config.dtype)
        else:
            self.gamma = None
            self.beta = None

    @staticmethod
    @lru_cache(maxsize=128)
    def _get_reshape_dims(
        input_shape: Tuple[int, ...],
        num_features: int,
        norm_type: NormType
    ) -> Tuple[Tuple[int, ...], Tuple[int, ...]]:
        """Get parameter broadcast shape and reduction axes for a norm type"""
        ndim = len(input_shape)
        if norm_type == NormType.BATCH:
            # Per-channel statistics over the batch and spatial axes.
            param_shape = (1, num_features) + (1,) * (ndim - 2)
            reduction_axes = (0,) + tuple(range(2, ndim))
        elif norm_type in (NormType.LAYER, NormType.RMS):
            # Per-sample statistics over the trailing feature axis.
            param_shape = (1,) * (ndim - 1) + (num_features,)
            reduction_axes = (ndim - 1,)
        else:
            # Instance/group norm: per-sample, per-channel statistics over the
            # spatial axes (group norm re-derives its axes after the group
            # reshape in normalize()).
            param_shape = (1, num_features) + (1,) * (ndim - 2)
            reduction_axes = (2,) + tuple(range(3, ndim))
        return param_shape, reduction_axes

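    # Illustrative results of _get_reshape_dims (not executed here):
    #   BATCH on (N, C, H, W):    param_shape (1, C, 1, 1), reduce over (0, 2, 3)
    #   LAYER/RMS on (N, T, D):   param_shape (1, 1, D),    reduce over the last axis
    #   INSTANCE on (N, C, H, W): param_shape (1, C, 1, 1), reduce over (2, 3)
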
    def _check_input(self, x: np.ndarray):
        """Validate input tensor"""
        if x.ndim < 2:
            raise ValueError(f"Expected at least 2D input, got shape {x.shape}")

        if self.config.norm_type in [NormType.BATCH, NormType.GROUP]:
            if x.shape[1] != self.config.num_features:
                raise ValueError(
                    f"Expected {self.config.num_features} features, got {x.shape[1]}"
                )

        if self.config.norm_type == NormType.GROUP:
            # Group norm reshapes channels into (groups, channels_per_group),
            # so the channel count must divide evenly.
            if self.config.num_features % self.config.num_groups != 0:
                raise ValueError(
                    f"num_features ({self.config.num_features}) must be divisible "
                    f"by num_groups ({self.config.num_groups})"
                )

    def _compute_stats(
        self,
        x: np.ndarray,
        reduction_axes: Tuple[int, ...]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Compute mean and variance"""
        if (self.driver and hasattr(self.driver, 'reduce_mean')
                and hasattr(self.driver, 'reduce_var')):
            mean = self.driver.reduce_mean(x, axis=reduction_axes, keepdims=True)
            var = self.driver.reduce_var(x, axis=reduction_axes, keepdims=True)
        else:
            mean = np.mean(x, axis=reduction_axes, keepdims=True)
            var = np.var(x, axis=reduction_axes, keepdims=True)
        return mean, var

    def normalize(
        self,
        x: np.ndarray,
        training: bool = True
    ) -> np.ndarray:
        """
        Apply normalization to the input tensor.
        """
        self._check_input(x)

        # In eval mode, return a cached result for an identical input if available.
        if self.config.use_cache and not training:
            cache_key = self.cache._compute_key(x, self.config.norm_type)
            cached = self.cache.get(cache_key)
            if cached is not None:
                return cached['output']

        param_shape, reduction_axes = self._get_reshape_dims(
            x.shape,
            self.config.num_features,
            self.config.norm_type
        )

        # Group norm: fold channels into (groups, channels_per_group) and
        # reduce within each group.
        if self.config.norm_type == NormType.GROUP:
            groups = self.config.num_groups
            N, C = x.shape[:2]
            x = x.reshape(N, groups, C // groups, *x.shape[2:])
            reduction_axes = (2,) + tuple(range(3, x.ndim))

        if self.config.norm_type == NormType.RMS:
            # RMS norm subtracts no mean; 'var' holds the mean of squares.
            mean = np.zeros((1,) * x.ndim, dtype=x.dtype)
            var = np.mean(np.square(x), axis=reduction_axes, keepdims=True)
        else:
            mean, var = self._compute_stats(x, reduction_axes)

        if training and self.config.track_running_stats:
            self.cache.update_running_stats(
                str(id(self)),
                mean,
                var,
                self.config.momentum
            )

        x_norm = (x - mean) / np.sqrt(var + self.config.eps)

        # Undo the group reshape before applying the affine parameters.
        if self.config.norm_type == NormType.GROUP:
            x_norm = x_norm.reshape(N, C, *x.shape[3:])

        if self.config.affine:
            gamma = self.gamma.reshape(param_shape)
            beta = self.beta.reshape(param_shape)
            out = gamma * x_norm + beta
        else:
            out = x_norm

        if self.config.use_cache and not training:
            self.cache.set(
                cache_key,
                {
                    'output': out,
                    'mean': mean,
                    'var': var
                },
                {
                    'shape': x.shape,
                    'dtype': str(x.dtype),
                    'norm_type': self.config.norm_type.value
                }
            )

        return out

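    # Illustrative usage of normalize() (a sketch; it assumes the Helium DB
    # backend used by NormalizationCache is available in this environment):
    #
    #     norm = Normalization(NormConfig(norm_type=NormType.BATCH, num_features=16))
    #     y_train = norm.normalize(x, training=True)   # updates running stats
    #     y_eval = norm.normalize(x, training=False)   # may hit the activation cache
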
    @classmethod
    def batch_norm(
        cls,
        x: np.ndarray,
        num_features: Optional[int] = None,
        **kwargs
    ) -> np.ndarray:
        """Convenience method for batch normalization"""
        config = NormConfig(
            norm_type=NormType.BATCH,
            num_features=num_features or x.shape[1],
            **kwargs
        )
        return cls(config).normalize(x)

    @classmethod
    def layer_norm(
        cls,
        x: np.ndarray,
        num_features: Optional[int] = None,
        **kwargs
    ) -> np.ndarray:
        """Convenience method for layer normalization"""
        config = NormConfig(
            norm_type=NormType.LAYER,
            num_features=num_features or x.shape[-1],
            **kwargs
        )
        return cls(config).normalize(x)

    @classmethod
    def group_norm(
        cls,
        x: np.ndarray,
        num_features: Optional[int] = None,
        num_groups: int = 32,
        **kwargs
    ) -> np.ndarray:
        """Convenience method for group normalization"""
        config = NormConfig(
            norm_type=NormType.GROUP,
            num_features=num_features or x.shape[1],
            num_groups=num_groups,
            **kwargs
        )
        return cls(config).normalize(x)

    @classmethod
    def instance_norm(
        cls,
        x: np.ndarray,
        num_features: Optional[int] = None,
        **kwargs
    ) -> np.ndarray:
        """Convenience method for instance normalization"""
        config = NormConfig(
            norm_type=NormType.INSTANCE,
            num_features=num_features or x.shape[1],
            **kwargs
        )
        return cls(config).normalize(x)

    @classmethod
    def rms_norm(
        cls,
        x: np.ndarray,
        num_features: Optional[int] = None,
        **kwargs
    ) -> np.ndarray:
        """Convenience method for RMS normalization"""
        config = NormConfig(
            norm_type=NormType.RMS,
            num_features=num_features or x.shape[-1],
            track_running_stats=False,
            **kwargs
        )
        return cls(config).normalize(x)
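

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not a test suite). It assumes the package
    # imports above resolve and that HeliumDBManager.get_instance() works in
    # this environment; the convenience classmethods default to training=True,
    # so no activation-cache entries are written here.
    rng = np.random.default_rng(0)

    # Batch norm over the channels of an NCHW activation.
    x_img = rng.standard_normal((4, 16, 8, 8)).astype(np.float32)
    y_bn = Normalization.batch_norm(x_img)
    print("batch_norm:", x_img.shape, "->", y_bn.shape)

    # Layer norm over the trailing feature axis of a (batch, time, features) tensor.
    x_seq = rng.standard_normal((4, 10, 32)).astype(np.float32)
    y_ln = Normalization.layer_norm(x_seq)
    print("layer_norm:", x_seq.shape, "->", y_ln.shape)

    # Group norm with 4 groups over 16 channels.
    y_gn = Normalization.group_norm(x_img, num_groups=4)
    print("group_norm:", x_img.shape, "->", y_gn.shape)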