Spaces:
Runtime error
Runtime error
| """ | |
| LoRA (Low-Rank Adaptation) implementation for MLP layers. | |
| Replaces qkv projections in attention and the FFN MLP layers. | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import math | |
| USE_LORA :bool = 1 # enable LoRA replacement for MLP layers (and Conv2d) | |
| if USE_LORA: | |
| LORA_dropout :float = 0.0 # LoRA dropout rate | |
| LORA_apply_to_conv :bool = 1 # also apply LoRA to Conv2d layers | |
| LORA_freeze_base :bool = False | |
| LORA_DEBUG :bool = 0 | |
| FORCE_SAME_RANK_ACROSS_TASKS :bool = 0 | |
| DONT_lora_if_dim_lt :int = 90 # 0: disable. increase for low-dim layers (e.g., in/out conv dim < 32) | |
| DONT_lora_if_rankFrac_gt :float = 0.3 | |
| class LoRALinear(nn.Module): | |
| """ | |
| LoRA layer that wraps a frozen Linear layer with low-rank adaptation. | |
| Args: | |
| original_linear: original nn.Linear layer that will be frozen | |
| rank: LoRA rank (r) | |
| dropout: dropout probability | |
| """ | |
| def __init__( | |
| self, | |
| original_linear: nn.Linear, | |
| rank: int = 4, | |
| dropout: float = 0.0, | |
| freeze_base: bool = True, | |
| ): | |
| super().__init__() | |
| self.in_features = original_linear.in_features | |
| self.out_features = original_linear.out_features | |
| self.rank = rank | |
| self.scaling = 2.0 | |
| # Freeze the original weights | |
| self.original_linear = original_linear | |
| if freeze_base: | |
| for param in self.original_linear.parameters(): | |
| param.requires_grad = False | |
| # LoRA low-rank decomposition: W = W_0 + B @ A, where B: out_features x rank, A: rank x in_features | |
| self.lora_A = nn.Parameter(torch.zeros(rank, self.in_features)) | |
| self.lora_B = nn.Parameter(torch.zeros(self.out_features, rank)) | |
| # Initialization | |
| nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) | |
| nn.init.zeros_(self.lora_B) # initialize B to 0 so LoRA has no initial effect | |
| # Dropout | |
| self.dropout = nn.Dropout(p=dropout) if dropout > 0 else nn.Identity() | |
| def forward(self, x: torch.Tensor) -> torch.Tensor: | |
| # Output from the frozen original linear layer | |
| result = self.original_linear(x) | |
| # LoRA low-rank update: x @ A^T @ B^T | |
| # x: (..., in_features) | |
| # lora_A: (rank, in_features) -> A^T: (in_features, rank) | |
| # lora_B: (out_features, rank) -> B^T: (rank, out_features) | |
| lora_out = self.dropout(x) @ self.lora_A.T @ self.lora_B.T | |
| return result + lora_out * self.scaling | |
| def __repr__(self): | |
| return f"LoRALinear(in_features={self.in_features}, out_features={self.out_features}, rank={self.rank}, scaling={self.scaling})" | |
| def replace_linear_with_lora( | |
| module: nn.Module, | |
| rank: int = 4, | |
| dropout: float = 0.0, | |
| target_modules: list = None, | |
| verbose: bool = True, | |
| ): | |
| """ | |
| Recursively replace nn.Linear layers within a module with LoRALinear wrappers. | |
| Args: | |
| module: module whose linear layers should be replaced | |
| rank: LoRA rank | |
| dropout: dropout probability | |
| target_modules: specific module names to replace; None means all linears | |
| e.g.: ['to_q', 'to_k', 'to_v', 'to_out'] for attention | |
| ['net.0', 'net.2'] for FeedForward | |
| verbose: whether to log replacements | |
| Returns: | |
| the module with replacements applied | |
| """ | |
| replaced_count = 0 | |
| for name, child in module.named_children(): | |
| # Skip modules not in the target list (if filtering is enabled) | |
| if target_modules is not None and name not in target_modules: | |
| # Continue recursing into child modules | |
| replace_linear_with_lora(child, rank, dropout, target_modules, verbose) | |
| continue | |
| if isinstance(child, nn.Linear): | |
| # Replace with LoRALinear | |
| lora_layer = LoRALinear(child, rank=rank, dropout=dropout, freeze_base=LORA_freeze_base) | |
| setattr(module, name, lora_layer) | |
| replaced_count += 1 | |
| if verbose: | |
| print(f"[LoRA] Replaced {name}: {child.in_features} -> {child.out_features} with rank={rank}") | |
| elif isinstance(child, nn.Sequential): | |
| # Handle Sequential containers (e.g., FeedForward nets) | |
| new_sequential = nn.Sequential() | |
| for idx, submodule in enumerate(child): | |
| if isinstance(submodule, nn.Linear): | |
| lora_layer = LoRALinear(submodule, rank=rank, dropout=dropout, freeze_base=LORA_freeze_base) | |
| new_sequential.add_module(str(idx), lora_layer) | |
| replaced_count += 1 | |
| if verbose: | |
| print(f"[LoRA] Replaced {name}.{idx}: {submodule.in_features} -> {submodule.out_features} with rank={rank}") | |
| else: | |
| new_sequential.add_module(str(idx), submodule) | |
| setattr(module, name, new_sequential) | |
| else: | |
| # Recurse into the remaining submodules | |
| replace_linear_with_lora(child, rank, dropout, target_modules, verbose) | |
| return module | |
| def count_lora_parameters(module: nn.Module): | |
| """ | |
| Count LoRA parameters within a module. | |
| Returns: | |
| dict: {'trainable': trainable params, 'frozen': frozen params, 'total': total params} | |
| """ | |
| trainable_params = 0 | |
| frozen_params = 0 | |
| for name, param in module.named_parameters(): | |
| num_params = param.numel() | |
| if param.requires_grad: | |
| trainable_params += num_params | |
| else: | |
| frozen_params += num_params | |
| total_params = trainable_params + frozen_params | |
| return { | |
| 'trainable': trainable_params, | |
| 'frozen': frozen_params, | |
| 'total': total_params, | |
| 'trainable_ratio': trainable_params / total_params if total_params > 0 else 0, | |
| } | |
| def print_lora_parameters(module: nn.Module, name: str = "Model"): | |
| """Print LoRA parameter statistics.""" | |
| stats = count_lora_parameters(module) | |
| print(f"\n{'='*60}") | |
| print(f"{name} Parameter Statistics:") | |
| print(f"{'='*60}") | |
| print(f"Trainable params: {stats['trainable']:,} ({stats['trainable_ratio']*100:.2f}%)") | |
| print(f"Frozen params: {stats['frozen']:,} ({(1-stats['trainable_ratio'])*100:.2f}%)") | |
| print(f"Total params: {stats['total']:,}") | |
| print(f"{'='*60}\n") | |
| class LoRAConv2d(nn.Module): | |
| """ | |
| LoRA layer for Conv2d. | |
| Treat Conv2d as a matrix multiplication: | |
| - flatten kernel: (out_channels, in_channels, k, k) -> (out_channels, in_channels*k*k) | |
| - apply low-rank decomposition: W = W_0 + B @ A | |
| Args: | |
| original_conv: original nn.Conv2d layer that will be frozen | |
| rank: LoRA rank (r) | |
| dropout: dropout probability | |
| """ | |
| def __init__( | |
| self, | |
| original_conv: nn.Conv2d, | |
| rank: int = 4, | |
| dropout: float = 0.0, | |
| freeze_base: bool = True, | |
| ): | |
| super().__init__() | |
| self.out_channels = original_conv.out_channels | |
| self.in_channels = original_conv.in_channels | |
| self.kernel_size = original_conv.kernel_size | |
| self.stride = original_conv.stride | |
| self.padding = original_conv.padding | |
| self.dilation = original_conv.dilation | |
| self.groups = original_conv.groups | |
| self.rank = rank | |
| self.scaling = 2.0 | |
| # Freeze the original weights | |
| self.original_conv = original_conv | |
| if freeze_base: | |
| for param in self.original_conv.parameters(): | |
| param.requires_grad = False | |
| # LoRA low-rank decomposition | |
| # lora_A: (rank, in_channels, kernel_size, kernel_size) | |
| # lora_B: (out_channels, rank, 1, 1) - via 1x1 convolution | |
| self.lora_A = nn.Parameter(torch.zeros( | |
| rank, | |
| self.in_channels // self.groups, | |
| self.kernel_size[0], | |
| self.kernel_size[1] | |
| )) | |
| self.lora_B = nn.Parameter(torch.zeros(self.out_channels, rank, 1, 1)) | |
| # Initialization | |
| nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) | |
| nn.init.zeros_(self.lora_B) # initialize B to 0 | |
| # Dropout | |
| self.dropout = nn.Dropout(p=dropout) if dropout > 0 else nn.Identity() | |
| print(f"param orig:lora (M) = {self.original_conv.weight.numel()/1024/1024}:{self.lora_A.numel()+self.lora_B.numel()/1024/1024}") | |
| def forward(self, x: torch.Tensor) -> torch.Tensor: | |
| # Output from the frozen original convolution | |
| # Use no_grad to avoid computing gradients for the base weights | |
| result = self.original_conv(x) | |
| # LoRA low-rank update | |
| # first apply lora_A (down projection) then lora_B (up projection) | |
| x_dropped = self.dropout(x) | |
| lora_out = F.conv2d( | |
| x_dropped, | |
| self.lora_A, | |
| stride=self.stride, | |
| padding=self.padding, | |
| dilation=self.dilation, | |
| groups=self.groups | |
| ) | |
| lora_out = F.conv2d(lora_out, self.lora_B) | |
| return result + lora_out * self.scaling | |
| def __repr__(self): | |
| return (f"LoRAConv2d(in_channels={self.in_channels}, out_channels={self.out_channels}, " | |
| f"kernel_size={self.kernel_size}, rank={self.rank}, scaling={self.scaling})") | |
| def _auto_lora_rank(in_features: int, out_features: int) -> int: | |
| m = min(in_features, out_features) | |
| r = max(LORA_rank_min, int(round(m / max(1.0, LORA_rank_ratio)))) | |
| if (LORA_rank_max is not None) and (r > LORA_rank_max): | |
| r = LORA_rank_max | |
| return max(1, r) | |
| def _svd_low_rank(M: torch.Tensor, rank: int): | |
| # M: [out, in] | |
| orig_device = M.device | |
| orig_dtype = M.dtype | |
| if 1: | |
| M = M.to(device=torch.device('cuda'), dtype=torch.float32) | |
| U, S, Vh = torch.linalg.svd(M, full_matrices=False) | |
| r = min(rank, U.shape[1], Vh.shape[0]) | |
| U_r = U[:, :r] | |
| S_r = S[:r] | |
| Vh_r = Vh[:r, :] | |
| S_root = torch.sqrt(torch.clamp(S_r, min=0)) | |
| B = U_r @ torch.diag(S_root) # [out, r] | |
| A = torch.diag(S_root) @ Vh_r # [r, in] | |
| B = B.to(device=orig_device, dtype=orig_dtype) | |
| A = A.to(device=orig_device, dtype=orig_dtype) | |
| S = S.to(device=orig_device, dtype=orig_dtype) | |
| return B, A, S | |
| def _svdvals_squared(M: torch.Tensor) -> torch.Tensor: | |
| # Return squared singular values (energy), sorted in descending order; M: [out, in] | |
| orig_device = M.device | |
| orig_dtype = M.dtype | |
| if 1: | |
| M = M.to(device=torch.device('cuda'), dtype=torch.float32) | |
| S = torch.linalg.svdvals(M) | |
| S2 = (S.float() ** 2) | |
| return S2.to(device=orig_device, dtype=torch.float32) | |
| def _compute_adaptive_rank_from_S2_list( | |
| list_S2: list, | |
| avg_threshold: float = None, | |
| min_threshold: float = None, | |
| max_rank: int = None, | |
| ) -> int: | |
| # list_S2: squared singular value vectors (descending) for each matrix | |
| assert len(list_S2) > 0 | |
| if avg_threshold is None: | |
| avg_threshold = ADAPTIVE_RANK_AVG_ENERGY_THRESH | |
| if min_threshold is None: | |
| min_threshold = ADAPTIVE_RANK_MIN_ENERGY_THRESH | |
| totals = [] | |
| lengths = [] | |
| for s2 in list_S2: | |
| assert s2.numel() > 0 | |
| total = s2.sum() | |
| # Quick fail: zero ΔW has zero energy, so thresholds can't be evaluated | |
| assert float(total.item()) > 0.0, "Zero energy in weight_diff; cannot determine adaptive rank" | |
| totals.append(total) | |
| lengths.append(int(s2.shape[0])) | |
| R_cap = min(lengths) | |
| if LORA_rank_max is not None: | |
| R_cap = min(R_cap, int(LORA_rank_max)) | |
| if max_rank is not None: | |
| R_cap = min(R_cap, int(max_rank)) | |
| R_cap = max(1, R_cap) | |
| # Iterate ranks r to see if both average and minimum energy ratios meet thresholds | |
| for r in range(1, R_cap + 1): | |
| ratios = [] | |
| for s2, total in zip(list_S2, totals): | |
| captured = s2[:r].sum() | |
| ratios.append(float((captured / total).item())) | |
| avg_ratio = sum(ratios) / len(ratios) | |
| min_ratio = min(ratios) | |
| if (avg_ratio >= avg_threshold) and (min_ratio >= min_threshold): | |
| ret = min(int(R_cap), max(int(LORA_rank_min), int(r))) | |
| return ret | |
| # If no rank satisfies both thresholds, fail fast instead of silently degrading | |
| raise AssertionError(f"No rank satisfies avg>={avg_threshold} and min>={min_threshold} up to R_cap={R_cap}") | |
| def _compute_per_task_ranks_from_S2_list( | |
| list_S2: list, | |
| min_threshold: float = None, | |
| max_rank: int = None, | |
| ) -> list: | |
| # Compute rank per matrix so its energy ratio >= min_threshold (uses min threshold only) | |
| assert len(list_S2) > 0 | |
| if min_threshold is None: | |
| min_threshold = ADAPTIVE_RANK_MIN_ENERGY_THRESH | |
| ret = [] | |
| for i, s2 in enumerate(list_S2): | |
| assert s2.numel() > 0 | |
| total = s2.sum() | |
| assert float(total.item()) > 0.0, "Zero energy in weight_diff; cannot determine adaptive rank" | |
| R_cap = int(s2.shape[0]) | |
| if LORA_rank_max is not None: | |
| R_cap = min(R_cap, int(LORA_rank_max)) | |
| if max_rank is not None: | |
| R_cap = min(R_cap, int(max_rank)) | |
| R_cap = max(1, R_cap) | |
| found = R_cap | |
| # Task-level threshold: when ranks are allowed to differ, use TASK_2_adaptive_rank_min_energy_thresh | |
| thres_this = TASK_2_adaptive_rank_min_energy_thresh[i] if (not FORCE_SAME_RANK_ACROSS_TASKS) else min_threshold | |
| for r in range(1, R_cap + 1): | |
| ratio = s2[:r].sum() / total | |
| if float(ratio.item()) >= float(thres_this): | |
| found = r | |
| break | |
| ret.append(int(max(int(LORA_rank_min), int(found)))) | |
| return ret | |
| def compute_adaptive_rank_for_linear_diffs( | |
| weight_diffs: list, | |
| avg_threshold: float = None, | |
| min_threshold: float = None, | |
| max_rank: int = None, | |
| per_task: bool = None, | |
| ): | |
| # weight_diffs: List[Tensor [out, in]] | |
| assert isinstance(weight_diffs, (list, tuple)) and len(weight_diffs) > 0 | |
| if per_task is None: | |
| per_task = not FORCE_SAME_RANK_ACROSS_TASKS | |
| list_S2 = [_svdvals_squared(M) for M in weight_diffs] | |
| out0, in0 = weight_diffs[0].shape | |
| if per_task: | |
| ranks = _compute_per_task_ranks_from_S2_list(list_S2, min_threshold, max_rank) | |
| print(f"[AdaptiveRank-Linear per-task] in={in0} out={out0} ranks={ranks}") | |
| return ranks | |
| else: | |
| ret = _compute_adaptive_rank_from_S2_list(list_S2, None, min_threshold, max_rank) | |
| print(f"[AdaptiveRank-Linear] in={in0} out={out0} rank={ret}") | |
| return ret | |
| def compute_adaptive_rank_for_conv_diffs( | |
| weight_diffs: list, | |
| avg_threshold: float = None, | |
| min_threshold: float = None, | |
| max_rank: int = None, | |
| per_task: bool = None, | |
| ): | |
| # weight_diffs: List[Tensor [out, in, kH, kW]] -> reshape to [out, in*k*k] | |
| assert isinstance(weight_diffs, (list, tuple)) and len(weight_diffs) > 0 | |
| if per_task is None: | |
| per_task = not FORCE_SAME_RANK_ACROSS_TASKS | |
| list_S2 = [] | |
| for W in weight_diffs: | |
| out_c, in_c, kH, kW = W.shape | |
| M = W.reshape(out_c, in_c * kH * kW) | |
| list_S2.append(_svdvals_squared(M)) | |
| out0, in0, kH0, kW0 = weight_diffs[0].shape | |
| if per_task: | |
| ranks = _compute_per_task_ranks_from_S2_list(list_S2, min_threshold, max_rank) | |
| print(f"[AdaptiveRank-Conv per-task] in_ch={in0} out_ch={out0} kernel=({kH0},{kW0}) ranks={ranks}") | |
| return ranks | |
| else: | |
| ret = _compute_adaptive_rank_from_S2_list(list_S2, None, min_threshold, max_rank) | |
| print(f"[AdaptiveRank-Conv] in_ch={in0} out_ch={out0} kernel=({kH0},{kW0}) rank={ret}") | |
| return ret | |
| class LoRAAdapterLinearOnly(nn.Module): | |
| """ | |
| Incremental LoRA (no base Linear) that returns x @ A^T @ B^T + bias_delta. | |
| """ | |
| def __init__(self, in_features: int, out_features: int, rank: int = None, dropout: float = 0.0, scaling: float = 1.0, use_bias_delta: bool = True): | |
| super().__init__() | |
| if rank is None: | |
| rank = _auto_lora_rank(in_features, out_features) | |
| self.in_features = in_features | |
| self.out_features = out_features | |
| self.rank = rank | |
| self.scaling = scaling | |
| self.dropout = nn.Dropout(p=dropout) if dropout > 0 else nn.Identity() | |
| self.lora_A = nn.Parameter(torch.zeros(rank, in_features)) | |
| self.lora_B = nn.Parameter(torch.zeros(out_features, rank)) | |
| self.use_bias_delta = use_bias_delta | |
| if use_bias_delta: | |
| self.lora_bias = nn.Parameter(torch.zeros(out_features)) | |
| else: | |
| self.register_parameter('lora_bias', None) | |
| # init | |
| nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) | |
| nn.init.zeros_(self.lora_B) | |
| def init_from_diff(self, weight_diff: torch.Tensor, bias_diff: torch.Tensor = None): | |
| # weight_diff: [out, in] | |
| B, A, S = _svd_low_rank(weight_diff.float(), self.rank) | |
| self.lora_A.copy_(A.to(self.lora_A.dtype).to(self.lora_A.device)) | |
| self.lora_B.copy_(B.to(self.lora_B.dtype).to(self.lora_B.device)) | |
| if self.use_bias_delta and (bias_diff is not None): | |
| self.lora_bias.copy_(bias_diff) | |
| if LORA_DEBUG: | |
| energy_total = (S.float() ** 2).sum().item() | |
| energy_top = (S[: self.rank].float() ** 2).sum().item() | |
| energy_ratio = energy_top / max(1e-12, energy_total) | |
| approx = (B @ A).to(weight_diff.device).to(weight_diff.dtype) | |
| err = torch.linalg.norm((approx - weight_diff).float()).item() | |
| base = torch.linalg.norm(weight_diff.float()).item() | |
| rel_err = err / max(1e-12, base) | |
| bias_norm = 0.0 if (bias_diff is None) else float(torch.linalg.norm(bias_diff.float()).item()) | |
| print(f"[LoRA-Linear init] shape={tuple(weight_diff.shape)} rank={self.rank} energy={energy_ratio:.4f} rel_err={rel_err:.6f} bias_norm={bias_norm:.6f}") | |
| def forward(self, x: torch.Tensor) -> torch.Tensor: | |
| update = self.dropout(x) @ self.lora_A.T @ self.lora_B.T | |
| if self.lora_bias is not None: | |
| update = update + self.lora_bias | |
| return update * self.scaling | |
| class LoRAAdapterConv2dOnly(nn.Module): | |
| """ | |
| Incremental LoRA for Conv2d: convolve with A then 1x1 B, return the delta. | |
| """ | |
| def __init__(self, in_channels: int, out_channels: int, kernel_size: tuple, stride: tuple, padding: tuple, dilation: tuple, groups: int = 1, rank: int = None, dropout: float = 0.0, scaling: float = 1.0, use_bias_delta: bool = True): | |
| super().__init__() | |
| if isinstance(kernel_size, int): | |
| kernel_size = (kernel_size, kernel_size) | |
| if isinstance(stride, int): | |
| stride = (stride, stride) | |
| if isinstance(padding, int): | |
| padding = (padding, padding) | |
| if isinstance(dilation, int): | |
| dilation = (dilation, dilation) | |
| kH, kW = kernel_size | |
| if rank is None: | |
| # Estimate rank from the flattened in/out dimensions | |
| rank = _auto_lora_rank(in_channels * kH * kW, out_channels) | |
| self.in_channels = in_channels | |
| self.out_channels = out_channels | |
| self.kernel_size = kernel_size | |
| self.stride = stride | |
| self.padding = padding | |
| self.dilation = dilation | |
| self.groups = groups | |
| self.rank = rank | |
| self.scaling = scaling | |
| self.dropout = nn.Dropout(p=dropout) if dropout > 0 else nn.Identity() | |
| # A: [rank, in/groups, kH, kW] | |
| self.lora_A = nn.Parameter(torch.zeros(rank, in_channels // groups, kH, kW)) | |
| # B: [out, rank, 1, 1] | |
| self.lora_B = nn.Parameter(torch.zeros(out_channels, rank, 1, 1)) | |
| self.use_bias_delta = use_bias_delta | |
| if use_bias_delta: | |
| self.lora_bias = nn.Parameter(torch.zeros(out_channels)) | |
| else: | |
| self.register_parameter('lora_bias', None) | |
| # init | |
| nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) | |
| nn.init.zeros_(self.lora_B) | |
| def init_from_diff(self, weight_diff: torch.Tensor, bias_diff: torch.Tensor = None): | |
| # weight_diff: [out, in, kH, kW] | |
| out_c, in_c, kH, kW = weight_diff.shape | |
| M = weight_diff.reshape(out_c, in_c * kH * kW) | |
| B, A, S = _svd_low_rank(M.float(), self.rank) # B:[out,r], A:[r,in*k*k] | |
| A_reshaped = A.view(self.rank, in_c, kH, kW) | |
| self.lora_A.copy_(A_reshaped) | |
| self.lora_B.copy_(B.view(out_c, self.rank, 1, 1)) | |
| if self.lora_bias is not None and (bias_diff is not None): | |
| self.lora_bias.copy_(bias_diff) | |
| if LORA_DEBUG: | |
| energy_total = (S.float() ** 2).sum().item() | |
| energy_top = (S[: self.rank].float() ** 2).sum().item() | |
| energy_ratio = energy_top / max(1e-12, energy_total) | |
| approx = (B @ A).to(M.device).to(M.dtype) | |
| err = torch.linalg.norm((approx - M).float()).item() | |
| base = torch.linalg.norm(M.float()).item() | |
| rel_err = err / max(1e-12, base) | |
| bias_norm = 0.0 if (bias_diff is None) else float(torch.linalg.norm(bias_diff.float()).item()) | |
| print(f"[LoRA-Conv init] out_in_k=({out_c},{in_c},{kH}x{kW}) rank={self.rank} energy={energy_ratio:.4f} rel_err={rel_err:.6f} bias_norm={bias_norm:.6f}") | |
| def forward(self, x: torch.Tensor) -> torch.Tensor: | |
| x_d = self.dropout(x) | |
| u = F.conv2d(x_d, self.lora_A, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) | |
| u = F.conv2d(u, self.lora_B) | |
| if self.lora_bias is not None: | |
| u = u + self.lora_bias.view(1, -1, 1, 1) | |
| return u * self.scaling | |