bochen2079/katherine-k0 / logs /unsloth_compiled_cache /BlockDiagonalLinear_peft_forward.py
bochen2079's picture
download
raw
3.16 kB
"""
2026.5.1
2026.5.2
5.5.0
0.24.0
__UNSLOTH_VERSIONING__
"""
# Unsloth auto generated code
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Option dictionary referenced by the (currently commented-out) torch.compile
# decorator further down in this file. One key per line for easier diffing.
torch_compile_options = {
    'epilogue_fusion': True,
    'max_autotune': False,
    'shape_padding': True,
    'trace.enabled': False,
    'triton.cudagraphs': False,
    'debug': False,
    'dce': True,
    'memory_planning': True,
    'coordinate_descent_tuning': False,
    'trace.graph_diagram': False,
    'compile_threads': 32,
    'group_fusion': True,
    'disable_progress': True,
    'verbose_progress': False,
    'triton.multi_kernel': 0,
    'triton.use_block_ptr': False,
    'triton.enable_persistent_tma_matmul': True,
    'triton.autotune_at_compile_time': False,
    'triton.cooperative_reductions': False,
    'cuda.compile_opt_level': '-O2',
    'cuda.enable_cuda_lto': True,
    'combo_kernels': False,
    'benchmark_combo_kernel': True,
    'combo_kernel_foreach_dynamic_shapes': True,
}
from torch import Tensor
import torch
import torch.nn as nn
from torch.nn import functional as F
from unsloth_zoo.temporary_patches.common import torch_compile
from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
from peft.tuners.lora.variants import (torch)
# Module-level aliases for the torch ops used by lora_forward below.
torch_addmm, torch_add = torch.addmm, torch.add
# @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
    """Add the scaled LoRA update to a base-layer output.

    Computes ``result + scaling * (dropout(x) @ A.T @ B.T)`` where ``A`` and
    ``B`` are ``lora_A.weight`` and ``lora_B.weight``. When ``lora_B.bias`` is
    not None, ``scaling * bias`` is added as well.

    Args:
        result: Base-layer output. Its dtype is used for every operand and
            its shape is the shape of the returned tensor. It may be
            non-contiguous.
        lora_A: Object exposing ``.weight`` (first projection).
        lora_B: Object exposing ``.weight`` and ``.bias`` (``bias`` may be
            None).
        dropout: Callable applied to ``x`` before the first projection.
        x: Layer input.
        scaling: Scalar multiplier applied to the LoRA branch (and the bias).

    Returns:
        A new tensor with the shape and dtype of ``result``.
    """
    # Use result.dtype (bfloat16 from base layer) since x may have been cast to float32
    # by _cast_input_dtype when autocast is disabled
    target_dtype = result.dtype
    xA = dropout(x).to(target_dtype) @ lora_A.weight.to(target_dtype).t()

    # output = result + scaling * xA @ lora_B.weight.t()
    shape = result.shape
    # reshape (not view): identical zero-copy behavior for contiguous tensors,
    # but it also accepts a non-contiguous `result` (e.g. a transposed tensor)
    # where view would raise a RuntimeError.
    output = torch_addmm(
        result.reshape(-1, shape[-1]),
        xA.reshape(-1, xA.shape[-1]),
        lora_B.weight.to(target_dtype).t(),
        alpha = scaling,
        beta = 1,
    ).view(shape)

    bias = lora_B.bias
    if bias is not None:
        # The bias belongs to the LoRA branch, so it is scaled like the rest.
        output = torch_add(
            output,
            bias.to(target_dtype),
            alpha = scaling,
        )
    return output
pass
def unsloth_forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply a block-diagonal linear map to the last dimension of ``x``.

    The last dimension of ``x`` is split into ``self.nblocks`` equal chunks,
    each chunk is multiplied by its own ``(n, m)`` slice of ``self.weight``,
    and the per-block results are concatenated along the last dimension.

    Args:
        x: Input tensor of any rank >= 1; the last dimension must be
            divisible by ``self.nblocks``. Leading dimensions may be empty.

    Returns:
        Tensor with the leading dimensions of ``x`` and a last dimension of
        ``self.nblocks * (self.out_features // self.nblocks)``.
    """
    first_dims = x.shape[:-1]
    if x.dim() != 2:
        # Collapse every leading dimension into a single batch dimension.
        x = x.reshape(-1, x.shape[-1])
    B = x.shape[0]
    nb = self.nblocks
    m = x.shape[-1] // nb          # input features per block
    n = self.out_features // nb    # output features per block
    x = x.reshape(B, nb, m)
    w = self.weight.view(nb, n, m)
    # out[b, i, :] = w[i] @ x[b, i, :]  for every block i
    out = torch.einsum("bim,inm->bin", x, w)
    # Spell out the last dimension instead of using -1: with a zero-sized
    # leading dimension (empty batch) reshape cannot infer -1 and raises.
    return out.reshape(*first_dims, nb * n)

Xet Storage Details

Size:
3.16 kB
·
Xet hash:
8256f22c6531c18426b9d05961680406c5e6b35b5007a115eac8ce6b3c5672eb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.