from typing import Optional, Union

import torch
import torch.nn as nn


class LoRALinearLayer(nn.Module):
    """Low-rank linear adapter built from two bias-free linear projections.

    The input is projected from ``in_features`` to ``rank`` by ``down`` and
    then from ``rank`` to ``out_features`` by ``up``. When ``network_alpha``
    is set, the result is additionally scaled by ``network_alpha / rank``.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
        rank: Size of the intermediate projection.
        network_alpha: Optional scaling numerator; ``None`` disables scaling.
        device: Forwarded to both ``nn.Linear`` constructors.
        dtype: Forwarded to both ``nn.Linear`` constructors.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        rank: int = 4,
        network_alpha: Optional[float] = None,
        device: Optional[Union[torch.device, str]] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().__init__()

        self.down = nn.Linear(
            in_features, rank, bias=False, device=device, dtype=dtype
        )
        self.up = nn.Linear(
            rank, out_features, bias=False, device=device, dtype=dtype
        )
        self.network_alpha = network_alpha
        self.rank = rank

        # ``up`` starts at zero and neither projection has a bias, so a
        # freshly constructed layer outputs all zeros.
        nn.init.normal_(self.down.weight, std=1 / rank)
        nn.init.zeros_(self.up.weight)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Apply the down/up projection and return in the input's dtype.

        Args:
            hidden_states: Input tensor whose last dimension is
                ``in_features``.

        Returns:
            The projected (and, if ``network_alpha`` is set, scaled) tensor,
            cast back to the dtype of ``hidden_states``.
        """
        orig_dtype = hidden_states.dtype
        # Compute in the dtype of the layer weights, which may differ from
        # the dtype of the incoming activations.
        dtype = self.down.weight.dtype

        down_hidden_states = self.down(hidden_states.to(dtype))
        up_hidden_states = self.up(down_hidden_states)

        if self.network_alpha is not None:
            up_hidden_states *= self.network_alpha / self.rank

        return up_hidden_states.to(orig_dtype)