Commit: changed nn.Linear to use 4-bit quantization (bitsandbytes Linear4bit)
Browse files — modeling_molmo2.py (+2 −2)
modeling_molmo2.py
CHANGED
|
@@ -91,7 +91,7 @@ class ViTMLP(nn.Module):
|
|
| 91 |
self.w1 = bnb.nn.Linear4bit(dim, hidden_dim, bias=True, quant_type="nf4", device=device)
|
| 92 |
|
| 93 |
self.act = ACT2FN[hidden_act]
|
| 94 |
-
self.w2 = nn.Linear(hidden_dim, dim, bias=True, device=device)
[NOTE: this removed line was truncated by the page extraction (it showed the replacement call cut off mid-argument). Reconstructed from the commit title "changed nn.Linear to use 4-bit quant" and the `+` counterpart below — verify exact original arguments against repository history.]
|
| 95 |
|
| 96 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 97 |
return self.w2(self.act(self.w1(x)))
|
|
@@ -258,7 +258,7 @@ class Molmo2VisionBlock(nn.Module):
|
|
| 258 |
num_heads=config.num_attention_heads,
|
| 259 |
num_key_value_heads=config.num_key_value_heads,
|
| 260 |
head_dim=config.head_dim,
|
| 261 |
-
float32_attention=…
[NOTE: removed line truncated in this extraction — the original right-hand value is not recoverable from this page; the replacement line is `float32_attention=config.float32_attention,`. Verify the pre-change value against repository history.]
|
| 262 |
attention_dropout=config.attention_dropout,
|
| 263 |
residual_dropout=config.residual_dropout,
|
| 264 |
device=device,
|
|
|
|
| 91 |
self.w1 = bnb.nn.Linear4bit(dim, hidden_dim, bias=True, quant_type="nf4", device=device)
|
| 92 |
|
| 93 |
self.act = ACT2FN[hidden_act]
|
| 94 |
+
self.w2 = bnb.nn.Linear4bit(hidden_dim, dim, bias=True, quant_type="nf4", device=device)
|
| 95 |
|
| 96 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 97 |
return self.w2(self.act(self.w1(x)))
|
|
|
|
| 258 |
num_heads=config.num_attention_heads,
|
| 259 |
num_key_value_heads=config.num_key_value_heads,
|
| 260 |
head_dim=config.head_dim,
|
| 261 |
+
float32_attention=config.float32_attention,
|
| 262 |
attention_dropout=config.attention_dropout,
|
| 263 |
residual_dropout=config.residual_dropout,
|
| 264 |
device=device,
|