sunjuice committed on
Commit
89352af
·
1 Parent(s): 435cd2d

changed nn.Linear to use 4-bit quant

Browse files
Files changed (1) hide show
  1. modeling_molmo2.py +2 -2
modeling_molmo2.py CHANGED
@@ -91,7 +91,7 @@ class ViTMLP(nn.Module):
91
  self.w1 = bnb.nn.Linear4bit(dim, hidden_dim, bias=True, quant_type="nf4", device=device)
92
 
93
  self.act = ACT2FN[hidden_act]
94
- self.w2 = bnb.nn.Linear4bit(dim, hidden_dim, bias=True, quant_type="nf4", device=device)
95
 
96
  def forward(self, x: torch.Tensor) -> torch.Tensor:
97
  return self.w2(self.act(self.w1(x)))
@@ -258,7 +258,7 @@ class Molmo2VisionBlock(nn.Module):
258
  num_heads=config.num_attention_heads,
259
  num_key_value_heads=config.num_key_value_heads,
260
  head_dim=config.head_dim,
261
- float32_attention=False,
262
  attention_dropout=config.attention_dropout,
263
  residual_dropout=config.residual_dropout,
264
  device=device,
 
91
  self.w1 = bnb.nn.Linear4bit(dim, hidden_dim, bias=True, quant_type="nf4", device=device)
92
 
93
  self.act = ACT2FN[hidden_act]
94
+ self.w2 = bnb.nn.Linear4bit(hidden_dim, dim, bias=True, quant_type="nf4", device=device)
95
 
96
  def forward(self, x: torch.Tensor) -> torch.Tensor:
97
  return self.w2(self.act(self.w1(x)))
 
258
  num_heads=config.num_attention_heads,
259
  num_key_value_heads=config.num_key_value_heads,
260
  head_dim=config.head_dim,
261
+ float32_attention=config.float32_attention,
262
  attention_dropout=config.attention_dropout,
263
  residual_dropout=config.residual_dropout,
264
  device=device,