Charlie81 committed on
Commit
be9d959
·
1 Parent(s): 3325c29
Files changed (1) hide show
  1. modeling_myolmoe.py +1 -0
modeling_myolmoe.py CHANGED
@@ -320,6 +320,7 @@ class MyOLMoESparseMoeBlock(nn.Module):
320
  def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
321
  print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
322
  batch_size, seq_len, _ = hidden_states.shape
 
323
  hidden_states = hidden_states.view(-1, self.hidden_size)
324
 
325
  # Get routing weights and selected experts
 
320
  def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
321
  print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
322
  batch_size, seq_len, _ = hidden_states.shape
323
+ print("absolute precision")
324
  hidden_states = hidden_states.view(-1, self.hidden_size)
325
 
326
  # Get routing weights and selected experts