Charlie81
/

ThinExperts

Model card Files Files and versions

Charlie81 committed on Jun 7, 2025

Commit

be9d959

·

1 Parent(s): 3325c29

debug

Files changed (1) hide show

modeling_myolmoe.py +1 -0

modeling_myolmoe.py CHANGED Viewed

@@ -320,6 +320,7 @@ class MyOLMoESparseMoeBlock(nn.Module):
     def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
         batch_size, seq_len, _ = hidden_states.shape
         hidden_states = hidden_states.view(-1, self.hidden_size)
         # Get routing weights and selected experts

     def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
         batch_size, seq_len, _ = hidden_states.shape
+        print("absolute precision")
         hidden_states = hidden_states.view(-1, self.hidden_size)
         # Get routing weights and selected experts