debug
Browse files
- modeling_myolmoe.py +1 -0
modeling_myolmoe.py
CHANGED
|
@@ -320,6 +320,7 @@ class MyOLMoESparseMoeBlock(nn.Module):
|
|
| 320 |
def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
| 321 |
print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
|
| 322 |
batch_size, seq_len, _ = hidden_states.shape
|
|
|
|
| 323 |
hidden_states = hidden_states.view(-1, self.hidden_size)
|
| 324 |
|
| 325 |
# Get routing weights and selected experts
|
|
|
|
| 320 |
def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
| 321 |
print(f"DEBUG: MoE forward start - hidden_states shape: {hidden_states.shape}")
|
| 322 |
batch_size, seq_len, _ = hidden_states.shape
|
| 323 |
+
print("absolute precision")
|
| 324 |
hidden_states = hidden_states.view(-1, self.hidden_size)
|
| 325 |
|
| 326 |
# Get routing weights and selected experts
|