Commit: Add print statements — modeling_cogvlm.py (+6 −1)
|
@@ -290,6 +290,11 @@ class VisionExpertAttention(nn.Module):
 290         context_layer = attention_fn(
 291             query_layer=query_states, key_layer=key_states, value_layer=value_states, attention_mask=attention_mask,
 292             scaling_attention_score=True, attention_dropout=None)
 293         if context_layer.size() != (bsz, self.num_heads, q_len, self.head_dim):
 294             raise ValueError(
 295                 f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
|
@@ -657,7 +662,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
 657                 past_key_value=past_key_value,
 658                 output_attentions=output_attentions,
 659                 use_cache=use_cache,
-660                 print_values=idx==0,
 661             )
 662             hidden_states = layer_outputs[0]
 663
|
|
|
 290         context_layer = attention_fn(
 291             query_layer=query_states, key_layer=key_states, value_layer=value_states, attention_mask=attention_mask,
 292             scaling_attention_score=True, attention_dropout=None)
+293
+294         if print_values:
+295             print("Shape of context_layer:", context_layer.shape)
+296             print("First values of context_layer:", context_layer[0,0,:3,:3])
+297
 298         if context_layer.size() != (bsz, self.num_heads, q_len, self.head_dim):
 299             raise ValueError(
 300                 f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
|
|
 662                 past_key_value=past_key_value,
 663                 output_attentions=output_attentions,
 664                 use_cache=use_cache,
+665                 print_values=idx==0 and step==1,
 666             )
 667             hidden_states = layer_outputs[0]
 668