Add print statements
Browse files - modeling_cogvlm.py +5 -9
modeling_cogvlm.py
CHANGED
|
@@ -296,8 +296,8 @@ class CogVLMDecoderLayer(nn.Module):
|
|
| 296 |
|
| 297 |
hidden_states = self.input_layernorm(hidden_states)
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
|
| 302 |
# Self Attention
|
| 303 |
hidden_states, self_attn_weights, present_key_value = self.self_attn(
|
|
@@ -310,8 +310,8 @@ class CogVLMDecoderLayer(nn.Module):
|
|
| 310 |
use_cache=use_cache,
|
| 311 |
)
|
| 312 |
|
| 313 |
-
|
| 314 |
-
|
| 315 |
|
| 316 |
hidden_states = residual + hidden_states
|
| 317 |
|
|
@@ -600,10 +600,6 @@ class CogVLMModel(CogVLMPreTrainedModel):
|
|
| 600 |
if output_hidden_states:
|
| 601 |
all_hidden_states += (hidden_states,)
|
| 602 |
|
| 603 |
-
# if idx in [0, 1, 2]:
|
| 604 |
-
# print(f"Hidden states before layer {idx}", hidden_states[0,:3,:3])
|
| 605 |
-
# print(f"Mean of hidden states before layer {idx}", hidden_states.mean())
|
| 606 |
-
|
| 607 |
past_key_value = past_key_values[idx] if past_key_values is not None else None
|
| 608 |
layer_outputs = decoder_layer(
|
| 609 |
hidden_states,
|
|
@@ -613,7 +609,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
|
|
| 613 |
past_key_value=past_key_value,
|
| 614 |
output_attentions=output_attentions,
|
| 615 |
use_cache=use_cache,
|
| 616 |
-
print_values=idx
|
| 617 |
)
|
| 618 |
hidden_states = layer_outputs[0]
|
| 619 |
|
|
|
|
| 296 |
|
| 297 |
hidden_states = self.input_layernorm(hidden_states)
|
| 298 |
|
| 299 |
+
if print_values:
|
| 300 |
+
print("Hidden states before self attention:", hidden_states[0,:3,:3])
|
| 301 |
|
| 302 |
# Self Attention
|
| 303 |
hidden_states, self_attn_weights, present_key_value = self.self_attn(
|
|
|
|
| 310 |
use_cache=use_cache,
|
| 311 |
)
|
| 312 |
|
| 313 |
+
if print_values:
|
| 314 |
+
print("Hidden states after self attention:", hidden_states[0,:3,:3])
|
| 315 |
|
| 316 |
hidden_states = residual + hidden_states
|
| 317 |
|
|
|
|
| 600 |
if output_hidden_states:
|
| 601 |
all_hidden_states += (hidden_states,)
|
| 602 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
past_key_value = past_key_values[idx] if past_key_values is not None else None
|
| 604 |
layer_outputs = decoder_layer(
|
| 605 |
hidden_states,
|
|
|
|
| 609 |
past_key_value=past_key_value,
|
| 610 |
output_attentions=output_attentions,
|
| 611 |
use_cache=use_cache,
|
| 612 |
+
print_values=idx==0,
|
| 613 |
)
|
| 614 |
hidden_states = layer_outputs[0]
|
| 615 |
|