added output_hidden_states to allow hidden state output for the model
modeling_caption_bert.py  CHANGED  (+14 -4)
@@ -95,7 +95,8 @@ class CaptionBertModel(PreTrainedModel):
 
         self.post_init()
 
-    def forward(self, input_ids=None, attention_mask=None, **kwargs):
+    def forward(self, input_ids=None, attention_mask=None,
+                output_hidden_states=False, **kwargs):
         B, L = input_ids.shape
         device = input_ids.device
 
@@ -110,7 +111,12 @@ class CaptionBertModel(PreTrainedModel):
         else:
             key_padding_mask = (input_ids == self.config.pad_token_id)
 
-        x = self.encoder(x, src_key_padding_mask=key_padding_mask)
+        # Layer-by-layer for hidden state capture
+        hidden_states = [x] if output_hidden_states else None
+        for layer in self.encoder.layers:
+            x = layer(x, src_key_padding_mask=key_padding_mask)
+            if output_hidden_states:
+                hidden_states.append(x)
 
         # Mean pool over non-padding tokens
         if attention_mask is not None:
@@ -123,10 +129,14 @@ class CaptionBertModel(PreTrainedModel):
         embedding = F.normalize(self.output_proj(pooled), dim=-1)
 
         # Return in HuggingFace-compatible format
-        return type('Output', (), {
+        result = {
             'last_hidden_state': embedding,
             'pooler_output': embedding,
-        })()
+        }
+        if output_hidden_states:
+            result['hidden_states'] = tuple(hidden_states)
+
+        return type('Output', (), result)()
 
     def encode(self, texts, tokenizer=None, max_length=512, batch_size=128,
                device=None):
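For reference, a minimal usage sketch of the new flag. It assumes the checkpoint can be loaded through AutoModel/AutoTokenizer with trust_remote_code=True and that the tokenizer's padding id matches config.pad_token_id; "your-org/caption-bert" is a placeholder repo id, not the actual one.

# Usage sketch (assumption: the repo exposes CaptionBertModel via AutoModel
# with trust_remote_code=True; the repo id below is a placeholder).
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-org/caption-bert", trust_remote_code=True)
model = AutoModel.from_pretrained("your-org/caption-bert", trust_remote_code=True)
model.eval()

batch = tokenizer(["a dog running on the beach"], return_tensors="pt", padding=True)
with torch.no_grad():
    out = model(input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                output_hidden_states=True)

# hidden_states is a tuple holding the embedded input plus one tensor per
# encoder layer, each of shape (batch, seq_len, hidden_dim). Note that
# last_hidden_state / pooler_output here are the pooled, L2-normalized
# sentence embedding rather than per-token states.
print(len(out.hidden_states), out.hidden_states[-1].shape)
print(out.pooler_output.shape)

Iterating self.encoder.layers instead of calling the encoder once keeps the existing forward behavior while capturing each intermediate activation only when requested, so the default output_hidden_states=False path adds no extra work.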