ltg
/

norbert4-xlarge

@@ -819,13 +819,6 @@ class GptBertForCausalLM(GptBertModel):
             subword_prediction_flatten = subword_prediction[:, :-1].flatten(0, 1)
             causal_lm_loss = F.cross_entropy(subword_prediction_flatten, labels_flatten)
-        if not return_dict:
-            output = (
-                subword_prediction,
-                *([contextualized_embeddings] if output_hidden_states else [])
-            )
-            return ((causal_lm_loss,) + output) if causal_lm_loss is not None else output
         return CausalLMOutput(
             loss=causal_lm_loss,
             logits=subword_prediction,
@@ -932,13 +925,6 @@ class GptBertForSequenceClassification(GptBertModel):
                 loss_fct = nn.BCEWithLogitsLoss()
                 loss = loss_fct(logits, labels)
-        if not return_dict:
-            output = (
-                logits,
-                *([contextualized_embeddings] if output_hidden_states else [])
-            )
-            return ((loss,) + output) if loss is not None else output
         return SequenceClassifierOutput(
             loss=loss,
             logits=logits,
@@ -976,14 +962,6 @@ class GptBertForTokenClassification(GptBertModel):
             loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
-        if not return_dict:
-            output = (
-                logits,
-                *([contextualized_embeddings] if output_hidden_states else []),
-                *([attention_probs] if output_attentions else [])
-            )
-            return ((loss,) + output) if loss is not None else output
         return TokenClassifierOutput(
             loss=loss,
             logits=logits,
@@ -1040,14 +1018,6 @@ class GptBertForQuestionAnswering(GptBertModel):
             end_loss = loss_fct(end_logits, end_positions)
             total_loss = (start_loss + end_loss) / 2
-        if not return_dict:
-            output = (
-                start_logits,
-                end_logits,
-                *([contextualized_embeddings] if output_hidden_states else [])
-            )
-            return ((total_loss,) + output) if total_loss is not None else output
         return QuestionAnsweringModelOutput(
             loss=total_loss,
             start_logits=start_logits,
@@ -1091,13 +1061,6 @@ class GptBertForMultipleChoice(GptBertModel):
             loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(reshaped_logits, labels)
-        if not return_dict:
-            output = (
-                reshaped_logits,
-                *([contextualized_embeddings] if output_hidden_states else [])
-            )
-            return ((loss,) + output) if loss is not None else output
         return MultipleChoiceModelOutput(
             loss=loss,
             logits=reshaped_logits,

             subword_prediction_flatten = subword_prediction[:, :-1].flatten(0, 1)
             causal_lm_loss = F.cross_entropy(subword_prediction_flatten, labels_flatten)
         return CausalLMOutput(
             loss=causal_lm_loss,
             logits=subword_prediction,
                 loss_fct = nn.BCEWithLogitsLoss()
                 loss = loss_fct(logits, labels)
         return SequenceClassifierOutput(
             loss=loss,
             logits=logits,
             loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
         return TokenClassifierOutput(
             loss=loss,
             logits=logits,
             end_loss = loss_fct(end_logits, end_positions)
             total_loss = (start_loss + end_loss) / 2
         return QuestionAnsweringModelOutput(
             loss=total_loss,
             start_logits=start_logits,
             loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(reshaped_logits, labels)
         return MultipleChoiceModelOutput(
             loss=loss,
             logits=reshaped_logits,