Upload STLForCausalLM
Changed files:
- config.json (+1 -1)
- configuration_stldec.py (+1 -1)
- modeling_stldec.py (+1 -1)
config.json
@@ -28,7 +28,7 @@
   "is_decoder": true,
   "is_encoder_decoder": false,
   "max_position_embeddings": 1024,
-  "model_type": "
+  "model_type": "stldec1024",
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "scale_embedding": false,
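Because "stldec1024" is a custom model_type that stock transformers does not ship, loading the checkpoint needs trust_remote_code=True so the bundled configuration_stldec.py and modeling_stldec.py are imported to resolve it. A minimal loading sketch, assuming the repo's config.json carries the usual auto_map entries for remote code; the repo id hypothetical-user/stldec1024 is a placeholder, not the actual upload:

    from transformers import AutoConfig, AutoModelForCausalLM

    # Placeholder repo id; substitute the real one for this upload.
    repo_id = "hypothetical-user/stldec1024"

    # trust_remote_code=True tells transformers to import the repo's
    # configuration_stldec.py / modeling_stldec.py to handle the custom
    # "stldec1024" model_type declared in config.json.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)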
configuration_stldec.py
@@ -2,7 +2,7 @@ from transformers.configuration_utils import PretrainedConfig
 
 class STLConfig(PretrainedConfig):
 
-    model_type = "
+    model_type = "stldec1024"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
 
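The model_type string set here must match the "model_type" key in config.json, since transformers uses it to map a checkpoint to its config class; this commit updates both to "stldec1024" in step. When the source files are available locally, an alternative to remote code is registering the pair with the Auto* factories. A sketch, assuming configuration_stldec.py and modeling_stldec.py sit on the Python path:

    from transformers import AutoConfig, AutoModelForCausalLM

    from configuration_stldec import STLConfig
    from modeling_stldec import STLForCausalLM

    # Map the custom "stldec1024" type to its classes so the Auto* factories
    # can resolve it without trust_remote_code.
    AutoConfig.register("stldec1024", STLConfig)
    AutoModelForCausalLM.register(STLConfig, STLForCausalLM)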
modeling_stldec.py
@@ -2139,7 +2139,7 @@ class STLForCausalLM(STLModel, GenerationMixin):
         loss = None
         if labels is not None:
             labels = labels.to(logits.device)
-            loss_fct = CrossEntropyLoss()
+            loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))
 
         if not return_dict:
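The fix qualifies CrossEntropyLoss with its nn namespace; if modeling_stldec.py imports torch.nn as nn rather than the class itself, the bare name raises a NameError the first time labels are passed. A standalone sketch of the same loss computation, with made-up vocab size and batch dimensions for illustration:

    import torch
    import torch.nn as nn

    vocab_size = 1000                          # illustrative; the model reads config.vocab_size
    logits = torch.randn(2, 8, vocab_size)     # (batch, seq_len, vocab)
    labels = torch.randint(0, vocab_size, (2, 8))

    # Same pattern as the patched code: flatten the batch and sequence axes so
    # every position is scored independently against its label.
    loss_fct = nn.CrossEntropyLoss()
    loss = loss_fct(logits.view(-1, vocab_size), labels.view(-1))
    print(loss.item())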