saracandu committed
Commit 8241eca · verified · Parent: 93c9fd9

Upload STLForCausalLM

Files changed (3):
  1. config.json +1 -1
  2. configuration_stldec.py +1 -1
  3. modeling_stldec.py +1 -1
config.json CHANGED
@@ -28,7 +28,7 @@
   "is_decoder": true,
   "is_encoder_decoder": false,
   "max_position_embeddings": 1024,
-  "model_type": "stldec",
+  "model_type": "stldec1024",
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "scale_embedding": false,
configuration_stldec.py CHANGED
@@ -2,7 +2,7 @@ from transformers.configuration_utils import PretrainedConfig
 
 class STLConfig(PretrainedConfig):
 
-    model_type = "stldec"
+    model_type = "stldec1024"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
 
modeling_stldec.py CHANGED
@@ -2139,7 +2139,7 @@ class STLForCausalLM(STLModel, GenerationMixin):
         loss = None
         if labels is not None:
             labels = labels.to(logits.device)
-            loss_fct = CrossEntropyLoss()
+            loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))
 
         if not return_dict:
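This fix namespaces the loss under `torch.nn`, which likely resolves a `NameError` from `CrossEntropyLoss` never being imported bare in modeling_stldec.py. A self-contained sketch of the same flatten-then-average computation, with assumed shapes and vocab size:

import torch
import torch.nn as nn

vocab_size = 50265  # assumed for illustration; the real value lives in config.json
logits = torch.randn(2, 16, vocab_size)          # (batch, seq_len, vocab)
labels = torch.randint(0, vocab_size, (2, 16))   # (batch, seq_len)

# Flatten to (batch * seq_len, vocab) vs (batch * seq_len,), as in forward() above.
loss_fct = nn.CrossEntropyLoss()
loss = loss_fct(logits.view(-1, vocab_size), labels.view(-1))
print(loss.item())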