Upload STLForCausalLM
Changed files:
- config.json (+1 -1)
- configuration_stldec.py (+1 -1)
- modeling_stldec.py (+1 -1)
config.json
@@ -28,7 +28,7 @@
   "is_decoder": true,
   "is_encoder_decoder": false,
   "max_position_embeddings": 1024,
-  "model_type": "
+  "model_type": "stldec1024",
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "scale_embedding": false,
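Because "stldec1024" is a custom model_type that stock transformers does not ship, loading the checkpoint needs trust_remote_code=True so the bundled configuration_stldec.py and modeling_stldec.py are imported to resolve it. A minimal loading sketch, assuming the repo's config.json carries the usual auto_map entries for remote code; the repo id hypothetical-user/stldec1024 is a placeholder, not the actual upload:

    from transformers import AutoConfig, AutoModelForCausalLM

    # Placeholder repo id; substitute the real one for this upload.
    repo_id = "hypothetical-user/stldec1024"

    # trust_remote_code=True tells transformers to import the repo's
    # configuration_stldec.py / modeling_stldec.py to handle the custom
    # "stldec1024" model_type declared in config.json.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)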
configuration_stldec.py
@@ -2,7 +2,7 @@ from transformers.configuration_utils import PretrainedConfig
 
 class STLConfig(PretrainedConfig):
 
-    model_type = "
+    model_type = "stldec1024"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
 
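The model_type string set here must match the "model_type" key in config.json, since transformers uses it to map a checkpoint to its config class; this commit updates both to "stldec1024" in step. When the source files are available locally, an alternative to remote code is registering the pair with the Auto* factories. A sketch, assuming configuration_stldec.py and modeling_stldec.py sit on the Python path:

    from transformers import AutoConfig, AutoModelForCausalLM

    from configuration_stldec import STLConfig
    from modeling_stldec import STLForCausalLM

    # Map the custom "stldec1024" type to its classes so the Auto* factories
    # can resolve it without trust_remote_code.
    AutoConfig.register("stldec1024", STLConfig)
    AutoModelForCausalLM.register(STLConfig, STLForCausalLM)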
modeling_stldec.py
@@ -2139,7 +2139,7 @@ class STLForCausalLM(STLModel, GenerationMixin):
         loss = None
         if labels is not None:
             labels = labels.to(logits.device)
-            loss_fct = CrossEntropyLoss()
+            loss_fct = nn.CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))
 
         if not return_dict:
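The fix qualifies CrossEntropyLoss with its nn namespace; if modeling_stldec.py imports torch.nn as nn rather than the class itself, the bare name raises a NameError the first time labels are passed. A standalone sketch of the same loss computation, with made-up vocab size and batch dimensions for illustration:

    import torch
    import torch.nn as nn

    vocab_size = 1000                          # illustrative; the model reads config.vocab_size
    logits = torch.randn(2, 8, vocab_size)     # (batch, seq_len, vocab)
    labels = torch.randint(0, vocab_size, (2, 8))

    # Same pattern as the patched code: flatten the batch and sequence axes so
    # every position is scored independently against its label.
    loss_fct = nn.CrossEntropyLoss()
    loss = loss_fct(logits.view(-1, vocab_size), labels.view(-1))
    print(loss.item())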