saracandu committed
Commit 633c7a1 · verified · Parent: 3e4aa3c

Upload STLForCausalLM

config.json CHANGED
@@ -12,14 +12,14 @@
   "bos_token_id": 2,
   "d_model": 32,
   "decoder_attention_heads": 16,
-  "decoder_ffn_dim": 512,
+  "decoder_ffn_dim": 4096,
   "decoder_layerdrop": 0.0,
   "decoder_layers": 12,
   "decoder_start_token_id": 3,
   "decoder_vocab_size": 35,
   "dropout": 0.1,
   "encoder_attention_heads": 16,
-  "encoder_ffn_dim": 512,
+  "encoder_ffn_dim": 4096,
   "encoder_layerdrop": 0.0,
   "encoder_layers": 12,
   "eos_token_id": 3,
configuration_stldec.py CHANGED
@@ -12,10 +12,10 @@ class STLConfig(PretrainedConfig):
         decoder_vocab_size=None, # unused
         max_position_embeddings=512,
         encoder_layers=12,
-        encoder_ffn_dim=512,
+        encoder_ffn_dim=4096,
         encoder_attention_heads=16,
         decoder_layers=12,
-        decoder_ffn_dim=512,
+        decoder_ffn_dim=4096,
         decoder_attention_heads=16,
         encoder_layerdrop=0.0,
         decoder_layerdrop=0.0,
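This keeps the Python defaults in sync with config.json. A minimal sanity check, assuming STLConfig follows the usual PretrainedConfig pattern of storing each keyword argument as an attribute (that pattern is an assumption; only the signature above is from the diff):

# Sketch only: verifies the new defaults line up with config.json.
# Assumes STLConfig stores its kwargs as attributes, as PretrainedConfig
# subclasses conventionally do, and that this runs from the repo root.
from configuration_stldec import STLConfig

config = STLConfig()  # no overrides, so the new defaults apply
assert config.encoder_ffn_dim == 4096
assert config.decoder_ffn_dim == 4096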
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44cd4bccc39663b6347ee8be1dd926750bc659e20ac78edbbe696958411d3010
-size 2093488
+oid sha256:79370eb12d442094d4a253967bc9f9050d45c3f39f3cff5f73f54a8ae4dfb137
+size 13275880
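The checkpoint grows from 2,093,488 to 13,275,880 bytes, a delta of 11,182,392 bytes. That is consistent with the wider FFN under two assumptions that are inferred from the numbers, not confirmed by the repo: weights stored as float32, and the extra parameters coming from 12 layers' worth of FFN growth (i.e. only the decoder layers are instantiated in STLForCausalLM):

# Back-of-envelope check on the size jump (assumptions: float32 storage,
# 12 layers gain the wider FFN; both inferred, not confirmed).
extra_per_layer = 232_960            # from the config.json estimate above
layers = 12
extra_bytes = extra_per_layer * layers * 4  # 4 bytes per float32
print(extra_bytes)                   # 11182080
print(13_275_880 - 2_093_488)        # 11182392 actual; the small remainder
                                     # is plausibly safetensors header metadata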
modeling_stldec.py CHANGED
@@ -2139,7 +2139,7 @@ class STLForCausalLM(STLModel, GenerationMixin):
         loss = None
         if labels is not None:
             labels = labels.to(logits.device)
-            loss_fct = nn.CrossEntropyLoss()
+            loss_fct = CrossEntropyLoss()
             loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))

         if not return_dict:
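The loss construction now calls CrossEntropyLoss directly rather than through the nn namespace, which presumes a "from torch.nn import CrossEntropyLoss" at the top of modeling_stldec.py (the import itself is outside this hunk, so that is an inference). A self-contained sketch of the same flatten-then-score pattern, with batch and sequence sizes invented for illustration:

# Standalone sketch of the loss computation in the hunk above. The
# batch/sequence sizes are made up; vocab_size = 35 matches
# decoder_vocab_size in config.json.
import torch
from torch.nn import CrossEntropyLoss  # direct import, as the new code assumes

vocab_size = 35
logits = torch.randn(2, 7, vocab_size)         # (batch, seq_len, vocab)
labels = torch.randint(0, vocab_size, (2, 7))  # (batch, seq_len)

loss_fct = CrossEntropyLoss()
# Flatten so each token position is scored independently.
loss = loss_fct(logits.view(-1, vocab_size), labels.view(-1))
print(loss.item())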