adjust config
Browse files
- config.json +1 -1
- config_emb.json +1 -1
- configuration_aria.py +4 -2
config.json
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"AriaForCausalLM"
|
| 4 |
],
|
| 5 |
-
"bos_token_id": 0,
|
| 6 |
"eos_token_id": 1,
|
|
|
|
| 7 |
"hidden_size": 1536,
|
| 8 |
"intermediate_size": 6144,
|
| 9 |
"max_seq_len": 8192,
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"AriaForCausalLM"
|
| 4 |
],
|
|
|
|
| 5 |
"eos_token_id": 1,
|
| 6 |
+
"pad_token_id": 2,
|
| 7 |
"hidden_size": 1536,
|
| 8 |
"intermediate_size": 6144,
|
| 9 |
"max_seq_len": 8192,
|
config_emb.json
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"AriaForSequenceEmbedding"
|
| 4 |
],
|
| 5 |
-
"bos_token_id": 0,
|
| 6 |
"eos_token_id": 1,
|
|
|
|
| 7 |
"hidden_size": 1536,
|
| 8 |
"embedding_size": 512,
|
| 9 |
"intermediate_size": 6144,
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"AriaForSequenceEmbedding"
|
| 4 |
],
|
|
|
|
| 5 |
"eos_token_id": 1,
|
| 6 |
+
"pad_token_id": 2,
|
| 7 |
"hidden_size": 1536,
|
| 8 |
"embedding_size": 512,
|
| 9 |
"intermediate_size": 6144,
|
configuration_aria.py
CHANGED
|
@@ -15,8 +15,8 @@ class AriaConfig(PretrainedConfig):
|
|
| 15 |
intermediate_size: int = 6144,
|
| 16 |
max_seq_len: int = 8192,
|
| 17 |
use_cache: bool = True,
|
| 18 |
-
bos_token_id: int = 0,
|
| 19 |
eos_token_id: int = 1,
|
|
|
|
| 20 |
tie_word_embeddings: bool = False,
|
| 21 |
output_attentions: bool = False,
|
| 22 |
output_hidden_states: bool = False,
|
|
@@ -24,7 +24,9 @@ class AriaConfig(PretrainedConfig):
|
|
| 24 |
**kwargs,
|
| 25 |
):
|
| 26 |
super().__init__(
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
)
|
| 29 |
self.vocab_size = vocab_size
|
| 30 |
self.hidden_size = hidden_size
|
|
|
|
| 15 |
intermediate_size: int = 6144,
|
| 16 |
max_seq_len: int = 8192,
|
| 17 |
use_cache: bool = True,
|
|
|
|
| 18 |
eos_token_id: int = 1,
|
| 19 |
+
pad_token_id: int = 2,
|
| 20 |
tie_word_embeddings: bool = False,
|
| 21 |
output_attentions: bool = False,
|
| 22 |
output_hidden_states: bool = False,
|
|
|
|
| 24 |
**kwargs,
|
| 25 |
):
|
| 26 |
super().__init__(
|
| 27 |
+
pad_token_id=pad_token_id,
|
| 28 |
+
eos_token_id=eos_token_id,
|
| 29 |
+
**kwargs,
|
| 30 |
)
|
| 31 |
self.vocab_size = vocab_size
|
| 32 |
self.hidden_size = hidden_size
|