Update configuration_Llamoe.py
Browse files- configuration_Llamoe.py +9 -10
configuration_Llamoe.py
CHANGED
|
@@ -13,25 +13,24 @@ LLAMOE_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
|
| 13 |
class LlamoeConfig(PretrainedConfig):
|
| 14 |
model_type = "Llamoe"
|
| 15 |
keys_to_ignore_at_inference = ["past_key_values"]
|
| 16 |
-
|
| 17 |
def __init__(
|
| 18 |
self,
|
| 19 |
vocab_size=32000,
|
| 20 |
-
hidden_size=
|
| 21 |
-
intermediate_size=
|
| 22 |
-
num_hidden_layers=
|
| 23 |
-
num_attention_heads=
|
| 24 |
-
num_key_value_heads=
|
| 25 |
head_dim=256,
|
| 26 |
-
hidden_act="
|
| 27 |
-
max_position_embeddings=
|
| 28 |
initializer_range=0.02,
|
| 29 |
-
rms_norm_eps=1e-
|
| 30 |
use_cache=True,
|
| 31 |
pad_token_id=0,
|
| 32 |
eos_token_id=1,
|
| 33 |
bos_token_id=2,
|
| 34 |
-
tie_word_embeddings=
|
| 35 |
rope_theta=10000.0,
|
| 36 |
attention_bias=False,
|
| 37 |
attention_dropout=0.0,
|
|
|
|
| 13 |
class LlamoeConfig(PretrainedConfig):
|
| 14 |
model_type = "Llamoe"
|
| 15 |
keys_to_ignore_at_inference = ["past_key_values"]
|
|
|
|
| 16 |
def __init__(
|
| 17 |
self,
|
| 18 |
vocab_size=32000,
|
| 19 |
+
hidden_size=4096,
|
| 20 |
+
intermediate_size=11008,
|
| 21 |
+
num_hidden_layers=32,
|
| 22 |
+
num_attention_heads=32,
|
| 23 |
+
num_key_value_heads=32,
|
| 24 |
head_dim=256,
|
| 25 |
+
hidden_act="silu",
|
| 26 |
+
max_position_embeddings=4096,
|
| 27 |
initializer_range=0.02,
|
| 28 |
+
rms_norm_eps=1e-05,
|
| 29 |
use_cache=True,
|
| 30 |
pad_token_id=0,
|
| 31 |
eos_token_id=1,
|
| 32 |
bos_token_id=2,
|
| 33 |
+
tie_word_embeddings=False,
|
| 34 |
rope_theta=10000.0,
|
| 35 |
attention_bias=False,
|
| 36 |
attention_dropout=0.0,
|