Upload config
- config.json +3 -4
- config.py +5 -5
config.json CHANGED

@@ -1,9 +1,9 @@
 {
-  "architectures": [
-    "BharataiForCausalLM"
-  ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "config.BharataiConfig"
+  },
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -20,7 +20,6 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
   "transformers_version": "4.36.0.dev0",
   "use_cache": true,
   "vocab_size": 5000
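The new auto_map entry points AutoConfig at the custom BharataiConfig class defined in config.py, so the configuration can be loaded through the Transformers Auto classes once remote code is trusted. A minimal loading sketch, where the repo id "bharatai" is only a placeholder for wherever these two files end up published:

    from transformers import AutoConfig

    # trust_remote_code=True lets AutoConfig import the BharataiConfig class
    # referenced by the "auto_map" entry in config.json.
    config = AutoConfig.from_pretrained("bharatai", trust_remote_code=True)
    print(config.vocab_size)  # 5000, per config.json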
config.py CHANGED

@@ -83,11 +83,11 @@ class BharataiConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=
-        hidden_size=
+        vocab_size=5000,
+        hidden_size=512,
         intermediate_size=11008,
-        num_hidden_layers=
-        num_attention_heads=
+        num_hidden_layers=8,
+        num_attention_heads=8,
         num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=16384,
@@ -155,4 +155,4 @@ class BharataiConfig(PretrainedConfig):
                 f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
             )
         if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
-            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
+            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
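With these new defaults, constructing BharataiConfig with no arguments should describe the same small model as config.json. A quick sanity-check sketch, assuming config.py from this commit is importable from the working directory:

    from config import BharataiConfig

    # Defaults now mirror config.json: an 8-layer, 8-head decoder with a
    # 512-dimensional hidden state and a 5000-token vocabulary.
    cfg = BharataiConfig()
    assert cfg.vocab_size == 5000
    assert cfg.hidden_size == 512
    assert cfg.num_hidden_layers == 8
    assert cfg.num_attention_heads == 8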