Upload config
- config.json +3 -4
- config.py +5 -5
config.json CHANGED

@@ -1,9 +1,9 @@
 {
-  "architectures": [
-    "BharataiForCausalLM"
-  ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "config.BharataiConfig"
+  },
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -20,7 +20,6 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
   "transformers_version": "4.36.0.dev0",
   "use_cache": true,
   "vocab_size": 5000
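The new auto_map entry points AutoConfig at the custom BharataiConfig class defined in config.py, so the configuration can be loaded through the Transformers Auto classes once remote code is trusted. A minimal loading sketch, where the repo id "bharatai" is only a placeholder for wherever these two files end up published:

    from transformers import AutoConfig

    # trust_remote_code=True lets AutoConfig import the BharataiConfig class
    # referenced by the "auto_map" entry in config.json.
    config = AutoConfig.from_pretrained("bharatai", trust_remote_code=True)
    print(config.vocab_size)  # 5000, per config.json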
config.py CHANGED

@@ -83,11 +83,11 @@ class BharataiConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=
-        hidden_size=
+        vocab_size=5000,
+        hidden_size=512,
         intermediate_size=11008,
-        num_hidden_layers=
-        num_attention_heads=
+        num_hidden_layers=8,
+        num_attention_heads=8,
         num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=16384,
@@ -155,4 +155,4 @@ class BharataiConfig(PretrainedConfig):
                 f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
             )
         if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
-            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
+            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
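With these new defaults, constructing BharataiConfig with no arguments should describe the same small model as config.json. A quick sanity-check sketch, assuming config.py from this commit is importable from the working directory:

    from config import BharataiConfig

    # Defaults now mirror config.json: an 8-layer, 8-head decoder with a
    # 512-dimensional hidden state and a 5000-token vocabulary.
    cfg = BharataiConfig()
    assert cfg.vocab_size == 5000
    assert cfg.hidden_size == 512
    assert cfg.num_hidden_layers == 8
    assert cfg.num_attention_heads == 8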