Upload converted Blt model

Browse files

Files changed (3) hide show

config.json +99 -0
model.safetensors +3 -0
tokenizer_config.json +11 -0

config.json ADDED Viewed

	@@ -0,0 +1,99 @@

+{
+  "model_type": "blt",
+  "vocab_size": 260,
+  "max_position_embeddings": 4096,
+  "patch_in_forward": true,
+  "realtime_patching": true,
+  "patching_mode": "entropy",
+  "patch_size": 4,
+  "patching_threshold": 1.335442066192627,
+  "patching_threshold_add": null,
+  "max_patch_length": null,
+  "patching_batch_size": 1,
+  "patching_device": "cuda",
+  "monotonicity": false,
+  "cross_attn_k": 4,
+  "encoder_hash_byte_group_size": [
+    3,
+    4,
+    5,
+    6,
+    7,
+    8
+  ],
+  "encoder_hash_byte_group_vocab": 500002,
+  "encoder_hash_byte_group_nb_functions": 1,
+  "pm_size": 0,
+  "patcher_config": {
+    "vocab_size": 260,
+    "hidden_size": 768,
+    "num_hidden_layers": 14,
+    "num_attention_heads": 12,
+    "num_key_value_heads": null,
+    "max_position_embeddings": 8192,
+    "norm_eps": 1e-05,
+    "dropout": 0.0,
+    "rope_theta": 10000.0,
+    "attn_impl": "xformers",
+    "attn_bias_type": "local_block_causal",
+    "intermediate_size": 2048
+  },
+  "encoder_config": {
+    "vocab_size": 260,
+    "cross_attn_all_layers": false,
+    "cross_attn_k": 4,
+    "hidden_size_global": 2048,
+    "pm_size": 0,
+    "hidden_size": 1280,
+    "num_attention_heads": 20,
+    "num_key_value_heads": null,
+    "num_hidden_layers": 1,
+    "norm_eps": 1e-05,
+    "dropout": 0.0,
+    "max_position_embeddings": 24576,
+    "rope_theta": 500000.0,
+    "rope_scaling": {
+      "rope_type": "default"
+    },
+    "hidden_act": "silu",
+    "_attn_implementation": "sdpa",
+    "intermediate_size": 3584
+  },
+  "decoder_config": {
+    "vocab_size": 260,
+    "cross_attn_all_layers": true,
+    "cross_attn_k": 4,
+    "hidden_size_global": 2048,
+    "hidden_size": 1280,
+    "num_attention_heads": 20,
+    "num_key_value_heads": null,
+    "num_hidden_layers": 6,
+    "norm_eps": 1e-05,
+    "dropout": 0.0,
+    "max_position_embeddings": 24576,
+    "rope_theta": 500000.0,
+    "rope_scaling": {
+      "rope_type": "default"
+    },
+    "hidden_act": "silu",
+    "_attn_implementation": "sdpa",
+    "intermediate_size": 3584
+  },
+  "global_config": {
+    "hidden_size": 4096,
+    "num_attention_heads": 32,
+    "num_key_value_heads": null,
+    "num_hidden_layers": 32,
+    "norm_eps": 1e-05,
+    "dropout": 0.0,
+    "max_position_embeddings": 4096,
+    "rope_theta": 500000.0,
+    "rope_scaling": {
+      "rope_type": "default"
+    },
+    "hidden_act": "silu",
+    "_attn_implementation": "sdpa",
+    "intermediate_size": 11008
+  },
+  "tie_word_embeddings": false
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc3d4af0b585afb391e60bd5eae3c37d7423ea44c08250a5e17c41031fe91587
+size 21306236008

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "tokenizer_class": "BltTokenizer",
+  "vocab_size": 260,
+  "model_max_length": 1024,
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}