Mirror from rkazants/tiny-falcon-mamba

Browse files

Files changed (6) hide show

README.md +33 -0
config.json +37 -0
model.safetensors +3 -0
special_tokens_map.json +22 -0
tokenizer.json +0 -0
tokenizer_config.json +124 -0

README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+---
+license: apache-2.0
+---
+```python
+import os
+from transformers import FalconMambaConfig, FalconMambaModel, AutoTokenizer
+model_dir = "tiiuae/falcon-mamba-7b"  # Checkpoint id used only to load the tokenizer; the model below is constructed from a config, not from these weights
+tokenizer = AutoTokenizer.from_pretrained(model_dir)
+# === Step 1: Define tiny model config (the resulting values are recorded in the accompanying config.json) ===
+config = FalconMambaConfig(
+    d_model=8,            # Saved to config.json as "d_model"; NOTE(review): does not appear to set the model width (saved "intermediate_size" is 128 = expand * hidden_size) - confirm against FalconMambaConfig
+    n_layer=2,             # Saved to config.json as "n_layer"; NOTE(review): saved "num_hidden_layers" is 16, so this likely does not control the layer count - confirm
+    d_state=32,            # Saved to config.json as "d_state"; NOTE(review): saved "state_size" is 16, so this likely does not set the state size - confirm
+    expand=2,              # Expansion factor; the saved "intermediate_size" (128) equals expand * hidden_size
+    conv_kernel=3,         # Size of the convolution kernel used in the Mamba block (affects temporal mixing)
+    vocab_size=50280,      # Size of the vocabulary (number of unique tokens)
+    num_hidden_layers=16,  # Layer count recorded in config.json (16); stored alongside, not replaced by, n_layer=2
+    hidden_size=64,        # Hidden size recorded in config.json (64); stored alongside, not replaced by, d_model=8
+)
+# === Step 2: Create model from config (no pretrained weights are loaded) ===
+model = FalconMambaModel(config)
+# === Step 3: Save model and tokenizer to disk ===
+output_dir = "./tiny-falcon-mamba"
+os.makedirs(output_dir, exist_ok=True)
+model.save_pretrained(output_dir)
+tokenizer.save_pretrained(output_dir)
+print(f"Tiny Mamba model and tokenizer saved to: {output_dir}")
+```

config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "architectures": [
+    "FalconMambaModel"
+  ],
+  "bos_token_id": 0,
+  "conv_kernel": 3,
+  "d_model": 8,
+  "d_state": 32,
+  "eos_token_id": 0,
+  "expand": 2,
+  "hidden_act": "silu",
+  "hidden_size": 64,
+  "initializer_range": 0.1,
+  "intermediate_size": 128,
+  "layer_norm_epsilon": 1e-05,
+  "mixer_rms_eps": 1e-06,
+  "model_type": "falcon_mamba",
+  "n_layer": 2,
+  "num_hidden_layers": 16,
+  "pad_token_id": 0,
+  "rescale_prenorm_residual": false,
+  "residual_in_fp32": true,
+  "state_size": 16,
+  "time_step_floor": 0.0001,
+  "time_step_init_scheme": "random",
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
+  "time_step_rank": 4,
+  "time_step_scale": 1.0,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_bias": false,
+  "use_cache": true,
+  "use_conv_bias": true,
+  "use_mambapy": false,
+  "vocab_size": 50280
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a269ef69f0f456e280769a2ea19b211ac3a4ef5493417d0e3bd41b9766ad17b
+size 14972264

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,124 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": ">>TITLE<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": ">>ABSTRACT<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": ">>INTRODUCTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": ">>SUMMARY<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": ">>COMMENT<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": ">>ANSWER<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": ">>QUESTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": ">>DOMAIN<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": ">>PREFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": ">>SUFFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": ">>MIDDLE<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 1000000000000000019884624838656,
+  "padding_side": "left",
+  "tokenizer_class": "PreTrainedTokenizer"
+}