Upload checkpoint at step 215000

Browse files

Files changed (6) hide show

step_215000/config.json +19 -0
step_215000/optimizer.pt +3 -0
step_215000/pytorch_model.bin +3 -0
step_215000/special_tokens_map.json +18 -0
step_215000/tokenizer.json +0 -0
step_215000/tokenizer_config.json +68 -0

step_215000/config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+    "d_model": 768,
+    "d_intermediate": 0,
+    "n_layer": 16,
+    "vocab_size": 10002,
+    "ssm_cfg": {
+        "d_state": 16,
+        "d_conv": 4,
+        "expand": 2,
+        "layer": "Mamba2"
+    },
+    "attn_layer_idx": [],
+    "attn_cfg": {},
+    "rms_norm": true,
+    "residual_in_fp32": true,
+    "fused_add_norm": true,
+    "pad_vocab_size_multiple": 8,
+    "tie_embeddings": true
+}

step_215000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2c6751158f12b4df34800e46e2409f457bf0a4aabc43eeabcc4010b2ddab77b
+size 521414027

step_215000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d0fd942c4eda075cd162e872d27318885e928f3c9864d0f1cad37b482de8093
+size 260700203

step_215000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<|username|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|content|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ]
+}

step_215000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

step_215000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10000": {
+      "content": "<|username|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10001": {
+      "content": "<|content|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|username|>",
+    "<|content|>"
+  ],
+  "clean_up_tokenization_spaces": false,
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}