Upload SCLM Option B - EARCP weights and config

Files changed (5)

README.md CHANGED Viewed

@@ -2,14 +2,14 @@
 license: mit
 tags: [sclm, stateful, memory, earcp, option-b]
 pipeline_tag: text-generation
-base_model: meta-llama/Llama-3.2-1B
 ---
 # SCLM Option B - Deep Integration
 ## Architecture
 - State dimension: 384
 - Injection layers: [4, 8, 12, 16, 20, 24]
-- EARCP params: 64.9M (8.67% overhead)
 - Experts: 3
 ## Features vs Option A

 license: mit
 tags: [sclm, stateful, memory, earcp, option-b]
 pipeline_tag: text-generation
+base_model: meta-llama/Llama-3.2-3B-Instruct
 ---
 # SCLM Option B - Deep Integration
 ## Architecture
 - State dimension: 384
 - Injection layers: [4, 8, 12, 16, 20, 24]
+- EARCP params: 94.6M (5.25% overhead)
 - Experts: 3
 ## Features vs Option A

earcp_weights.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29d6a1406978f31b69cf7cbcb79fa286bdac91056783fb99c36cde00f55fb267
-size 259787619

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d655d2092f1e54d1cfabaff16cb40270bb96d93a85a100e5207ae47f715f0f1
+size 378415971

sclm_config.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "vocab_size": 128256,
-  "hidden_size": 2048,
-  "num_hidden_layers": 16,
-  "num_attention_heads": 32,
   "latent_state_dim": 384,
   "n_experts": 3,
   "n_coherence_heads": 8,

 {
   "vocab_size": 128256,
+  "hidden_size": 3072,
+  "num_hidden_layers": 28,
+  "num_attention_heads": 24,
   "latent_state_dim": 384,
   "n_experts": 3,
   "n_coherence_heads": 8,

special_tokens_map.json CHANGED Viewed

@@ -7,11 +7,11 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|end_of_text|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|end_of_text|>"
 }

     "single_word": false
   },
   "eos_token": {
+    "content": "<|eot_id|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": "<|eot_id|>"
 }

tokenizer_config.json CHANGED Viewed

@@ -2051,13 +2051,13 @@
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|end_of_text|>",
   "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
-  "pad_token": "<|end_of_text|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
   "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|eot_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }