ngocbh committed
Commit aae4dcf · Parent: 5aa0abe
Files changed (2):
  1. config.json +51 -13
  2. trimkv_weights.pth +1 -1
config.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "_attn_implementation_autoset": true,
+  "alpha_threshold": 0.0,
   "architectures": [
-    "Qwen3ForCausalLM"
+    "TrimKVQwen3ForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
@@ -12,36 +12,74 @@
   "buffer_size": 128,
   "compress_memory": true,
   "compress_strategy": "alpha",
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
+  "floor_budget_ratio": 0.0,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 2560,
   "initializer_range": 0.02,
   "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "logit_block_size": 8192,
   "max_position_embeddings": 40960,
   "max_seq_len": 32768,
   "max_window_layers": 36,
-  "memory_size": 512.0,
+  "memory_size": 256.0,
   "model_type": "qwen3",
   "num_attention_heads": 32,
   "num_hidden_layers": 36,
   "num_key_value_heads": 8,
+  "retention_gate": "rg",
+  "retention_gate_bias_init": 18.0,
+  "retention_gate_intermediate_size": 512,
+  "retention_weight": 1.0,
+  "rg_dropout": 0.0,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000,
-  "skip_layers": 0,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "bfloat16",
   "trainable_params": "self_attn.retention_gate",
-  "transformers_version": "4.51.0",
+  "transformers_version": "4.57.1",
   "use_cache": false,
   "use_sliding_window": false,
-  "vocab_size": 151936,
-  "retention_gate": "rg",
-  "retention_gate_bias_init": 8.0,
-  "retention_gate_intermediate_size": 512,
-  "retention_weight": 1.0,
-  "rg_dropout": 0.0
-}
+  "vocab_size": 151936
+}
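For quick sanity-checking, a minimal sketch of reading the committed config.json with the standard library and printing the fields this commit touches; the values in the comments are the ones committed here, and nothing beyond plain JSON parsing is assumed:

import json

# Read the config.json from commit aae4dcf (assumes it has been downloaded
# to the current directory).
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["architectures"])             # ["TrimKVQwen3ForCausalLM"] (was ["Qwen3ForCausalLM"])
print(cfg["memory_size"])               # 256.0 (was 512.0)
print(cfg["retention_gate_bias_init"])  # 18.0 (was 8.0)
print(cfg["alpha_threshold"])           # 0.0, new in this commit
print(cfg["floor_budget_ratio"])        # 0.0, new in this commit
print(len(cfg["layer_types"]))          # 36, all "full_attention"
print(cfg["dtype"])                     # "bfloat16" (replaces "torch_dtype")
print(cfg["transformers_version"])      # "4.57.1" (was "4.51.0")
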
trimkv_weights.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b576910d113419a5bdf5a0ad170637ac36f35c548bac783ee87ab4f31f98ea3e
+oid sha256:825e0c83489aeae0869a3f6957422212d40922c91cdb20af719bd889e66e7b9b
 size 94725981
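trimkv_weights.pth is a Git LFS pointer, so only its sha256 changes here (the size is unchanged). A hedged sketch of fetching the file and checking it against the new pointer; the repo id below is a placeholder, and treating the file as a plain torch state dict holding the retention-gate parameters (suggested by the config's "trainable_params": "self_attn.retention_gate") is an assumption, not documented behavior:

import hashlib

import torch
from huggingface_hub import hf_hub_download

REPO_ID = "ngocbh/your-trimkv-model"  # placeholder; substitute the actual repo id
EXPECTED_SHA256 = "825e0c83489aeae0869a3f6957422212d40922c91cdb20af719bd889e66e7b9b"

# Download (or reuse a cached copy of) the updated weights file.
path = hf_hub_download(repo_id=REPO_ID, filename="trimkv_weights.pth")

# Verify the download matches the LFS pointer committed in aae4dcf.
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == EXPECTED_SHA256, "file does not match the committed LFS pointer"

# Assumption: the file is loadable as a CPU state dict of retention-gate weights.
state = torch.load(path, map_location="cpu")
print(type(state))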