Marmik
/

crosscoder-tinymoe-tinygpt-27M

Model card Files and versions

Marmik committed on Aug 12, 2025

Commit

cf39def

·

verified ·

1 Parent(s): 4de5a38

Push model using huggingface_hub.

Files changed (3) hide show

README.md +9 -3
config.json +15 -21
model.safetensors +3 -0

README.md CHANGED Viewed

@@ -1,4 +1,10 @@
 ---
-datasets:
-- Marmik/rjarxiv_starcoder_simplestories_1B
----

 ---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: [More Information Needed]
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

config.json CHANGED Viewed

@@ -1,23 +1,17 @@
 {
-    "trainer": {
-        "dict_class": "CrossCoder",
-        "trainer_class": "CrossCoderTrainer",
-        "activation_dim": 768,
-        "dict_size": 49152,
-        "lr": 0.001,
-        "l1_penalty": 0.03,
-        "warmup_steps": 1000,
-        "resample_steps": null,
-        "device": "cuda",
-        "layer": 3,
-        "lm_name": "tiny-gpt-27M-mixtral-5l-active-27M",
-        "wandb_name": "L3-mu3.0e-02-lr1e-03-64-mixed",
-        "submodule_name": null,
-        "use_mse_loss": false,
-        "code_normalization": "MIXED",
-        "code_normalization_alpha_sae": 0.7,
-        "code_normalization_alpha_cc": 0.3,
-        "target_rms": 1.0,
-        "num_layers": 2
-    }
 }

 {
+  "activation_dim": 768,
+  "activation_mean": null,
+  "activation_std": null,
+  "code_normalization": "MIXED",
+  "code_normalization_alpha_cc": 0.1,
+  "code_normalization_alpha_sae": 1.0,
+  "dict_size": 49152,
+  "encoder_layers": null,
+  "init_with_transpose": true,
+  "latent_processor": null,
+  "norm_init_scale": null,
+  "num_decoder_layers": null,
+  "num_layers": 2,
+  "same_init_for_all_layers": false,
+  "target_rms": null
 }

model.safetensors ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:706b36bfd314463744cde550914f6bf4380cf4de94be2b03e9a1d9b86772f3db
+size 604195564