| { | |
| "d_sae": 16384, | |
| "actual_l0": 19.6816084, | |
| "effective_l0": 12, | |
| "cross_entropy": 90.97328370361328, | |
| "sae_type": "saebench", | |
| "sae_release": "canrager/saebench_gemma-2-2b_width-2pow14_date-0107", | |
| "num_tokens": 10000000, | |
| "num_epochs": 1, | |
| "lr": 0.0005, | |
| "loaded_dag": null, | |
| "allow_negative_influence": false, | |
| "history": { | |
| "train_loss": [ | |
| 123.50254767281669, | |
| 116.86768455505371, | |
| 111.47526424952916, | |
| 107.15069541931152, | |
| 103.60219999040876, | |
| 100.70831909179688, | |
| 98.08268596104213, | |
| 95.82541280473981, | |
| 93.8609597342355, | |
| 91.96091542925153, | |
| 91.88661466326032 | |
| ], | |
| "train_steps": [ | |
| 140, | |
| 280, | |
| 420, | |
| 560, | |
| 700, | |
| 840, | |
| 980, | |
| 1120, | |
| 1260, | |
| 1400, | |
| 1407 | |
| ], | |
| "val_loss": [ | |
| 119.9688098022461, | |
| 113.95728345947266, | |
| 109.10677463378906, | |
| 105.2270860961914, | |
| 102.0273933959961, | |
| 99.294438671875, | |
| 96.89552971191407, | |
| 94.75061934814453, | |
| 92.81334992675781, | |
| 91.05568310546874, | |
| 90.97328370361328 | |
| ], | |
| "val_cross_entropy": [ | |
| 119.9688098022461, | |
| 113.95728345947266, | |
| 109.10677463378906, | |
| 105.2270860961914, | |
| 102.0273933959961, | |
| 99.294438671875, | |
| 96.89552971191407, | |
| 94.75061934814453, | |
| 92.81334992675781, | |
| 91.05568310546874, | |
| 90.97328370361328 | |
| ], | |
| "effective_l0": [ | |
| 16, | |
| 15, | |
| 14, | |
| 14, | |
| 13, | |
| 13, | |
| 13, | |
| 12, | |
| 12, | |
| 12, | |
| 12 | |
| ], | |
| "val_steps": [ | |
| 140, | |
| 280, | |
| 420, | |
| 560, | |
| 700, | |
| 840, | |
| 980, | |
| 1120, | |
| 1260, | |
| 1400, | |
| 1407 | |
| ] | |
| }, | |
| "sae_id": "gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0" | |
| } |