{ "trainer": { "trainer_class": "MatryoshkaBatchTopKTrainer", "dict_class": "MatryoshkaBatchTopKSAE", "lr": 5e-05, "steps": 4394, "auxk_alpha": 0.03125, "warmup_steps": 1000, "decay_start": 3515, "threshold_beta": 0.999, "threshold_start_step": 1000, "top_k_aux": 64, "seed": 0, "activation_dim": 128, "dict_size": 8192, "group_fractions": [ 0.03125, 0.0625, 0.125, 0.25, 0.53125 ], "group_weights": [ 0.2, 0.2, 0.2, 0.2, 0.2 ], "group_sizes": [ 256, 512, 1024, 2048, 4352 ], "k": 10, "device": "cuda:0", "layer": "unknown", "lm_name": "Lightricks/LTX-Video-0.9.5", "wandb_name": "MatryoshkaBatchTopKTrainer-Lightricks/LTX-Video-0.9.5-resid_post_layer_unknown_trainer_2", "submodule_name": "resid_post_layer_unknown" }, "buffer": { "activation_dir": "/mnt/nw/home/m.yu/repos/dictionary_learning_demo/ltx_activations_vae", "d_submodule": 128, "out_batch_size": 2048, "num_shards": 70, "metadata": { "model_name": "Lightricks/LTX-Video-0.9.5", "hook_target": "vae_latent_mean", "d_model": 128, "num_frames": 321, "height": 256, "width": 256, "max_norm_multiple": 10, "total_tokens": 9063296, "num_shards": 70, "num_videos": 3454, "save_dtype": "float32" } } }