{ "MIloss_lambda": 0.01, "architectures": [ "TrainingMISpace" ], "attn_dim_key": 64, "attn_dropout": 0.05, "cvloss_lambda": 0.005, "depth": 11, "dim": 1536, "dim_divisible_by": 128, "dropout_rate": 0.4, "heads": 8, "model_type": "space", "moe": [ "species", "tracks" ], "species": [ "human", "mouse", "ce11", "dm6" ], "num": 1536, "num_downsamples": 7, "output_heads": { "ce11": 354, "dm6": 588, "human": 5313, "mouse": 1643 }, "pos_dropout": 0.01, "seq_length": 196608, "species_num_experts": 4, "target_length": 896, "topk": 3, "torch_dtype": "float32", "tracks_num_experts": 8, "tracks_topk": 3, "transformers_version": "4.45.2", "use_checkpointing": false, "use_tf_gamma": false, "zloss_lambda": 0.001, "index_table_path": "/home/jiwei_zhu/works/SPACE/datasets/pretrain/" }