{ "aggregator_params": {}, "aggregator_type": "sum", "common_params": { "dropout": 0.1, "hidden_dim": 512, "interaction_encoder_enable": true, "interaction_encoder_ffn_dim": 1024, "interaction_encoder_n_heads": 8, "interaction_encoder_n_layers": 4, "projector_intermediate_dim": 512, "track_encoder_ffn_dim": 1024, "track_encoder_n_heads": 8, "track_encoder_n_layers": 4 }, "mm_dim": 1024, "per_feature_params": { "bbox": { "feature_encoder_params": { "input_dim": 5 }, "feature_encoder_type": "motion" } }, "similarity_prediction_head_hidden_dim": 512, "sph_common_params": { "hidden_dim": 512 }, "sph_per_feature_params": { "bbox": { "hidden_dim": 512 } } }