{
  "aggregator_params": {},
  "aggregator_type": "sum",
  "common_params": {
    "dropout": 0.1,
    "hidden_dim": 512,
    "interaction_encoder_enable": true,
    "interaction_encoder_ffn_dim": 1024,
    "interaction_encoder_n_heads": 8,
    "interaction_encoder_n_layers": 4,
    "projector_intermediate_dim": 512,
    "track_encoder_ffn_dim": 1024,
    "track_encoder_n_heads": 8,
    "track_encoder_n_layers": 4
  },
  "mm_dim": 1024,
  "object_interaction_encoder_enable": true,
  "object_interaction_encoder_params": {
    "dropout": 0.1,
    "hidden_dim": 1024,
    "n_heads": 8,
    "n_layers": 2
  },
  "per_feature_params": {
    "appearance": {
      "feature_encoder_params": {
        "emb_size": 128,
        "hidden_dim": 512
      },
      "feature_encoder_type": "parts_appearance",
      "hidden_dim": 512,
      "interaction_encoder_ffn_dim": 1024,
      "track_encoder_enable_motion_encoder": false,
      "track_encoder_ffn_dim": 1024
    },
    "bbox": {
      "feature_encoder_params": {
        "input_dim": 5
      },
      "feature_encoder_type": "motion"
    },
    "keypoints": {
      "feature_encoder_params": {
        "input_dim": 35
      },
      "feature_encoder_type": "motion"
    }
  },
  "similarity_prediction_head_hidden_dim": 1024,
  "sph_common_params": {
    "hidden_dim": 512
  },
  "sph_per_feature_params": {
    "appearance": {
      "hidden_dim": 512
    },
    "bbox": {
      "hidden_dim": 512
    },
    "keypoints": {
      "hidden_dim": 512
    }
  }
}