| { | |
| "aggregator_params": {}, | |
| "aggregator_type": "sum", | |
| "common_params": { | |
| "dropout": 0.1, | |
| "hidden_dim": 512, | |
| "interaction_encoder_enable": true, | |
| "interaction_encoder_ffn_dim": 1024, | |
| "interaction_encoder_n_heads": 8, | |
| "interaction_encoder_n_layers": 4, | |
| "projector_intermediate_dim": 512, | |
| "track_encoder_ffn_dim": 1024, | |
| "track_encoder_n_heads": 8, | |
| "track_encoder_n_layers": 4 | |
| }, | |
| "mm_dim": 1024, | |
| "object_interaction_encoder_enable": true, | |
| "object_interaction_encoder_params": { | |
| "dropout": 0.1, | |
| "hidden_dim": 1024, | |
| "n_heads": 8, | |
| "n_layers": 2 | |
| }, | |
| "per_feature_params": { | |
| "appearance": { | |
| "feature_encoder_params": { | |
| "emb_size": 128, | |
| "hidden_dim": 512 | |
| }, | |
| "feature_encoder_type": "parts_appearance", | |
| "hidden_dim": 512, | |
| "interaction_encoder_ffn_dim": 1024, | |
| "track_encoder_enable_motion_encoder": false, | |
| "track_encoder_ffn_dim": 1024 | |
| }, | |
| "bbox": { | |
| "feature_encoder_params": { | |
| "input_dim": 5 | |
| }, | |
| "feature_encoder_type": "motion" | |
| }, | |
| "keypoints": { | |
| "feature_encoder_params": { | |
| "input_dim": 35 | |
| }, | |
| "feature_encoder_type": "motion" | |
| } | |
| }, | |
| "similarity_prediction_head_hidden_dim": 1024, | |
| "sph_common_params": { | |
| "hidden_dim": 512 | |
| }, | |
| "sph_per_feature_params": { | |
| "appearance": { | |
| "hidden_dim": 512 | |
| }, | |
| "bbox": { | |
| "hidden_dim": 512 | |
| }, | |
| "keypoints": { | |
| "hidden_dim": 512 | |
| } | |
| } | |
| } |