{
  "aggregator_params": {},
  "aggregator_type": "sum",
  "common_params": {
    "dropout": 0.1,
    "hidden_dim": 512,
    "interaction_encoder_enable": true,
    "interaction_encoder_ffn_dim": 1024,
    "interaction_encoder_n_heads": 8,
    "interaction_encoder_n_layers": 4,
    "projector_intermediate_dim": 512,
    "track_encoder_ffn_dim": 1024,
    "track_encoder_n_heads": 8,
    "track_encoder_n_layers": 4
  },
  "mm_dim": 1024,
  "per_feature_params": {
    "bbox": {
      "feature_encoder_params": {
        "input_dim": 5
      },
      "feature_encoder_type": "motion"
    }
  },
  "similarity_prediction_head_hidden_dim": 512,
  "sph_common_params": {
    "hidden_dim": 512
  },
  "sph_per_feature_params": {
    "bbox": {
      "hidden_dim": 512
    }
  }
}