{ "patch_size": [1, 1], "phase_mode": "real_imag", "embed_dim": 32, "depth": 12, "num_heads": 8, "mlp_ratio": 4.0, "same_frame_window": 2, "same_frame_window_h": null, "same_frame_window_w": null, "same_frame_dilation_h": 1, "same_frame_dilation_w": 1, "temporal_offsets": [-4, -3, -2, -1, 1, 2, 3], "temporal_spatial_window": 2, "temporal_spatial_window_h": null, "temporal_spatial_window_w": null, "temporal_spatial_dilation_h": 1, "temporal_spatial_dilation_w": 1, "temporal_drift_h": 1, "temporal_drift_w": 1, "spatial_only": false, "routing_topk_enable": true, "routing_topk_fraction": 0.2, "routing_topk_min": 8, "routing_topk_max": 32, "routing_topk_per_head": true, "topk_neighbors": null, "topk_per_head": true, "global_cls": false, "posenc": "learned", "rope_base": 10000.0, "rope_mode": "flat", "rope_base_t": null, "rope_base_h": null, "rope_base_w": null, "max_seq_len": null }