{
  "patch_size": [1, 1],
  "phase_mode": "real_imag",
  "embed_dim": 32,
  "depth": 12,
  "num_heads": 8,
  "mlp_ratio": 4.0,
  "same_frame_window": 2,
  "same_frame_window_h": null,
  "same_frame_window_w": null,
  "same_frame_dilation_h": 1,
  "same_frame_dilation_w": 1,
  "temporal_offsets": [-4, -3, -2, -1, 1, 2, 3],
  "temporal_spatial_window": 2,
  "temporal_spatial_window_h": null,
  "temporal_spatial_window_w": null,
  "temporal_spatial_dilation_h": 1,
  "temporal_spatial_dilation_w": 1,
  "temporal_drift_h": 1,
  "temporal_drift_w": 1,
  "spatial_only": false,
  "routing_topk_enable": true,
  "routing_topk_fraction": 0.2,
  "routing_topk_min": 8,
  "routing_topk_max": 32,
  "routing_topk_per_head": true,
  "topk_neighbors": null,
  "topk_per_head": true,
  "global_cls": false,
  "posenc": "learned",
  "rope_base": 10000.0,
  "rope_mode": "flat",
  "rope_base_t": null,
  "rope_base_h": null,
  "rope_base_w": null,
  "max_seq_len": null
}