{
  "tr": 1.49,
  "num_subjects": 4,
  "num_rois": 1000,
  "modality_fusion_transformer_num_layers": 1,
  "modality_fusion_transformer_num_heads": 4,
  "modality_fusion_transformer_dim": 1024,
  "modality_fusion_transformer_fuse_mode": "concat",
  "modality_fusion_transformer_num_projection_layers": 1,
  "predictor_transformer_num_heads": 8,
  "predictor_transformer_num_layers": 3,
  "text_extractor": "Qwen/Qwen2.5-14B",
  "text_extractor_feature_size": 5120,
  "text_extractor_dtype": "float16",
  "text_extractor_num_last_hidden_states": 4,
  "video_extractor": "facebook/vjepa2-vitg-fpc64-256",
  "video_extractor_feature_size": 1408,
  "video_extractor_pool_size": 2,
  "video_extractor_dtype": "bfloat16",
  "video_extractor_num_last_hidden_states": 3,
  "video_extractor_chunk_length_seconds": 16,
  "video_extractor_batch_size": 8,
  "audio_extractor_last_layer_index": 2,
  "audio_extractor_batch_size": 32,
  "audio_extractor_feature_size": 1536,
  "architectures": [
    "VIBE"
  ],
  "model_type": "vibe"
}