vla4ad / results /va_models.json
worldbench's picture
update through web
01ba831 verified
[
{
"id": 1,
"model": "LBC",
"venue": "CoRL'20",
"input": [
"camera"
],
"dataset": [
"CARLA",
"NoCrash"
],
"vision": "ResNet",
"action": "RL",
"output": "Ctrl.+Traj.",
"category": "Action-Only Models"
},
{
"id": 2,
"model": "Latent-DRL",
"venue": "CVPR'20",
"input": [
"camera"
],
"dataset": [
"CARLA"
],
"vision": "ResNet",
"action": "RL",
"output": "Ctrl.",
"category": "Action-Only Models"
},
{
"id": 3,
"model": "NEAT",
"venue": "ICCV'21",
"input": [
"camera"
],
"dataset": [
"CARLA"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 4,
"model": "Roach",
"venue": "ICCV'21",
"input": [
"camera"
],
"dataset": [
"CARLA",
"NoCrash"
],
"vision": "ResNet",
"action": "RL",
"output": "Ctrl.",
"category": "Action-Only Models"
},
{
"id": 5,
"model": "WoR",
"venue": "ICCV'21",
"input": [
"camera"
],
"dataset": [
"CARLA",
"NoCrash",
"ProcGen"
],
"vision": "ResNet",
"action": "REG",
"output": "Ctrl.",
"category": "Action-Only Models"
},
{
"id": 6,
"model": "TCP",
"venue": "NeurIPS'22",
"input": [
"camera"
],
"dataset": [
"CARLA"
],
"vision": "ResNet",
"action": "REG",
"output": "Ctrl.+Traj.",
"category": "Action-Only Models"
},
{
"id": 7,
"model": "Urban-Driver",
"venue": "CoRL'22",
"input": [
"camera"
],
"dataset": [
"Lyft"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 8,
"model": "LAV",
"venue": "CVPR'22",
"input": [
"camera",
"lidar"
],
"dataset": [
"CARLA"
],
"vision": "ResNet",
"action": "REG",
"output": "Ctrl.+Traj.",
"category": "Action-Only Models"
},
{
"id": 9,
"model": "TransFuser",
"venue": "TPAMI'23",
"input": [
"camera",
"lidar"
],
"dataset": [
"CARLA"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 10,
"model": "GRI",
"venue": "Robotics'23",
"input": [
"camera"
],
"dataset": [
"CARLA"
],
"vision": "EfficientNet",
"action": "RL",
"output": "Ctrl.",
"category": "Action-Only Models"
},
{
"id": 11,
"model": "BEVPlanner",
"venue": "CVPR'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 12,
"model": "Raw2Drive",
"venue": "NeurIPS'25",
"input": [
"camera"
],
"dataset": [
"CARLA",
"Bench2Drive"
],
"vision": "ResNet",
"action": "RL",
"output": "Ctrl.",
"category": "Action-Only Models"
},
{
"id": 13,
"model": "RAD",
"venue": "NeurIPS'25",
"input": [
"camera"
],
"dataset": [
"Private"
],
"vision": "ResNet",
"action": "RL",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 14,
"model": "TrajDiff",
"venue": "arXiv'25",
"input": [
"camera",
"lidar",
"status"
],
"dataset": [
"NAVSIM"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Action-Only Models"
},
{
"id": 15,
"model": "ST-P3",
"venue": "ECCV'22",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"CARLA"
],
"vision": "EfficientNet",
"action": "SEL",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 16,
"model": "UniAD",
"venue": "CVPR'23",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 17,
"model": "VAD",
"venue": "ICCV'23",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 18,
"model": "OccNet",
"venue": "ICCV'23",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"OpenOcc"
],
"vision": "ResNet",
"action": "SEL",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 19,
"model": "GenAD",
"venue": "ECCV'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 20,
"model": "PARA-Drive",
"venue": "CVPR'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 21,
"model": "Hydra-MDP",
"venue": "CVPRW'24",
"input": [
"camera",
"lidar"
],
"dataset": [
"NAVSIM"
],
"vision": "ResNet",
"action": "SEL",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 22,
"model": "SparseAD",
"venue": "arXiv'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 23,
"model": "GaussianAD",
"venue": "arXiv'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 24,
"model": "DiFSD",
"venue": "arXiv'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 25,
"model": "DriveTransformer",
"venue": "ICLR'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"Bench2Drive"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 26,
"model": "SparseDrive",
"venue": "ICRA'25",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 27,
"model": "DiffusionDrive",
"venue": "CVPR'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"nuScenes",
"NAVSIM"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 28,
"model": "GoalFlow",
"venue": "CVPR'25",
"input": [
"camera",
"lidar",
"status"
],
"dataset": [
"NAVSIM"
],
"vision": "VoVNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 29,
"model": "GuideFlow",
"venue": "arXiv'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"NAVSIM",
"Bench2Drive"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 30,
"model": "ETA",
"venue": "arXiv'25",
"input": [
"camera"
],
"dataset": [
"Bench2Drive"
],
"vision": "CLIP-ViT",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 31,
"model": "Geo",
"venue": "arXiv'25",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 32,
"model": "DiffusionDriveV2",
"venue": "arXiv'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"NAVSIM"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 33,
"model": "NaviHydra",
"venue": "arXiv'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"NAVSIM"
],
"vision": "ResNet",
"action": "SEL",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 34,
"model": "Mimir",
"venue": "arXiv'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"NAVSIM"
],
"vision": "ResNet",
"action": "GEN",
"output": "Traj.",
"category": "Perception-Action Models"
},
{
"id": 35,
"model": "DriveDreamer",
"venue": "ECCV'24",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes"
],
"vision": "SD",
"action": "REG",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 36,
"model": "GenAD",
"venue": "CVPR'24",
"input": [
"camera"
],
"dataset": [
"OpenDV"
],
"vision": "SDXL",
"action": "REG",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 37,
"model": "Drive-WM",
"venue": "CVPR'24",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes"
],
"vision": "ConvNeXt",
"action": "SEL",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 38,
"model": "DrivingWorld",
"venue": "arXiv'24",
"input": [
"camera",
"status"
],
"dataset": [
"nuPlan"
],
"vision": "VQ-VAE",
"action": "REG",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 39,
"model": "Imagine-2-Drive",
"venue": "IROS'25",
"input": [
"camera"
],
"dataset": [
"CARLA"
],
"vision": "SVD",
"action": "SEL",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 40,
"model": "DrivingGPT",
"venue": "ICCV'25",
"input": [
"camera",
"status"
],
"dataset": [
"nuPlan",
"NAVSIM"
],
"vision": "VQ-VAE",
"action": "REG",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 41,
"model": "Epona",
"venue": "ICCV'25",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes",
"NAVSIM",
"nuPlan"
],
"vision": "DC-AE",
"action": "REG",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 42,
"model": "VaViM",
"venue": "arXiv'25",
"input": [
"camera"
],
"dataset": [
"OpenDV",
"nuScenes",
"nuPlan"
],
"vision": "LLaMAGen",
"action": "GEN",
"output": "Traj.",
"category": "Image-Based World Models"
},
{
"id": 43,
"model": "OccWorld",
"venue": "ECCV'24",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes",
"Occ3D"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 44,
"model": "NeMo",
"venue": "ECCV'24",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 45,
"model": "OccVAR",
"venue": "-",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes",
"Occ3D"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 46,
"model": "RenderWorld",
"venue": "arXiv'24",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"Occ3D"
],
"vision": "Swin-T",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 47,
"model": "DFIT-OccWorld",
"venue": "arXiv'24",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes",
"Occ3D"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 48,
"model": "Drive-OccWorld",
"venue": "AAAI'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"Cam4DOcc"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 49,
"model": "T³Former",
"venue": "arXiv'25",
"input": [
"camera",
"status"
],
"dataset": [
"nuScenes",
"Occ3D"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 50,
"model": "AD-R1",
"venue": "arXiv'25",
"input": [
"camera",
"lidar",
"status"
],
"dataset": [
"nuScenes",
"NAVSIM"
],
"vision": "-",
"action": "RL",
"output": "Traj.",
"category": "Occupancy-Based World Models"
},
{
"id": 51,
"model": "Covariate-Shift",
"venue": "arXiv'24",
"input": [
"camera",
"status"
],
"dataset": [
"CARLA"
],
"vision": "DINOv2",
"action": "REG",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 52,
"model": "World4Drive",
"venue": "ICCV'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"NAVSIM"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 53,
"model": "WoTE",
"venue": "ICCV'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"NAVSIM",
"Bench2Drive"
],
"vision": "ResNet",
"action": "SEL",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 54,
"model": "LAW",
"venue": "ICLR'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"NAVSIM",
"CARLA"
],
"vision": "Swin-T",
"action": "REG",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 55,
"model": "SSR",
"venue": "ICLR'25",
"input": [
"camera"
],
"dataset": [
"nuScenes",
"CARLA"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 56,
"model": "Echo-Planning",
"venue": "arXiv'25",
"input": [
"camera"
],
"dataset": [
"nuScenes"
],
"vision": "ResNet",
"action": "REG",
"output": "Traj.",
"category": "Latent-Based World Models"
},
{
"id": 57,
"model": "SeerDrive",
"venue": "NeurIPS'25",
"input": [
"camera",
"lidar"
],
"dataset": [
"nuScenes",
"NAVSIM"
],
"vision": "VoVNet",
"action": "SEL",
"output": "Traj.",
"category": "Latent-Based World Models"
}
]