[ { "index": 0, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00a80ab64403420eb629044965b386c0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Tall ninja monkey with a long tail is beating drums with drumsticks in both hands. ", "A dark-brown monkey with drumsticks jumps, twists and hits a big wooden drum.", "A dark brown monkey with drumsticks in its hands jumps and beats a big wooden drum", "A dark human legged monkey holding drumsticks in its hands jumps and beats on a big wooden drum.", "A green-and-brown monkey with a long tail hops around an axis and beats a drum." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00a81e40abb74c61bffe34e37516d48e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown fox with steady ears looking right and left. ", "A 3D model of a red fox with black ears turns its head from side to side and wags its white, fluffy tail.", "3D model of a red fox with black ears turns its head left and right and moves its white fluffy tail.", "3D model of an orange fox with black legs and a white tail tip moves its head and tail left and right at different paces.", "3D model of an red fox with a puffy tail with a white end stands and shakes his head to the left and right side while wagging his tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 2, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00b45680f31d47819cee2bab91643195", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Bearded man with blue trucker hat and a flannel shirt with pair of jeans walking", "A 3D Male in a red checked shirt,blue jeans,yellow boots and blue and white cap walking in different directions.", "3D model of a bearded man in a red flannel shirt with rolled up sleeves, baseball cap, blue jeans and yellow boots walking in different directions.", "3D model of a bearded man wearing a blue and white cap, a red flannel shirt, blue jeans and light brown boots is walking in four directions.", "3D brutal man with a beard in a blue cap, red plaid shirt with rolled up sleeves and blue skinny jeans walks with a calm step." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 3, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00cc1d22bb1249d49435b5e14ab22fa0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Pink wreaking ball smashing into a stack of gold bars . ", "A red chained ball crashes on a cube shaped golden bricks placed on a black and white patterned floor.", "Red Wrecking Ball on a metal chain smashes a wall of golden bricks", "A red wrecking ball with black chains swings into a big brown cube sitting on a metallic surface that scatters into smaller cubes after being hit.", "A red wrecking ball on a black chain breaks a square that consists of a bunch of little cubes that stand on a shimmering patterned floor and breaks into many cubes." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 4, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00f1d24701bd4b80bb191a3cdf4fbee5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Blank book with leather cover opening and closing. ", "A 3D model of a red leather cover standing book which opens and closes its pages.", "3D model of an old book in a red leather cover opens and closes in the middle.", "A red book with a glowing blue swirl icon on one side and a bookmark sticking from the bottom opens up.", "3D model of a red book with patterns around the perimeter of the book.In the middle with a glowing logo opens and closes." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 5, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/00f38b09026146f4a9ac299ea0c7cdf1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A woman wearing a pair of combat pants and a tank top throwing a punch. ", "A 3D model of a woman in a black crop top, military pants, black gloves, a gray cap, and brown boots is throwing a left hook.", "3D model of woman with short hair in grey cap, short black top, black gloves and military pants makes left hook.", "3D woman wearing a grey beanie, a black tank crop top, camo pants and brown shoes thows a left hook.", "A woman with a short haircut in a gray hat, dark top, sports gloves, camouflage pants and dark boots trains kicks." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 6, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0a0b504f51a94d95a2d492d3c372ebe5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Black puppy with white nose wiggling its tail. 
", "A 3D black and white puppy standing while waging its tail.", "Black and white puppy stands on four legs and wags its tail", "3D model of a black dog with a white muzzle and white paws is wagging its tail.", "A dog of black color with white spots on the muzzle, front and back legs is standing straight and waving his tail to the left and right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 7, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0a1be4094d844d72b225de98da809b02", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green turtle with a blocky-texured brown shell is slightly moving. ", "A 3D model of a green turtle with a brown shell swimming.", "3D model of a green turtle with a deep-brown colored shell is making swimming moves.", "3D green turtle with a brown shell is moving its front limbs.", "A green turtle with a brown pancreas moves its paws front and back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 8, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0a2f7e9f8aa54c4291087d65f16e20e8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A fighter wielding a sword and a lamp busting a move. ", "A 3D model of a character cartoon, dressed in purple clothes with blue eyes and a feathered hat, is attacking and defending with a sword.", "3D model of the character from the middle-eastern cartoon Aladdin in purple clothes with blue eyes and a hat with feathers, fencing with a sword.", "A cartoonish 3D model wearing blue clothes and a dark blue turban with a feather, and holding a sword with a lamp attachet to its base, dashes right then makes a front flip. 
", "3D cartoon magician with blue eyes, a large round hat with a feather in a suit of blue color, holding a sword and and practicing blows." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 9, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0a3c5b95993f499ea615fa97193d0533", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a female dancer with a high blonde ponytail is jumping up and down. ", "A female 3D model covered in purple and white mesh, wearing VR glasses, is warming up.", "A female 3D model covered by purple and white mesh wearing VR glasses is warming up.", "3D model of a woman covered in white and purple mesh is warming up and shadow boxing.", "A female model with yellow hair gathered in a ponytail covered with a net of black, purple colors with purple glasses jumping and practicing blows." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 10, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0b9df8b7d0db4093b7a94746b55a4d88", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A big monster with a slump posture walking. ", "A 3D gray troll with blue horns and big hands, dressed in black jeans, runs around in different directions.", "Gray 3D model of a troll with blue horns and giant hands in black jeans runs in different directions", "3D gray monster in dark blue jeans with a big right hand with blue glowing claws, and a blue glowing blade for a right hand is running. ", "A brown-gray mutant in blue pants with large arms with blue stones sticking out of them and a large sword sticking out at the end of his left arm is running." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 11, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0b966f2d42214c1e94d0bfbb74bb6ab0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of two dark metal balls on a rich-brown wooden pedestal are moving each in the opposite direction, demonsrating the laws of physics. ", "A 3D model of a brown wooden tool used to show how the famous brachistochrone curve works.", "A 3D model of a brown-colored wooden apparatus is demonstrating some of the properties of the Famous Cycloid Curve.", "3D model of a brown wooden tool with two metal balls sliding from each side conducting a physics experiment.", "A 3D model of a brown wooden apparatus demonstrates the laws of physics and rolls 2 balls through the force of gravity." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 12, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0b83526973a447d1a162d3d30015277d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Animated baby reindeer with cute button-like eyes. ", "A 3D model of a dark-brown cartoon moose with yellow horns and a red nose is happily stomping its limbs.", "A dark-brown left-leaning moose with yellow horns and a red nose walks in different directions.", "dark small cartoonish moose with red lips and disproportionate anatomy walking happily.", "A small black moose with yellow horns, big eyes and red lips. Walks with his body tilted to the left and his head wagging." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 13, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0bffc9d297954510a0742d7d2adca87d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An animated bearded man with brown hair, red beanie and a flannel shirt is wielding an axe in his right hand while running. ", "A 3D model of a bearded man in a red hat, a casual red checked shirt, blue jeans and yellow boots is running, smiling and carrying a gray axe.", "A 3D model of a happy man with dark-brown beard and hair, in a bright red hat, black and red flannel shirt, dark-blue jeans and yellow shoes carries an axe in his hand.", "3D model of a bearded man wearing a red beanie, a red flannel shirt, blue jeans and light brown gloves and boots is running happily with an axe in his right hand.", "3D model of a cartoon man with brown hair, big black eyes, beard, wearing a red hat, black checkered shirt with short sleeves, brown gloves, holding a gray axe and running." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 14, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0c83e28627b247e194212d9fbe119ad2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Man with an over coat and a hat, drawing his hands from behind. 
", "A 3D model of a man wearing black trench coat with maroon belted, hat, white shirt, black tie, putting his hand behind and back to front.", "A secret agent in a black coat, hat, white shirt and black tie takes something out from behind his back.", "3D Model of a man with ginger hair wearing a black hat, a black trenchcoat, gray pants and black shoes pulls out an invisible firearm from his back", "A tall man with gingrt hair, wearing a dark hat, dark coat with a red belt, gray pants stands bent and pulls an imaginary gun out of his back pocket." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 15, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0c655c7993134f5fb8e8173f3285145f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dog wearing egyptian style regalia and holding a royal crook in one hand. ", "A 3D model of a cartoon dog character is shown. The dog is wearing a gray helmet, a red dress made of fabric, and gray metal armor. It is walking while holding a scepter in its right hand.", "3D model of the game character the god Anubis - a dog in a protective gray helmet, a red fabric dress and gray metal armor walks with a scepter in its right hand.", "A bipedal dog waering egyptian style clothes ,protective armor, a backpack and a magic staff is walking", "Egyptian dog of black color with protruding ears in obummering, holding a weapon in his hand goes straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 16, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0c23248fb5be4bc585f26e31ff18d837", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Spherical models moving down a model ramp that is triangular and supported by a trapezium model at the back", "A 3D model of two brown cylinders that are sliding slowly downwards the other woodens.", "Two wooden brown cylinders are sliding down from two wooden slides.", "A 3D model of a wooden cylinder is sliding down two wooden objects of which one is a triangle.", "The 6-piece brown design shows how gravity works where 2 cone-shaped objects roll away" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 17, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0ca10f4929a04a5b894d445caa7920c1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown wooded wardrobe opening and closing both doors. ", "3D model of an empty brown wooden wardrobe which is opening and closing its doors.", "Dark brown empty wooden wardrobe opens doors.", "Dark wooden wardrobe with 3 compartments inside opens up.", "3D model of a brown wooden cabinet with 2 doors opens and closes the door in different sequence" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 18, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0cc299d9026c4ec19f2ce44555e85272", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A model of a man wearing tactical attires and wielding a gun. ", "3D model of a dark skinned man wearing military uniform holding a weapon and being vigilant all round. 
", "3D model of a man with dark skin in white clothes holds a weapon and looks around.", "A man in a ski mask and tactical gear is holding a shotgun and looking around.", "A 3D model of a dark-skinned man in a light-colored suit with a gun in his hands looks around anxiously" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 19, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0d532cfcae7c46239e878034a9e28fb5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey bunny with long ears moving up, sideways and down. ", "A 3D model of a grey fuzzy rabbit with pink ears twitching its whiskers, standing up on its back legs and looking around.", "A grey fluffy rabbit with pink ears twitches its whiskers and rises up on its hind legs.", "A gray bunny with twitching whiskers stands on its hind legs, looks to the left, then lands back on all four", "Ag rey rabbit stands on two hind legs, turns his head in different directions wagging whiskers" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 20, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6beedfe1cb3d4158ab4c9d7cb3241363", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young Afro-American girl with short brown hair, purple t-shirt and blue leggings is doing crunchies.", "A woman wearing a blue trouser,a red belt and a purple top with shaggy hair doing sit-ups", "3D model of a dark skinned woman in a purple top and blue jeans doing pumping up.", "3D model of a woman wearing a purple top, blue pants and a red belt doing sit-ups. 
", "A dark-skinned woman with short black hair in purple T-shirt, blue jeans pumping abs" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 21, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6ceb6e355cad4e029fcbf2b9cd3c7e19", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An angry-looking man with brown hair and a mustache, wearing a brown suit, white shirt and a brown tie, is having a phone call while looking annoyed, throwing his right hand in the air.", "A man with glasses, gray suit, black tie, white shirt and black shoes explaining something on a mobile phone.", "A man with dark skin with a short haircut in yellow glasses in a dark gray suit and a black tie is talking on the phone.", "3D model of a man with a mustache wearing a gray suit, a black tie and black shoes is arguing on the phone .", "A man with short black hair, wearing glasses and a brown suit, holding a phone, waving his arms and shouting angrily into the phone." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 22, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6ced1774c5534b5f9320c2db930affe2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A beaten up, bloody soldier with a beard and a hat, wearing torn military uniform and knee-high boots, is slowly walking like a zombie.", "A bloody police zombie wearing torn uniform with one shoe, staggering forward slowly.", "Zombie 3d model of a dead soldier in a grey uniform in blood goes forward.", "Zombie soldier with bloody, torn off clothes is missing a shoe and walking slowly.", "A man in a military uniform covered in wounds and blood walks forward like a zombie." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 23, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6d5b1f9d65a74fe792370446b17a5129", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue bird with its wide and long wings spread out is flying. ", "An odd-looking purple hawk with grey tail and wide-spread wings is soaring through the sky.", "A grey violet eagle spreads wide wings and flies.", "A 3D model of a blue-ish grey and purple mythical bird is gliding with its wings spread out.", "A strange violet-coloured bird with tattered long wings levitates." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 24, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6d7dd4949426430d886906875af776bc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A girl with long brown hair, dressed in a blue shirt and jeans, is dancing happily. 
", "A girl with mid-length brown hair, blue t-shirt and jeans is moving hands and legs in patterns while making bouncy dance moves.", "A young female with messy chestnut-colored hair and pale skin, dressed in a blue t-shirt and dark-blue denim jeans is dancing witth her hands up moving side to side.", "A girl with long brown hair, wearing a blue t-shirt, blue jeans and white shoes is dancing.", "A woman with long brown hair, wearing a blue T-shirt, blue jeans and sneakers is dancing, actively moving her arms in different directions and turning around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 25, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6d78fe9cec03483ba6d3e4ff30dc6265", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A fat pink pig with a twirly tail is moving its head. ", "A pink pig looks to its left side while moving its right hind leg ,then looks forward while moving its front legs and then looks down", "A realistic-looking pink piglet is sniffing and looking around. ", "A pink pig looks to its left, then looks down.", "Pink-colored adult piglet moves its front legs, turns its head from side to side and tilts it down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 26, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6d54580ee627474d9e462a75b509463d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A girl with punk like clothing and purple hair dancing. 
", "A girl with purple short hair with bracelets on both arms is dancing.", "3d model white skinned girl with short purple haircut in latex green dress gray jacket and rubber green boots is dancing", "A girl with pink hair wearing a grey vest and, a leather green dress underneath, black leather pants with a green stripe on each side and green boots is dancing.", "Woman with short purple hair in a green suit, green striped dress with green shoes dancing hip hop" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 27, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6dd82d65d7b946f796cafc3eea1815b9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A doll like girl with long tail flapping its tail. ", "A 3D model of a cartoon character wearing white with light green sweater top,purple skirt and white-purple boots wagging its purple and orange long tail.", "cartoon anime character 3d model with purple hair, orange ears, in white blouse and purple skirt waving long purple-orange tail", "An anime cat girl with purple hair, orange ears, wearing a white top, a purple skirt and purple, green and white shoes, is slowly wagging her purble and orange tail. ", "Cartoon girl with wings on her head and purple hairstyle, wearing a white T-shirt, purple skirt, white sneakers, with a long tail sticking out in orange and purple colours, standing still and waving her tail" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 28, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6e0c872483164267b9b581cb691bc601", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tribal warrior with grey boots carry a spear on his shoulder. ", "A 3D representation of a warrior adorned with black short hair,a yellow scarf, shoulder armor, a brown kilt featuring yellow and purple designs, and metallic boots, is gently tapping his spear against his left shoulder.", "3D model of warrior from the game in animal print armor and metal gray boots holds a spear on his left shoulder", "3D model of a warrior wearing a yellow donut shaped scarf, shoulder armor, a brown skirt with yellow and purple patterns, and metal boots is lightly tapping his spear on his left shoulder", "3d model of a man with short black hair with a circle on the neck, wearing a red skirt, high grey boots with a spear in his hand moving a spear" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 29, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6e12ceb244e74cc792941362fabf7f54", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A panda wearing black shades waving its hand. ", "A black and white panda cartoon wearing sunglasses waving its right paw.", "3D model black and white panda in black sunglasses waving right paw", "A low poly 3D panda with sunglasses is waving its right paw", "3D model of a large plastic panda with black and white coloured glasses, right hand waving" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 30, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6eb4190c3ca847c8853226c90d1ea37a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Empty red nike shoe box opening up. 
", "An empty red-white Nike box with price tag labelled 9.5 opens up.", "red and white cardboard shoe box from Nike opens the top lid", "red and white Nike box with an orange pricetag opens up", "Red coloured nike trainers box with white logo opens lid top up " ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 31, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6edcc1ba283b408480b99121e02d1cfd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A girl with tribal tattos on her arms, wearing a red dress, dark shoes and dark sheer tights is moving from side to side. ", "A young girl with a short pigtail in a red off-shoulder dress, brown tights and black dolly shoes is dancing.", "white-skinned girl with black hair in a red dress, black leggings and black slippers dancing Capoeira", "3D girl wearing a black ribbon with white polka dots, a red dress, dark rown leggings and black slippers is doing a capoeira move.", "A girl of white colour with black hair in a ponytail with a bow, in a white dress with black tights and black shoes with a tattoo on her left arm is practicing left and right movements" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 32, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6efcfd71eb54471da0ddeccc79336d80", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A white goat opening its mouth. 
", "white goat with long horns is bleating while moving its legs slowly backwards.", "A white hairy goat with two long horns is screaming.", "A white goat with horns bleats.", "A white coloured adult goat with horns stands upright and makes sounds by opening its mouth and stretching its neck, taking a step backwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 33, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6emFKCba4CdnOdbefGCoK2sF00o", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown rusty metal heating gun is exploding. ", "A stun spear, post-apocalyptic weapon, with bits of rusting which is producing dark sparks with bluish sports.", "A metal brown post apocalyptic gun shoots", "A dark colored compass-looking gun with wires loosely wrapped around it charges up a black ball.", "a brown coloured metal device with wires sticking out at the end of it from which a large black ball appears and it grows and shrinks" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 34, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6f2fef837677482a9693bd44aec4eb0e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A yellow queen bee flapping its wings. 
", "A yellow bee with black strips that is tail-wagging and turning its head slightly.", "A black and white wasp with white wings moves its black whiskers and wings", "a bee slightly turns its head left then flutters its wings.", "a large wasp with large wings stands upright and wiggles its wings and antennae" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 35, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6f8133161f66462d9474bfbc124b7470", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey electric kettle opening its lid and closing. ", "A black electric water jug positioned on a gray surface and plugged into a gray wall behind it, activates and opens itself, then glides toward the wall and aligns itself..", "A metal electric kettle is charged from the socket and opens the top lid", "A black electric kettle sitting on a gray surface and plugged to a gray wall behind it opens up, then slides towards the wall", "The kettle is black with a wire sticking out of it and it is connected to a socket, which is mounted in a grey wall, opens the top lid and moves backwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 36, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/07a2a1e5782d4302b5610a93084faf9d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A mystic girl with a jet pack on her back flying. 
", "A silver-haired heroine clad in a streamlined, advanced white bodysuit, fitted with a corresponding propulsion device, ascends into the air as she strives to achieve flight.", "A model of a female warrior from the game in white spacesuit with white hair trying to fly", "White haired girl wearing a white futuristic jumpsuit and a white jetpack flies up.", "A female superhero with white hair, white costume with metal wings on the back, sits down and takes off" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 37, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7a5b5f516881457a941937fc667fc61d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey shark wiggling its tail. ", "A silver and white shark with a glossy finish is gliding through the water.", "A grey smooth shark with large fins swims", "a gray and white hark with shiny texture is swimming. ", "3d model of a grey shark swimming" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 38, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7a9f67c3d0584869927fbbfe85194187", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A huge brown teddy bear looking side ways. 
", "An individual wearing a brown bear suit and a bow tie is sitting on the floor, gazing to the right and then to the left.", "brown teddy bear sits on the floor and looks around", "A person wearring a brown bear suit with a bowtie is sitting on the floor and looks right then left.", "A brown coloured toy bear with a bow on its neck sits with its hand on its knee and turns its head" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 39, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7a40aa9ed1304cf0b20441767c60c0c7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A woman wearing brown attire taking huge steps. ", "A 3D representation of a female figure with black hair, dressed in a brown top and matching brown pants, is depicted strolling while wearing bright red shoes.", "3D model of a woman with black long hair in a leather vest and leather pants walks straight", "A long black haired woman wearing a sleeveless brown leather jacket, brown pants and dark red shoes is walking", "A woman with long black hair wearing a brown T-shirt and brown trousers walks straight with her legs held high." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 40, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7a69e9d4cf3f454f85262e6e057630f0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Green fairy with wings flying in the air. 
", "A petite green fairy, adorned with delicate green wings and long, flowing antennae, is gracefully flying through the air.", "Green Little Fairy with green wings and long whiskers flies", "A Green fairy with long antennae and fluttering wings is flying.", "Green coloured cartoon fairy with high whiskers, wings, flapping wings and levitating up and down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 41, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7af0f723f1444ef399221c265d149be6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue robot walking. ", "A 3D model of a metallic blue robot, featuring gray hands and legs and equipped with an antenna on its back, is advancing forward.", "Metallic blue robot 3d model with antenna goes forward", "A blue robot with gray hands and legs and an antenna on its back is walking.", "The robot is blue with grey elements, antenna on the back and walks forward in a free-walking gait." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 42, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7b9b151a336f4edcb9163a4d4bd06607", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown peg opening and closing. ", "A wooden clothespin in a rich brown hue, featuring a metal spring in a sleek gray finish, is in the process of opening.", "A wooden brown clothes pin with metal grey spring is opening and closing.", "A wooden clothespin with a metal spring opens then closes.", "A brown coloured wooden clothes peg opens and closes." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 43, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7b14c3f36ad14010a0e4ba481282a69a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A turtle like creature with stone head dancing. ", "A 3D model of a game character, rendered in a muted gray color, is equipped with a sturdy brown protective shell.", "3D model of a character from a game in gray with a brown protective shell", "3D model of a bipedal creature with a thick rectangular strands of hair pointing up, no face, and limbs with a greenish hue is wearing a wooden shield on its back and wiggling around.", "3D model game character with a grey body and hair in the shape of a pineapple with a wooden pineapple mask with the image of dancing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 44, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7b0571072bbc445a9b10a1eafa2df6ed", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A round white tin box made out of metal, with pink and black stripes and with robotic golden metal legs, is walking.", "A white-pink-black cartoon tin written 'can it corporation' is moving forward while its open lid swinging up and down.", "A metal tin can with a yellow key moves forward on its legs", "Tin can monster with a golden key coming out of it, golden robot feet,red round eyes and sharp fangs is walking.", "A white coloured metal tin can with an open lid in the shape of a gear, with a lock and legs, with eyes and a mouth on the front, turns a key and moves its legs forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 45, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7beb8a109697489e884f6455b2e856be", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown dragon with elongated tip on the back of its head and sharp pointed tail flying", "A brown Zhenyuanopterus with orange spots and black neck is flying.", "A brown pterodactyl with large wings flies", "a light gray pterodactyl with orange patters beats its wings strongly.", "A brown-coloured pterodactyl with long wings, flying rapidly downwards, then spreading its wings to the side." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 46, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7c09afd1dd604455ace96edd4243b5e7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A chinese porcelain pot with decorative flowers has its lid opening up.", "A blue-white flower teapot opens its lid going upwards.", "A porcelain white and blue teapot with blue flowers opens lid.", "a white porcelain teapot with blue flowers on it sits still while its lid lifts up. ", "White teapot with blue patterns around the perimeter and a lid that lifts upwards." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 47, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7c75022d219c433da27b0df4e7813209", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A wooden puzzle being disassembled. 
", "A wooden cross-like shape detaches itself one by one to various directions. ", "3d model of wooden square brown elements spinning in the air", "a light and dark brown wooden construction falls diagonally then disassembles itself piece by piece.", "Wooden construction set in the shape of squares and rectangles, falls down, and falls apart into different parts." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 48, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7cddac29d2644be4abdec7e558450991", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dragon statue in color red with a white-colored belly and a long, pointy tail is moving its head left to right. ", "A red dragon with white belly standing with its hind legs,looking around while wagging its tail.", "A red smooth lizard with a white belly stands on a round platform and looks around", "A red wingless dragon with a white belly is standing on both legs on a round platform and looks around.", "A cartoon red coloured dragon with a white belly that stands upright and turns its head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 49, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7d1db28f62944a86953a35b57c39cacd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A wooden sea saw toy with spiny wheels and a pair of wooden dolls, dressed in multi-colored clothes and brown hats, are moving up and down. 
", "A brown wooden seesaw with balck wheels.two male cartoon sitting on top balancing their weights.", "wooden toy vintage men sit on a platform on wheels", "two wooden puppets wearing pink and blue top are playing on a wooden balance swing with wheels.", "A toy for children who sit on a wooden swing on wheels with 2 wooden men in caps, blue and pink T-shirts, swinging." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 50, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7d64d8f3bbd14de68d4537d0998a41c3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a young male with chalk-white mid-length hair, wearing a brown leather jacket, white pants and shoes, is laying on his back and slightly moving his torso up.", "A man with brown jacket and white jeans and boots lying on the ground with his both hands holding his stomach lifting his upper body only.", "3D model of Anime man with silver hair, in a brown jacket and white pants Lies on his back holding his stomach.", "A white haired man wearing a yellow jacket, white pants, and whote shoes is laying on his back with his head lift up and hands on his belly.", "A young man with short grey hair in a brown jacket, with grey legs, with high grey shoes, lying down and raising his neck up." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 51, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7d596e87658b41c6861f2a6d9ccd4fe0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A white robo-hand made out of metal, stationed on a purple platform, is stretching out, slowly turning and moving up and down. 
", "A white and black robot arm rotates on its axis.", "a metallic black and white robot arm spins on a round purple platform", "A white and black robot arm on a purple round surface rotates slightly while moving around.", "A white robot with black tip elements stands on a purple platform and moves its parts in different directions." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 52, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7dde986b68834de6b5a9deff6819d3f1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young blonde-haired man in a multi-clored striped shirt, brown shorts and black vans is playing a guitar enthusiastically.", "A white skinned male with checked shirt and brown short is playing a guitar.", "a man with yellow hair in a Hawaiian shirt and brown pants plays the bass guitar", "A blonde man wearing a white shirt with red stripes, beige shorts and black shoes is playing the guitar.", "A young man with green hair in a striped shirt, brown shorts, black shoes, holding a guitar and playing it shaking his head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 53, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7e8cc1728cfd455cba6c27354304c851", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An orange baby dragon with a long tail and black horizontal stripes is breathing steadily and slightly wiggling its tail. 
", "A red and black stripes Growlmon with white pawsand belly dancing its tail.", "red cartoon dinosaur with black stripes and yellow eyes moves tail", "A blonde man wearing a white shirt with red stripes, beige shorts and black shoes is playing the guitar.", "Cartoon dragon red with black stripes, yellow eyes stands swaying and wagging its tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 54, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7ec522873aba405485ba75ec9841dcec", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young lady with short red hair, wearing purple-toned gym clothes, is dancing and throwing her right hand up in the air.", "A 3D model of a female wearing blue jumpsuit is dancing hiphop barefoot.", "with short brown hair in a blue top and blue leggings dancing and waving her right fist", "A red lizard digimon with devil wing like ears and black markingsis moving while on standby.", "Girl with short brown hair, blue T-shirt, blue jeans, relaxing dance with her hands in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 55, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7ed0516cba434c8ca230f6d9645ff0f3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An orange wooden guitar on a metal stand is spinning round. 
", "A brown-yellow and pink guitar is rolling all round on top of a black round object.", "wooden brown guitar with four black strings and brown black strap spins on a round platform", "A beige, yellow and pink guitar with a brown belt held up by a metal pole on a black round surface does a 360 spin", "3d model of a guitar of yellow colour with a coloured print stands on a stand of brown colour, on a metal support and spins." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 56, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7f3cdc053c064387802b6f4eefeaa97a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A well built man with huge muscles wearing a green shirt is looking over his shoulder.", "A muscular male wearing green and black striped t-shirt and a brown trouser looking left,right and forward while standing still.", "man with short brown hair in green striped t-shirt and brown pants looking around", "A man in a black striped green V shape t-shirt, brown pants, and brown shoes looks around.", "A large man with brown short hair, dressed in a striped green T-shirt, brown trousers, and boots, stands alert and looks around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 57, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7f69215ba47e40bf8c8e6c18d8397d36", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark-colored machine gun with a spinning noozle is turning left and right. 
", "A Heavy Machine Gun Turret,shoots different directions.", "metallic dark gray machine gun from the game shoots to the sides", "A gatling gin spins on a tripod while its barrels spin as well.", "The metal weapon stands on three legs with a scope and turns around in different directions." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 58, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7fb1496976d34f258be5d50616db5c39", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "White cookie-shaped blocks are falling into a clear glass jar with its lid closing shortly after. ", "3D open glass bottle with gray lid lying on one side while the bottle is filled with elements falling down and then the lid closes itself.", "A glass jar with a golden metal lid has some gray square elements with a golden dot in the middle fallin into it.", "yellow dotted white bags fall in a jar which then closes up with a dark brown lid.", "Round capsules of white colour with yellow dots in the middle fall into a transparent jar with a metal lid, the lid is closed." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 59, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7fc0d6658dbc4d4a850c568d915295f7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young mermaid with long red hair, green tail and purple sea shells as a bra is wielding a trident. ", "A 3D princess mermaid with long red hair,purple corset and green tail holding a spear. 
", "mermaid ariel with red long hair with green fish tail holds golden spear in her hands", "A red haired mermaid with a green tail moves her tail while holding a goden trident.", "A character from the cartoon \"The Little Mermaid\" with red hair and a green tail holding a trident and wagging her tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 60, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/008b75a0c8df44bfa1410c23f9d291e2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A skeleton is standing still and slowly flipping its right hand as if inspecting it. ", "3D model of a skeleton ,twisting its left hand.", "half of the human skeleton turns over the right hand", "A 3D model of a skeleton with its hand lifted spins its forearm.", "An upper half of a white-coloured skeleton moves its right limb." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 61, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8a04bc92e42e48efa80f1235e07818d3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A faceless man with only a nose standing up. ", "A white-skinned toddler wearing a blue loper looking all around.", "little boy with brown short hair in blue swimsuit looking left and right", "A brown haired child with no eyes or mouth wearing greenish gray onesie looks around.", "Model of a boy without face, with short brown hair in an emerald bodysuit looks around and withdraws his arms." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 62, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8a9b14f2660947e68de1a42687bb708a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A strange man wearing the money heist costume is doing a Fortnite dance. ", "A 3D man wearing a money heist face mask with red overall,black t-shirt and black boots dancing energetically", "Man in a red jumpsuit with a Dali mask is breakdancing", "Man in a red jumpsuit and a salvador dali mask does a shuffle dance", "A character from the film named Paper House in a red suit with a Dali mask dances to the beat with his hands and feet." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 63, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8a46aaaafc1643a68be9ee736db40fdb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A wooden bird standing on a wooden peg with a bowl infront of it. ", "A brown wooden clothes pin with a bowl placed at the center and a yellow bird dipping it beak in it.", "a wooden brown clothespin with a yellow bird on it opens and closes", "A wooden clothespin featuring a yellow bird and a cup opens up as the bird dips its beak in the cup.", "The yellow coloured wooden bird is attached to a brown coloured wooden clothes peg which opens and closes." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 64, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8aed3493e0784874ad700cc1cee9c0bd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A big and strong tiger is wiggling its tail and making a roar. ", "A 3D model of tiger with a orange and brown stripes growling and roaring. ", "orange tiger with black stripes wags its tail", "A faraway orange tiger with sharp black stripes lowers its center of gravity, then roars.", "A big tiger standing there growling and wagging his tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 65, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b0efd3c81ba4b63a60863f47b6b0850", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tired soldier with green combat clothes walking. ", "A vietnam war soldier wearing jungle green uniform and brown boots walking very fast. ", "3D model of a male military man in a green uniform walks forward", "a soldier wearing a green helmet and a green uniform takes a quick step forward then slumps.", "3d model of a man in a military uniform walking upset with his body lowered." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 66, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b2b7bf7334e4882b92337a74bfcfb93", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A leopard moving its head. ", "Amur leopard with its mouth open looks around while moving its front legs.", "an orange leopard with brown and black spots wags its tail and looks around", "A yellow leopard with brown markings lifts its head and looks around while Its head warps weirdly at times.", "3D model of a panther looking around and wagging its tail." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 67, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b3cc80356474b569b8d442d79bb2ff2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tiger runnning with its mouth opened wide. ", "A orange with brown stripes leopard is running.", "an orange leopard with brown and black spots opens its mouth and runs", "Light yellow leopard with dark brown markings runs.", "The evil panther is running at a fast pace" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 68, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b29d94e25744685870d623065f48026", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A white chair spinning round and round. ", "A wooden white chair with brown flames and legs is rotating all round.", "white leather chair with wooden legs spins in a circle", "A white leather chair with brown wooden legs spins around.", "3D model of a white coloured armchair with wooden legs twists on an axis" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 69, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b68b1cd0df44b89b43135ad59f4fd5b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man wearing a black suit throwing punches and kicks.", "A man in a sleek black suit and shiny shoes, paired with a crisp white shirt, is expertly defending himself with precise karate moves.", "a half-bald man in a black suit fights", "A bald man in a black suit defends and attacks as if he were fighting somebody.", "A man with a short haircut, beard, black dinner jacket, black shoes, practising fighting moves" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 70, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8b947ee1bf7a4e3d8ffa1c24893ac160", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark-colored RTX graphics card has its fan spinning. ", "A black GeForce RTX 3080 spins its fan.", "A black metal fan in black metal construction spins.", "A black graphics card stands while its built in fan spins.", "Black coloured metal rectangular fan rotates." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 71, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8bb593fde9f84713b7d92c28bb333d86", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tall, bearded, brown-haired man, dressed in a brown west with gun pouches, beige shirt, jeans, brown shoes and black fingerless glowes is standing still in a ready-to-fight position. 
.", "A male wearing white shirt ,gray half coat,denim jeans and brown shoes stands still while he breathes.", "A man with black hair in a white bandage, in a white sweater, black vest and gray pants stands with right leg forward.", "A bearded man wearing a beige headband, a beige shirt, a black sleeveless vest, guns in holsters on each side attached by straps on his shoulders, blue jeans andblack shoes is on standby.", "A character from the game with brown hair, beard, bandana, beige sweatshirt, brown waistcoat, blue jeans, dark shoes is standing in a stand wary." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 72, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8bf27d0842594c9eaf063bcb8a86859b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A strong soldier man, wearing a combat uniform, is holding an invisible gun and kicking an imaginary door open. ", "A soldier with black beards and long black hair wearing jungle green uniform is fighting with his right foots and hands.", "a soldier in a military suit and a military cap fights with his right foot forward", "A soldier in camo uniform does a 180 spin, a front kick, then aims his invisible firearm.", "A man in a white military uniform with a ponytail and a beard is practising kicks and punches" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 73, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8bfa354e4afb4d95b2274a7b45cf4986", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a grey gaming headset has its mic moving from up and down to left and right. 
", "gray silver headphones open wide while lifts its mic up,down ,forward and left side.", "metal headphones lift their microphone", "Dark gray headphones spread slightly, then the microphone moves , down, then bends outwards.", "black headphones with long microphone, open and close the side ears, microphone moves in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 74, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8c0f8088a5d546c886d1b07e3f4eafae", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue-colored water bottle made out of plastic has its lid popping open. ", "A plastic water bottle throws its lid on the ground.", "Grey plastic bottle of water opens the blue lid", "A blue water bottle cap spins by itself then flies up", "3d model of a blue coloured bottle showing a drop of water and a beach, stands flat, the cap unscrews and flies downwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 75, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8c12ecf41f934490942d59fb54f34458", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A yellow catepillar machine with its hand extended out. 
", "A yellow and white patches escavator streches its long arm while rotating.", "yellow metal escalator with white cabin spins in a circle", "A yellow excavator spins while moving its rod.", "The yellow metal escalator is moving a rod and carrying something to the left hand side" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 76, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8c38bb8f496d4529a96bdb1d4f6726eb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue whale with a white belly is flapping its tail up and down.", "A 3D blue whale fish with two big fins swims", "blue whale swims", "A blue whale with shiny texture swims.", "A blue whale swims with its tail wagging" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 77, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8c2553c03f0d419fae5e4b0eef8ffc1f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A lady with long hair, wearing official clothing making a call. 
", "A female with pink blouse,brown shoulder length hair,denim jeans and white and black shoes picks a phone call.", "woman with brown hair in a pink blouse, blue jeans and blue sneakers talking on the phone", "A light brown haired woman wearing a pink shirt, light blue jeans and black and white shoes answers the phone.", "A woman with brown hair, wearing a pink sweatshirt, blue jeans, grey sneakers, looks at her hands and puts her hand to her ear" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 78, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8c8141711ab84308950de9054ad498d2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A squashy orange pumpkin is deflating. ", "A orange pumpkin with gray tip squeezes its shape inside as it falls down.", "An orange pumpkin changes shape chaotically and falls down", "An orange pumpkin distorts then falls down as it melts. ", "An orange pumpkin with a dark tail elastically loses its shape, inflates and deflates" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 79, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8cd6c072be3a41e9aef0afcc6b0aae6c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A retro record player with a brown base has its record spinning. 
", "A vitange looking record player with brown elements in its design is playing.", "Black vinyl record player with wooden bottom play black record", "A black vynil spins on a wooden record player.", "The player is black, with a wooden stand on which the disc spins" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 80, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8cffdc73a21d452581b6887e41872733", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An old coffe grinder with the handle spinning. ", "an Iron wood coffee grinder rotates its lid all round.", "metal antique coffee grinder spins top", "Old rusty coffee grinder handle spins slowly.", "a dark brown coffee grinder with a handle, wrapped around it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 81, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8d1aa170cf8c41e7aeb75b7b1d70bc46", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dog falling down on its side. ", "A red sesame and cream brushwood dog wagging its tails fall down lifelessly. ", "A red dog with a white belly sits on its hind legs and then falls on its right side", "A yellow shiba inu looks up while smiling, then falls down playing dead.", "Yellow Akita fluffy dog with white spots, round curled tail, does the command \"die\"" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 82, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8d862ec1bcc9474eb52da4f3cb3e928f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cute little girl with brown hair and ponty tails and a shocked facial expression running. ", "A brown hairy cartoon girl wearing white shirt ,brown shoes and a pink dress is running .", "a girl with brown hair in a pink dress and a white shirt runs forward with her arms out to the side", "A girl with pigtails wearing a white blouse and red overalls runs with her hands behind her back", "Anime girl with brown hair, 2 ponytails, bows, in a pink dress running fast with her arms out to the sides" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 83, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8d486996594a4714be1adf0d20745aa4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A spaceman ina white space suit is floating in the air. ", "A 3d model of an astronaut wearing white space suit with blue and red stripes is flying .", "astronaut in white spacesuit with black glass protection on face balances in air", "An astronaut in a white spacesuit floats in 0 gravity.", "An astronaut in a white suit with blue and red elements levitates in the air" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 84, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8da8a154c56c44efafa4d43b4dcf666b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green parrot on a tree moving its head ", "Green parrot lovebird with orange neck and cream peak is sitting on a dark gray tree branch with green leaves.", "Green parrot with an orange head sits on a brown branch looks around", "A green parrod with an orange face and neck and a blue square on its back turns its head slowly while sitting on a branch", "The parrot is green in colour, with an orange spot on its face, sitting on a tree branch with leaves, looking around" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 85, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8daef262f3dd4e4b948b4c3c6b1b801e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An elf with a green cap on its head and brown boots standing up. ", "A zelda cartoon from the game wearing white t-shirt,green uniform,brown boots and cream pants lifts right arm and then left arm looking around ready to fight.", "Elf 3D model from the game with yellow hair in a green uniform, black gloves and black boots Looking around", "A blonde elf wearing a green hat, a green tunic, beige pants and brown boots scratches the back of his head with his right hand, then assumes a fighting pose.", "The game character in the form of an elf with white hair, green hat, green tunic, beige trousers, brown boots looks around and pulls out an imaginary gun from behind his back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 86, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8dc73a1346424e53ad02fb05163ead13", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tall man with huge muscles wearing a tight tshirt and green combat pants jumping up and down", "A muscular male ,wearing white t-shirt ,jungle green pant black shoes and gloves jumping high.", "a man with a short blond haircut in a gray T-shirt, gray pants, black gloves and black boots jumps up", "A brozn haired man wearing a gray t-shirt, camo pants, black gloves and black shoes jumps up.", "A large man with a short haircut wearing a grey T-shirt, camouflage trousers and dark boots, makes a high jump." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 87, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8e99b15e31e74d85a739b499466dc8b8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A deer wearing shorts and christmas lights across its chest. ", "A Rudolph cartoon,with a green ribbon across his shoulders,red and white striped short looking left and right side.", "orange Christmas deer with white horns in red shorts stands on hind legs and looks around", "An orange humanoid raindeer wearing chismas lights across its chest, red shorts, and red ice skating shoes lifts its hands slightly and looks around.", "A New Year's moose in a human body of orange colour, with big horns, with a garland over his shoulder, in red shorts and red skates, looks around him." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 88, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8e247fd8586b46b08f4fdc9bcaa994a4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A chinese man showing how to fight. 
", "A man with blue suit and black shoes performing kung fu moves.", "chinese man in blue coat in blue in blue shirt and blue pants doing karate", "An old man in a blue suit does tai chi moves", "a Japanese man in a blue suit stands in a defence posture and prepares to attack." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 89, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8ede6f6ead9b4dc79ea51c050fefb594", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An office worker stretching out his back. ", "A man with black short hair ,silver shirt,black trouser , a tie and shoes holds his back with both hands.", "a man in a gray shirt, a black tie and black trousers holds his back", "A man in a bluish gray shirt, a black tie, black pants and brown shoes puts his hands behind his back and bends backwards.", "A man with short black hair wearing a grey shirt, black tie, black trousers and boots, raises his arms to his sides and waves his head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 90, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8ee2f6ce680a4875a9938f1596eedb57", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A rubiks cube spinning. ", "A Rubiks cube color blue,white,yellow,red and green moves its cubes one side at a time.", "A 3D model of a multi-colored Rubik's cube moves its pieces", "A solved rubics cube gets undone as its pieces move mixing its colors.", "3d model of rubik's cube blue, yellow, green, red colors is shown from all sides how it is folded." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 91, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8f6f0ec635e24d8998a42f798dd97be2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A soldier wearing a brown hat and combat clothing saluting.", "A male military army wearing jungle green uniform,gray cap standing upright is saluting", "military soldier in military uniform and grey cap saluting", "A soldier in military uniform and salutes", "A 3D model of a young man in green military uniform and a white cap is saluting." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 92, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8f9f6ceb81804839bb5c570253bc6f1f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A light-skinned girl with blue hair, beige top and blue skirt is dancing and spinning around. ", "A female cartoon with blue hair,white top,checked flare skirt and black long heels is dancing with one leg infront and then interchanges.", "Anime cartoon-looking girl in a checkered blue skirt, with light-blue hair is making dancing moves, twisting and turning to a steady beat.", "Anime girl with light blue hair wearing a white sweater, a nlue skirt, white high socks, and black heels is dancing. ", "A pale anime girl with turquoise hair, wearing a white blouse, black shoes and a blue plaid skirt is dancing." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 93, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8f95d8fb2aee41b5b853f8c32f922ef3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown dragon with dark-brown spots all over its body is flying. ", "Pteranodon rigged with brown and balck spots is flying.", "A 3D model of a brown pterodactyl flying.", "A pterodactyl with stone texture beats its wings once.", "A brown pterodactyl with big wings flying and flapping its wings." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 94, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8f350c5617504a24b67c6e744ab8803e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dog made up of chocolate and candy standing up. ", "melted chocolate robot raises its ears,then right ,then head and left hand respectively.", "brown metal robot dog raises right hand", "A proken down melted chocolate rabbit robot moves its hand awkwardly.", "A robot rabbit in brown mud, wiggling its ears, raising its arm in a frightening way." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 95, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8f787467a11046a6a90e5a13531563b4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Gears made up of a blue disk and long piston hand moving round. 
", "trammel engine blue in color with white-green arm spins.", "Blue metal circle rotates", "A 3d model is showcasing the trammel engine mechanism", "The turquoise-colored round flat mechanism with white rod drives the mechanism in a circular motion." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 96, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8fb150270adb45aa9653e857f3fa351b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Great american eagle flying in the sky.", "black hawk with white tail and head is fying", "Black Eagle with a white head flaps its large wings", "A bald eagle is beating its wings.", "A large eagle flies and flaps its wings." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 97, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/09aee7146f38441880c78380c01b6919", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A pathologist holding a skull on one hand and drill on the other hand. ", "A male inkeeper wearing white shirt,green sweater,brown trouser and apron holding a skull and a black object in the other hand.", "male doctor with black hair in glasses in a white shirt and a brown apron holding a skull in his hands", "A man with glasses wearing an old fashioned suit and a bloody apron hits a skull with a black object", "A man with black hair, glasses, mustache in a white shirt, green vest, gray pants, yellow apron fixing something in his hands." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 98, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9a8da94452b14ceabbbb9ebfc4ef8759", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A camping lamp placed on a camping chair. ", "A black with colorless glass lantern and a metallic gray mug rest on a green chair while an insect surrounding it.", "a camping lamp and a metallic gray mug stand on a green chair", "A lamp and a metal cup are surrounded with flies while sitting on a green chair.", "A black folding chair, on which a flashlight with a transparent glass and a metal cup stands, and a gnat flies around it." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 99, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9aa4951d6f7c4a0ca1dc12a7eae2843e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A microwave opening its door.", "A 3D empty black microwave opens its door wide with a white reflection inside.", "A black-coloured metalic microwave opens its door.", "A black microwave door opens up", "A black microwave opens the door." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 100, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0dd0a4ae5f3f4fd8a0ff75fc7f9c1cb2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Purple mouth with pink lips and white sharp teeth opening and closing.", "A 3D purple cartoon-looking creature with pink lips and big white teeth is screaming.", "3d cartoon purple mouth opens and shows white teeth and throat", "A purple ball with pink lips and sharp teeth opens its mouth wide.", "3D purple balloon with a big pink colored mouth and big teeth opens its mouth wide." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 101, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0de156e0eb7c4bb3ae22d9ebaa691612", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown spotted lionnes with elongated canine teeth moving its head sideways.", "A 3D model of a brown tiger with black spots and long sharp fangs,stands still and turns its head left and right.", "A brown saber-toothed tiger with white large fangs looks around", "A yellow sabertooth tiger looks around on the floor.", "A yellow cartoon tiger with black dots and protruding long fangs stands still and turns its head left and right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 102, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0e4da40e2ac6422ba046d6342893b93b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Black platter with potatoe chips, chicken and coleslaw spinning round. 
", "A 3D model of a black rectangular plate with chicken meat, French fries, and purple cabbage is spun around completely.", "Rectangular black wooden board with chicken wings, French fries and coleslaw.", "A black platter with french fries, chicken wings and coleslaw as spinning.", "3D plate of black color rectangular shape with red meat, french fries and purple cabbage is turned around at three hundred and sixty degrees." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 103, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0f1d1f11a1644c75b1b53f004cf79dd3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Lady pirate wearing a brown hat and boots walking. ", "A 3D model of a pirate with long black hair, wearing a black hat, gray shirt, black armor, and black boots moves in different directions.", "3d model of a pirate in a black hat, gray shirt, black armor and boots goes in different directions", "A pirate woman with a brown and white top, black pants and boots, a black hat and dreads is walking.", "A 3D woman with black hair gathered in a braid in pirate clothes confidently walks waving her arms." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 104, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0f91cf975bbd4f1290cf6ab90c12e7f7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Two fighters one with blue hair and another one with black hair throwing punches. ", "A 3D model of a two cartoon characters, both wearing identical gray suits with orange belts and boots, are making the same boxing moves. 
The character on the left has black hair, while the one on the right has blue hair.", "3D model of characters from the game, one with black hair, the other with blue, in gray kimonos with orange belts doing karate.", "3D models of two fighters, one with blue spiky hair and another one with black hair, both wearing a gray Gi, orange wristbands and boots and an orange belt are throwing punches", "Two cartoon characters in identical gray suits with orange belt and boots, left with black hair, right with blue hair, repeating the same boxing movements." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 105, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0f454a4410c84c349b56a843ca64293f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey revolving cannon shooting out a black cannon ball. ", "A 3D model of a gray cannon rotates as it fires a round black ball that drops to the ground.", "A grey metal cannon with a rotating body fires a round black cannonball.", "A gray cannon ifires a black ball, then does a 360 rotation.", "A gray cannon that turns its head on its axis and releases a round ball that falls down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 106, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0f481b684b75485885926f0bb3e7a49c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown dragon with long tail preparing for flight. 
", "A 3D model of a brown dragon with large wings and a long tail stands still and moves its body.", "Brown 3D model of a dragon with light grey neck, paws, wings, long claws and a long tail stands waiting.", "A mysterious dark-brown wyvern with a dark back and a long tail is breathing.", "A brown dragon with large wings, pointy claws and a long tail is standing still, with its body calmly moving from breathing. " ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 107, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0fea625e330a4c0f85b8e5f06079bbf2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Lady with checked tshirt and blue shorts waving her hands up in the air. ", "A 3D model of a female with shoulder length brown hair,black and orange t-shirt,denim short and black shoes ,raises her hands and waves.", "3D model of a girl with brown long hair, orange and black T-shirt and blue denim shorts waving with both hands.", "A girl wearing a yellow, purple and black checkered t-shirt, blue shorts and gray shoes is waving her hands.", "A cartoon woman with shoulder-length hair stands in a T-shirt with orange squares, short blue shorts and dark boots, raises her hands up and waves." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 108, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/01ff1eb227e34b77817ef2a2b8405bd2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A golden robot dog is walking on thin gold metal legs. 
", "A 3D model of a yellow robot walking like an animal.", "3d model of yellow metal pet robo-dog is walking forward.", "Yellow Boston Dynamics robot dog walking.", "Golden metal futuristic-looking robot is walking on all four like a dog." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 109, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1af29459a33d4834a3dcc0b25a1da0ce", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Blue bow tie with a white snowflakes pattern is being tied up. ", "A 3D model of a blue bow tie with white dots tying itself.", "3D model of blue bow tie with white dots is displayed in an animated tying sequence.", "A blue bowtie with white patterns ties itself up.", "A blue untied bow tie with white dots begins to tie itself." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 110, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1b0e42d2c517466d8941ce8c1d5ac10f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cheerful young lady with pale skin and red hair, dressed in a white cardigan, peach shirt and black leggings has a bouncy walk, while dancing with her hands up. ", "A 3D model of a female with dark orange long hair, white sweater and sneakers, black pants and beige top is dancing in different directions.", "3D model of a girl with long brown hair, in a gray sweater, black leggings and white sneakers is dancing.", "A girl with brown hair, a white shirt, black pants and white shoes is walking and dancing.", "A woman with red wavy hair in a white sweater, black pants and light shoes dances with her arms raised and walks forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 111, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1bbc296b706740fc86345f00b97b43b0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown dressing table with cream drawers opening and closing. ", "A 3D model of a brown empty drawers with a mirror and two drawers, which slide out one at a time.", "A brown wooden table with a mirror and light-brown drawers that slide in and out.", "3D model of a brown wooden table with a mirror and two cream colored drawers sliding out.", "3D dark brown chest of drawers with mirror and two drawers that slide out one after the other." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 112, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1bf85fbd7417479791a6cebbf950cb3c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tall giraffe moving its head. ", "A 3D model of a giraffe standing as its wags its tail and moving its head side by side", "An orange adult geraffe with brown spots on its fur looks around and wags its tail", "A 3D model of a giraffe is moving its head left and right.", "Cartoon Giraffe standing upright with a long tail and waving his head left and right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 113, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1c709a63a1be45fb903977f92a97b94d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man opening an empty gift box and putting it upside down. ", "A 3D model of a male game character with blonde hair,grabs an empty orange gift box with yellow ribbon, throws the lid on the floor, flips the box around, shakes it, and then lifts it up.", "3D model of a blond game character opens a red gift box with a yellow ribbon and looks at what's inside", "A blonde man wearing a black sleeveless vest, light brown pants and black boots opens an empty present box.", "A character from the game with white hair, catches an empty orange gift box with a bow and throws the lid on the floor, turns it around, shakes it, lifts it up." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 114, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1d0c965ea0124bf0acf426de59034364", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "White spray bottle with green spray nozzle spinning round. ", "A 3D model of a plastic white spray bottle with green cap turns all round.", "Plastic spray bottle with green cap and white body turns in different directions", "A white spray bottle with a green cap is spinning slowly.", "Spray bottle with green cap twists in one place." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 115, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1db3477bb35140a5ac2387c8f6dfee7f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Revolver pistol with its cylinder spinning round. 
", "A 3D model of a gray metal revolver with a brown wooden handle spins its drum filled with bullets.", "A firearms-type metal gray revolver with a brown wooden handle spins a drum with bullets.", "a gray gun with a brown handle's barrel comes out and starts spinning.", "grey revolver with brown grip, long muzzle, drum bracket opens and rotates." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 116, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1ea5247fe18d4ea28cc066a6911bfe6f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A pair of brown wooden tables, one bigger and one smaller, with the smaller one sliding out from beneath the bigger one. ", "A 3D model of a brown wooden table with four legs expands into a larger sized dining table.", "Chestnut-colored wooden table on four legs opens up into a larger version.", "A small brown table made ou of wood slides out from beneath a bigger brown wooden table.", "A dark brown wooden table with another table that is sliding out of it." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 117, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1eeb594196e14d00871fb9a766fa3535", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A pair of green plant leaves is slowly swinging. ", "A 3D model of a two green broad leaves sway in different directions.", "Two rich-colored green leaves on a stem move up and down.", "Two green leaves are being gently swayed by the wind.", "Two dark-green textured leaves wobbling in different directions." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 118, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1f2c51af4c5c43fe941b4342ea6bbc4a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A male Naruto characher with golden hair, dressed in an orange uniform, blue shoes and white headband is patiently waiting on standby. ", "A 3D model of a male game character with yellow hair, wearing an orange jumpsuit and blue shoes stands in a pose and breathes.", "3D model of a male game character with yellow hair, in an orange jumpsuit, stands in a rack and breathes.", "3D model of a boy with blonde spiky hair, wearing an orange and blue outfit is standing still and breathing.", "3d model of a Naruto character with yellow hair, in an orange costume with blue elements, is standing and breathing steadily." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 119, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1f4fc59dd6c54e36ac5f689f3ae1a439", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A bright orange helicopter made out of plastic has its blades spinning round.", "A 3D model of a toy helicopter in color orange spins its black and white propeller blades.", "3D model of a metal helicopter with an orange body rotates a black and white propeller", "An orange helicopter with tinted green windows spins its striped propellers.", "An orange helicopter with green windows and black-and-yellow striped blades is levitating in the air." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 120, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1f2885c2daf4402ca8e734df2b7725a4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cute kitty in a blue dress and a brown puppy in a grey suit are holding hands and a number 35, wishing you a happy birthday. ", "A 3D model of a male orange dog in a dark grey formal suit is waving his paw and wagging its tail, and a female multi-colored cat holding the pink and white number 35, while also wagging her tail.", "A cute cartoon-styled pair of an orange doggy and white, orange and blue kitty, dressed in formal aire, are holding a number 35 and waving at you.", "Carton style small puppy and kitty, wearing a dark grey suit and a light-blue dress, are holding a pink star with a white number 35 written on it.", "3D cartoon dog of orange colour in a dark suit and a white cat with orange and blue ears in a dress of blue colour and a crown on the head, holding the number 35." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 121, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1fe8da5c7bfc491e91bda989f5c9423a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black chest with grey top and bottom moving up and down. ", "A 3D model of a black and gray metal safe moves up and down while it rotates.", "Black and grey metal safe moves up and down", "a black briefcase drops to the ground and bounces up.", "A dark grey box with grey stripes flies up, spins, and falls down." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 122, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2a15d80273794f628b52f055f2ece8ef", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Lady with purple hair and wearing combat gear dancing. ", "A 3D model of a female character with purple hair and a black costume dances salsa,using her arms and legs.", "3D model of a girl with purple hair in a black uniform dancing the twist", "a purple hared woman wearing a dog tag, a black top, black gloves, a gray bag on her hip, gray pants, black knee protectors, and black boots is dancing.", "A female character with purple hair and a dark green costume dances salsa with her arms and legs" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 123, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2a23f8110a714bb0a99db9588da04f08", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Bazooka firing a blue ray of light.", "A 3D model of a lightsaber which is emitting blue saber.", "gray metal lightsaber from Star Wars releases blue saber", "A blue beam comes out of a star wars light saber.", "A grey metal rig with a scope that releases water" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 124, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2a321e9bca1a4bb3b3f12df910f57f71", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue fish with round eyes and purple mouth is flapping its tail.", "A 3D model of a sky blue fish with a purple mouth is swimming.", "A model of a blue fish with purple lips and blue fins is swimming.", "A light blue fish with dark blue fins and purple lips is swimming", "A blue coloured fish with purple lips swims forward and moves its body" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 125, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2b6ec396e7fe4eaeb784e0e566d72cc0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Blonde haired lady wearing a black skirt and a black and white stripped top walking. ", "A 3D model of a woman with blonde hair, wearing a animal print T-shirt and a black skirt, walks confidently in different direction.", "3D model of blonde woman in striped tight top and black breeches walking in different directions", "A blonde woman wearing a camisole with zebra patterns, a black skirt and white shoes is walking.", "A woman with white hair in a ponytail in a Zebra print T-shirt, black knee-length skirt walks confidently straight" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 126, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2b7c0624091a4e7793e7b4748a61795c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey crockodile that is stationary and not moving. 
", "A light gray crocodile with faint markings and a rough, scaly texture is standing still.", "A light gray crocodile with subtle markings and a rough, scale-covered texture is standing still", "A light gray crocodile is moving ever so slightly as it breathes.", "A light-grey crocodile with a long tail stands frozen and breathes slowly" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 127, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2ba4a1d9cdd84748beb5d51eac3623a4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A person waering pants that look like Aladin's sitting on a bench", "A 3D model of a male cartoon wearing a red turban and t-shirt,white trouser sitting on a bench ,moves his legs and head side by side.", "3D model of middle eastern man with brown skin in churban and white wide pants sitting on bench", "A thin faced man wearing a red turban, a red vest, a green belt and white rousers wiggles his legs while sitting on a gray bench.", "A cartoon character wearing a red headdress with a brooch, a red T-shirt, white trousers above the ankles sits on a brown bench and moves his legs and head actively." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 128, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2bd8f15010a54ee58428e085034d7485", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A girl wearing a silver flowy dress and black shades dancing. 
", "A 3D model of a cartoon girl with long pink hair and black glasses is dancing salsa in a gray metallic dress.", "anime girl with pink long hair and black glasses in grey metallic dress dancing", "An Anime pink haired girl wearing a white dress and white shoes dances.", "Anime cartoon girl with long pink wavy hair in a white coloured dress who dances Latin dance" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 129, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2be25c300ff04d819a3818bbe4b086a6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tiger cub spititng out chinese lettters. ", "A 3D postcard features a yellow tiger with black stripes crawling at the Center of the frame, with Chinese characters flying out of its mouth.", "3D postcard with a yellow tiger with black stripes that crawls out of the frame and Chinese hieroglyphics fly out of the mouth", "A 3D cartoon cub tiger poking its head from a hole on a rectangle frame spits out chinese characters.", "A cartoon tiger cub that pokes its head into a poster with a hole with hieroglyphics, opens its mouth, from which white hieroglyphics fly out" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 130, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2bea7dce1a944ec0ae4aaa29f294caaa", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A smart looking gentleman with clear spectacles looking up and down. 
", "A 3D model of a man with dark skin and short black hair,dressed in a light gray shirt and dark gray pants is standing moving the head sides and upwards.", "3D model of a man with dark skin, black short hair, wearing a light gray shirt and dark gray pants is standing.", "A Man wearing glasses, a white shirt, a black belt, gray pants and black shoes looks to his left, than looks up slightly.", "a dark-skinned man with glasses, dark hair, gray shirt and gray pants is standing still" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 131, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2c9f6fca1bd748058c69350994b4d7ae", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown metal machine gun revolving. ", "A cartoon 3D model of a metallic brown gun turret rotates.", "Cartoon 3D Model metallic brown Turret turns around", "a brown machine gun is aiming around.", "a brown metal cannon makes circular movements with the muzzle of the weapon" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 132, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2c64c18496294459b3e0774213bea396", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown treasure chest full of gold coins opening up. 
", "A 3D model of a brown wooden treasure chest with metal gold frames,opens with gold key.", "brown wooden chest with metal gold framing opens with golden key", "A brown wooden chest with gold inlays on the sides and in the middle is opened with a gold key that is inserted into the lock and opens the lid", "A brown wooden chest with gold inlays on the sides and in the middle is opened with a gold key that is inserted into the lock and opens the lid" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 133, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2ca1db4e890e4b24a68624597e7d2fc8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown cow with small horns walking. ", "A 3D model of a brown adult cow with a white spot on its head is walking straight as it wags its tail.", "brown adult cow with white spot on head walking straight", "A light brown cow takes a few steps forward then stops.", "a big brown cow with horns and a tail with white on the end walks forward and stops wagging her tail" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 134, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2cb4d07f6d9b449193e0b39684a2b683", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A monster with a shoe stuck on its head dancing. 
", "A 3D model of a zombie with a bloody head and a heel stuck inward, wearing a blue T-shirt, yellow shorts, and big black slippers, is dancing.", "3D model Zombie with a bloody head with a heel stuck inward, in a blue T-shirt, yellow shorts and black house slippers is dancing", "a cartoon zombie in a blue bloody shirt, light brown pants and dark brown slippers is dancing with a heel buried in his head.", "A cartoon green zombie with red wounds on his head, big red eyes and a heel in his head, wearing a blue t-shirt and brown pants, turns around and shakes his hands" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 135, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2ccbb4dc4f88444ea1851cd60b4308de", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A soldier simulating shooting of an arrow. ", "A 3D model of a game archer in black and gray armor, wearing a horned helmet, is shooting", "3D model of a game archer in black and gray armor with a helmet with horns shoots a bow", "an archer with a black outfit and horns coming out of his hood aims with his bow without holding an arrow.", "a game character with white horns on his head in a dark costume has taken the archer's stance and is taking arrows from his back and aiming with his bow" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 136, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2ce9e484a7d7419ba9a8ef3c64c95298", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An axe floating on a platform. 
", "A metal axe with a red wooden handle swings through the air up and down above a gray stone.", "a metal axe with a red wooden handle spins in the air above a gray stone", "An axe with a crooked wood handle partly wrapped with red cloth is floating up and down while spinning over a gray stone platform.", "a gray metal axe with a red handle levitates over a round gray stone stele" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 137, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2d1dc6d86022475a849abeac8f714c25", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Mystic creature with white wings moving its tail. ", "A 3D model of a brown lion from the game, featuring a yellow mane, a round yellow tail, red claws, and large white wings, is facing forward while moving its parts of the body in a slow motion.", "3D model of a brown lion from the game with a yellow mane and a yellow round tail, red claws and large white wings, looking straight ahead", "A brown mythical lion with a yellow mane, metal bracelets and knuckles and white wings stands in place while moving parts of its body.", "mystical animal in the form of a lion with white hair, white wings, a long tail with a white ball at the end with blue spikes sticking out of the neck, with gray bracelets on the body stands in place moving" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 138, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2d297b6b4b994f649a0eb70ec9dadfd5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Cream model of a man walking on a platform. 
", "A 3D representation of a white human figure walking forward on a gray rectangular surface.", "3D model of a white human body walking straight on a gray rectangle", "a light brown wooden puppet takes two steps forward on a gray rectangular block", "3D model of a white human demonstrates gait on a gray rectangle" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 139, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2d0701a199924a9ca36e76759ceaeea6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A white chicken with a red crown and a red lobes walking. ", "A 3D model of a white chicken with a red comb and yellow legs running.", "3D model chicken with white feathers, red comb and yellow legs goes forward", "A cartoon white chicken with a red crown and yellow legs is walking forward.", "The white chicken with the yellow legs goes straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 140, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3b75c31b592d4d8ba153accc7d1722c0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Multi coloured slime", "The pink, yellow, and burgundy substance shifts and stretches in a back-and-forth motion.", "an attractive jelly mass of pink, orange and dark purple colors tries to breathe", "a shiny pink, brown, yellow and bloack gelatinous mass expands, then shrinks back dow.n", "The pink, yellow, burgundy-colored substance moves and extends back and forth" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 141, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3f2fb5b4ba214ba4998c1fa081ea3aed", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Gold and blue gears moving round and round", "A 3D model of a red metal square featuring five blue gears and four yellow gears spinning on its surface.", "a red metal square with five blue gears and four yellow gears spinning on it", "Red and blue rectangular gears are moving on a red board.", "Children's developmental set with square gears in blue and brown colors twists with a metal handle" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 142, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3f407bf68fd24694a923c8d45a83c22e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Slumped old man with elongated nose walking. 
", "A 3D model of a old male cartoon with long nose,short black hair with bald spot in the middle,black cardigan and red shoes walks in diffrent directions and then folds its hand.", "3D model of an old man with a bald head, long nose and black eyebrows in a black robe, walking straight", "A bald long nosed man in a black robe and red shoes is walking", "3D plastic man with a long nose, short dark hair with a bald spot in the middle, wearing a long black coat and red shoes walks with a disgruntled face, hunched over, folding his hands at the end of the" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 143, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3fa76ea2ae6e4c6899d6ce181150306b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey rectangular block made out of stone, with an aggresive scary face is walking and falling face first in the end. ", "A 3D model of a gray square brick with arms and legs, featuring a white cross on its back and a menacing, frightening face, moves forward, swivels its body, and then collapses.", "Grey metallic evil rectangle with arms and legs, and an open red mouth, is walking straight then falls face down.", "Angry rectangular stone with adhesive bandages on its back falls forward.", "A square gray brick with arms and legs with a white cross on its back and with an evil scary face walks forward turning its body and falls down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 144, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4c2789a0f86044b8ad8c4b6d261443b7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Fat man wearing a suit throwing his hand up in dispointment. ", "A 3D model of a tall overweight man with a gray hair,wearing spects and a suit with green tie raises his right hand", "3D model of an old gray-haired man in glasses and a black suit is throwing something with his right hand", "A grey-haired fat man in a dark formal suit and a green tie angrily throws his hand up.", "A tall, overweight man with gray hair in a suit with a green tie raises his left hand and waves downward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 145, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4d8ec130cf7145128718bf0bdcdeb1e6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Multi coloured threads woven into a basket with motion. ", "A spinograph of rainbow colors rotates on its axis, with its intensity increasing and decreasing.", "colorful animated 3D model spinograph that moves in a circle", "Multi colored spinograph shrinks while curvy lines come out of it.", "Spinograph of rainbow colors increases and decreases turning on the axis." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 146, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/05a6e0b0b2bc4c4497f58a555911bbf3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey stone built pizza chimney opening up. ", "A 3D model of a gray stone with chimney opens its white door.", "A grey stone stove with opening white doors", "3D model of a gray stove opens its doors.", "Stone stove with high chimney opens doors." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 147, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5af395d28f664b1b97bcf5c9d8085ec5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man wearing blue shorts and a white tshirt throwing a ball at a wall.", "A 3D model of a young man with short brown hair, wearing a white T-shirt and blue shorts, throws a yellow ball with his right hand and receives back.", "3D model of a young man with brown short hair in a white T-shirt with blue shorts throws a yellow ball with his right hand", "A man in a white shirt and ble shorts throws an orange ball then catches it as it comes back his way.", "A model of a man with brown hair wearing a white t-shirt, blue shorts and shoes holds an orange ball, throws it, it bounces back and falls back into his hands." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 148, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5b0d1b089a94420c93f56a4c2a5aa8b3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man and a woman wearing traditional attires waving then bending down. ", "A man with black hair wearing a black hat and a woman with red hair in a white uniform are waving and greeting someone.", "Black-haired man in a black hat and a red-haired woman in a white uniform waving and greeting someone", "A man in a black hat, white clothes and an apron and a woman with ginger hair and white clothes are both waving their hands.", "3d models of a man and a woman in white outfits waving their hands at the same time, then lowering their hands and looking for something in their pockets." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 149, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5b91e9ec25a14a6da18b4ca8e238e496", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A warrior carrying an axe made up of a skull looking ready for war. ", "A 3D model of a Scandinavian warrior from the game, featuring a muscular chest and dressed in black pants, wielding an axe in his right hand, looking side by side.", "3D model of a Scandinavian warrior from the game with a pumped-up chest in black pants holding an axe in his right hand", "A warrior wearing a red shoulder protector with horns coming out of it is holding a skull axe an taking a fighting stance.", "Game character with no clothes on top, beard, dark hair in a ponytail, dark pants and shoes, holding an axe with a skull on the end, breathing heavily and looking around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 150, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5bb2709145754197a056f88ce6b42810", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A model of a space craft opening up. ", "A 3D model of a light gray metal lunar rover with six wheels unfolds its components.", "A light grey metal lunar rover on six wheels opens its elements", "A 3D model of a gray lunar rover opens up.", "A gray-colored lunar rover on wheels deploys its rig." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 151, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5bde3f6df6fa4d5b9be11fdd1addcc93", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Orange and brown coloured frog moving its head. ", "A 3D model of a yellow-brown striped toad rests on a green leaf, breathing gently.", "A yellow-brown striped toad sits on a green leaf", "orange and brown frog moves its throat and belly while sitting on a leaf.", "Orange-colored toad with dark stripes sits on a green leaf actively moves its stomach and mouth, raises its head up and to the sides." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 152, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5be73d61e32e43a4a8fb89c7ac71cc43", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A well built hare with big ears walking. ", "A 3D model of a brown, woolly rabbit walking on its hind legs.", "3D model of brown woolly rabbit walking on two legs", "A brown bipedal rabbit with a human torso and hands walks.", "Orange-colored rabbit in a human body, with rabbit legs, round tail, walks like a human." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 153, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5bee13dcfc5b4af0a6cdb5ea4e349770", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Blonde haired lady with red top and black skirt dancing.", "A 3D model of a girl with yellow hair, wearing black glasses, a red blouse, and a black skirt and black heels is dancing energetically.", "3D model of a girl with yellow hair, in black glasses, red blouse and black skirt is dancing", "A blonde woman wearing sunglasses; a red top, a black skirt and black heels is dancing.", "A woman with white hair, wearing glasses, a red sweatshirt with an open back, a black skirt and high black heels dances actively moving." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 154, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5c92fe5c54e44f9f95da2cf2e7a3e700", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Purple wild mushrooms moving with the wind. ", "A 3D model of a cartoon tree with purple and blue mushrooms swaying in various directions.", "cartoon tree with purple and blue mushrooms moving in different directions", "A dark blue branch with purple and blue mushrooms coming out of it is swaying.", "A green-colored plant from which mushrooms of blue, purple, turquoise colors grow, swaying." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 155, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5c684eb8ebc6412a80146e26f75a0f04", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A doctor wearing a white labcoat covering his face. 
", "A 3D model of a young male doctor with brown hair, wearing a white coat, green trousers, and black pants and brown shoes covers his face with both hands.", "Young male doctor with brown hair in white coat, green trousers and black pants covers his face with both hands", "A brown haired doctor wearing a whit coat, green pants and brown shoes covers his face with his hands.", "A male doctor with short brown hair in a white coat, green pants raises his hands to his face and shakes his head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 156, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5cab7bc488a84b4abbf949cc3cf9adfb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Long haired man wearing blue tshirt, black pants and white shoes doing squats. ", "A 3D model of a young man with brown hair, dressed in a blue T-shirt, black jeans, and white sneakers, is performing squats.", "Young man with brown hair in blue t-shirt, black jeans and white sneakers doing squats", "A brown haired man wearing a blue t-shirt; black pants and white shoes does squats.", "A man with brown medium hair, beard, blue t-shirt, dark pants and white sneakers, stretches out his arms and sits down a few times and stands up." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 157, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5d04d27d281743799e3eea8578a979d2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Black pot with green liquid over a fire spinning round. 
", "A 3D model of a round black cast iron cauldron containing a green potion and white bones sits over the fire and rotates.", "a round black cast iron cauldron with green potion and white bones stands on the fire and rotates", "A black cauldron with green liquid and bones inside is cooking on a brick rocket stove while being mixed with a dark wooden stick.", "A black cauldron stands on fire on a gray stone, in which a green substance with bones is boiled, which is twisted and stirred potion with a wooden stick." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 158, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5d274fcdbf2d4e59b6bdc5b9afa66839", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown rein deer moving its head sideways. ", "A 3D model of a brown, hairy elk with large white antlers stands and turns its head from right to left.", "brown hairy elk with white large antlers stands and turns head right and left", "3D model of an angry looking elk looks right, then left.", "A brown moose with big white antlers stands still and turns his head in different directions." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 159, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5d516fdbd1bb47fb9c78273fc46e935e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man wearing a long black over coat and red tshirt walking. 
", "A 3D model of a male zombie with green skin, black hair, a red shirt, and black pants, walking straight ahead in different directions.", "a male zombie 3d model with green skin, black hair and black clothes walking straight", "A green skinned zombie with back hair wearing a red shirt; a black vest; black pants and black shoes is walking.", "A zombie in the form of a green colored man with black hair in a black suit walks straight ahead." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 160, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5d2543fe1382487fa211ffe1424dbdae", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A surgeon in blue scrubs and blue hat searching for something. ", "A 3D model of a medical worker wearing a blue cap, white mask, and blue uniform raises his right hand to his forehead and looks around.", "3D model of a medical worker in a blue cap, white mask and blue uniform raised his right hand to his forehead and looks around", "a dmale doctor wearing a light blue suit puts his hand over his eyes and looks into the distance.", "A male doctor in a hat, blue suit, dark shoes bends over, puts his hand to his head and looks for something in different directions." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 161, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5de15f7398fa4cf08a232f649cbcda8d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Shrek waving with his hands up and waving them in the air. 
", "A 3D model of a green orc resembling Shrek from the cartoon, dressed in a white shirt and brown vest, is raising his hands and dancing.", "3D model of green orc shrek from cartoon in white shirt and brown vest is raising his hands up and dancing", "3D model of shrek wearking a brown vest, a white tunic; brown pands and brown shoes is dancing while waving his hands in the air.", "Shrek cartoon character in a white shirt, brown vest, brown pants and shoes dances with his arms held up high." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 162, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5deb007a05a946d28ce6403616f9629b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey and black mouse wiggling its long tail. ", "A 3D model of a rat, primarily black and gray with pink ears, paws, and a long tail, surveys its surroundings.", "black and gray rat with pink ears, paws and a long tail looks around", "black and gray mouse with pink legs and ears moves its head and tail, then scratches its left ear.", "a small grey mouse with dark spots on the face and back with a long tail looking around and wagging its tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 163, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5e1ad0ab785c4055a23673c059721fa1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A purple female robot dancing. 
", "A 3D model of a pink metallic robot with brown accents is performing a breakdancing routine.", "3D model of pink metallic robot with brown accents is breakdancing", "a brown and purple puppet looking robot is doing breakdance moves.", "A robot in the shape of a woman in white is break dancing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 164, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5e17bb9b09a64ce2aa32c7c598daa992", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A small fox with big blue eyes moving its head. ", "3D model of a cartoon little fox with orange fur, black ears, and a white tail with blue eyes smiles, tilts its head, and adjusts its hands.", "Cartoon little fox with orange fur, black ears and white tail with blue eyes smiles and moves head", "a small cartoonish orange fox with blue eyes stands on both legs and happily swings its arms while smiling.", "Orange coloured cartoon fox with blue eyes with a lush tail happily wiggles its body in different directions and opens its mouth." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 165, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5e41e52f553c4a7d9ec2abf88f8791fc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey shinny shark moving its tail. ", "A 3D model of a gray-silver shark with a white belly is swimming in various directions.", "A dark grey shark with a white belly and large fins is swimming", "a gray shark with realistic skin rendering is swimming.", "3D model of a grey shark that swims and wags its tail." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 166, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5e892fc786db4ca1b36738ab5e40e64d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown and blue vintage gramophone with a vinyl spinning. ", "A 3D model of a metallic black gramophone with a large blue horn plays a black record.", "metallic black gramophone with a large blue shade plays a black record", "a dark brown gramophone with a blue brass with red and gold patterns inside is playing a black vinyl.", "Brown wood gramophone with a bouruze trumpet, on which a disc is turned by means of a handle." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 167, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5eb38ec707044307bb5c512aea1fa91c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with blue jeans and brown coat dancing.", "3D model of a young man with light brown hair, dressed in a brown leather jacket, dark blue jeans, and black shoes, dances.", "3D models of young man in light and hair in brown leather jacket and dark blue jeans dancing dance", "A man wearing brown glasses, a black shirt, a brown jacket, blue jeans and black shoes is dancing.", "A man with brown hair, brown jacket, blue jeans and dark shoes dances energetically spreading arms and legs." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 168, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5f1a279ba58748e488faacb4e4352558", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A fat man wearing black clothes dancing.", "3D model of a young male in a black and gray suit with short black hair dancing.", "3D young male model in black and gray suit with black short hair dancing dance", "an overweight man with a buzzcut wearing overall black clothes is throwing dance moves.", "Dark large man with black hair in a black suit dancing hip hop." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 169, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5fa78231c06d4622860dee9e4cb3cfa6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tall delivery man in a blue, pink and yellow striped uniform is making dancing moves.", "3D model of a man with black hair and white skin, wearing a blue cap, a blue-yellow-pink shirt, and blue pants, dances by moving his hips and performing squats,in different directions.", "3D model of a man with black hair and white skin in a blue cap, in a blue-yellow-pink shirt and blue pants dances with his hips and squats", "a man in a cyan hat, a cyan, yellow and magenta shirt, cyan pants and gray shoes dances while swaying his hips and lowering then lifting his center of gravity.", "3d model of a man with black hair in a blue cap, blue suit with a pink and yellow stripe on the right, in grey shoes dancing moving hips and sitting down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 170, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5fefed29a5d14c36ac6ad41943ea7e56", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Well built man with huge muscles throwing a punch. ", "A 3D model of a muscular man with a short haircut,black shoes, dressed in a black jacket and dark gray pants, throws a punch forward with his right fist.", "strong build man with a short haircut in a black jacket and dark gray pants punches forward with his right fist", "a large well built man with a buzzcut wearing a gray shirt, a black vest, dark blue jeans and brown shoes puts his fist forward, then pulls his hand back.", "A large formidable man with a short haircut and brown hair, wearing a brown jacket and grey trousers, standing in a wide stance with clenched fists and one hand moving back and forth." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 171, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/06cb55f941d94dc8b95ac46f92d89e7c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Skeleton of a dinosaur moving its head and tail.", "A 3D model of a brown dinosaur skeleton is positioned on its right side with a spear embedded within it.", "brown dinosaur skeleton lying on its right side with a spear inside", "a triceratops skeleton lays down on its side while its ribs grow out of proportions.", "a brown dinosaur skeleton with a tail that starts to move and the bones inside rise up." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 172, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6a006afce57a447baa60c7a6791f0086", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A wind house with its propeller moving round and round.", "A wooden brown windmill stands atop a stone-gray house, turning its white blades.", "a wooden brown mill stands on the Stone Gray House and turns its white liners", "A windmill attached to a brown house is spinning slowly.", "A brown coloured wooden mill with an annex stands and the blades spin clockwise." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 173, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6a80ea1b0c2546148c6a9a3b158ffa4d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A ninja-looking robot in black and red armor with a shield and sword is jumping up, twisting and slashing the air with his sword before landing down. ", "A male ninja warrior in dark-red gear, black boots and a red helmet is holding a katana and shield while executing a karate move.", "A cool Ninja wearing metal red protective armor top to bottom is holding a sword and shield, while performing a fighting move.", "A futuristic samurai wearing red skin tight armor and a red shield swings their sword while moving agilely.", "Ninja character in a red suit with a helmet, holding a sword in his right hand, a shield in his left and practising jumping kicks." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 174, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6a6683d8533a412c822dd59627349985", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A light-brown wooden chest opening and closing.", "A 3D model of an empty square wooden box in light brown is opening and closing on repeat.", "A rustic rectangular box in light brown made out of wood is opening.", "An old brown rectangular wooden box opens up.", "A wooden rectangular chest with a metal latch lock opens its top lid." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 175, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6b1ac0c1cb8b47408abed5672bf553b2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black and white CCTV camera moving sideways. ", "A 3D model of a white metal video camera with a black lens swivels left and right.", "A white metal video camera with black lens is steadily turning from left to right.", "A white surveillance camera moves left and right.", "A white-colored video surveillance camera made out of metal moves left and right as if it's following someone." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 176, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6b1ee6b2ceda455c901afbaef4452bc9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A surgeon in a light-blue uniform, blue medical hat and facemask is sleeping on his back on the floor, then sitting up. 
", "A 3D model depicts a male medical worker, dressed in a blue cap, mask and uniform, is lying on the floor and then rising up.", "A male medical worker in blue cap, blue mask and blue uniform lying on floor and getting up from it", "A medical worker in a blue uniform is laying on his back, then gets up from the floor.", "A male doctor in a blue uniform with a blue mask on his face gets up from the floor and shakes his head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 177, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6b21d1f106314bc680da315d31e03d5b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A farmer dressed in a blue dangarie, red flannel shirt, black rubber boots and a white hat is calmly walking. ", "A 3D model of a male farmer with a bald head, black eyebrows and mustache, wearing a white cap, red-and-black checkered shirt, blue overalls and black boots is walking forward.", "A male farmer in a white cap, red flannel shirt, blue overalls and black rubber boots is moving forward in a relaxed, casual manner.", "A farmer wearing a light gray hat, a red checkered shirt, blue jean overalls and black boots starts walking.", "A farmer man with a moustache, a white hat on his head, blue overalls, a red checked shirt and black high boots with a big belly is going straight ahead." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 178, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6bae6a5220874f309448e21ff56f05e9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young woman with short ginger hair, wearing a bright-red shirt dress with the letter A in front is falling down unexpectedly. ", "A 3D model of a red-haired woman in an oversized red sweater featuring a white letter A, and black leggings, collapses on the floor, lifeless.", "A female redhead in a red long sleeve mini-dress with a letter A, black tights and black shoes is standing before suddenly falling dead on the floor.", "A red-haired woman in a red sweatshirt with the letter A on it, black leggings and pitch-black shoes falls backwards as if she was shot.", "A model of a woman with red hair in a red sweatshirt, black trousers and black shoes loses coordination, throws her hands up and falls to the floor with her hands waving." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 179, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6bdef1fe69a74c16873d1ac2c3bb9e44", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A fire fighter with yellow protective clothing walking. ", "A 3D model of a male firefighter in a red safety helmet and yellow uniform is walking straight in different directions.", "male firefighter in red safety helmet and yellow uniform walking forward", "A firefighter wearing a red hat and a yellow uniform walks forward.", "A man in a yellow firefighter's uniform with a red helmet walks forward in a wide stride." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 180, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9acf2fd8538f41cb9ec9eef2ebc0dd5b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A girl 3D model with a creepy smile, a crazy hair style, and is dressed with a pink dress, pink gloves and pink pointed boots is moving from left to right", "A 3D model of a cartoon with pink hair, wearing pink dress, gloves, and big pointed shoes dancing", "A 3D model of a spinel cartoon, wearing pink dress, pink gloves and pointed sharp boots moving to the right and back. ", "A scary little girl with a big spooky smile, pink hair tied into two ponytails, a pink dress and gloves is bouncing left to right.", "Cartoon girl with evil face with pink hair with 2 tails, pink dress, gloves and boots moves left and right" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 181, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b0e925733894676bcba1a630fd5f307", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey horse with black hair is standing next to a brown horse cart, and is drinking from the bucket", "A dark gray animal cart with a white and black horse standing aside drinking something from the bucket.", "A 3D model of a dark brown stationed animal cart and a gray horse with black spots moving slowly as it drinks water from the black bucket.", "A grey horse with a black tail and dark hair is chilling in front of a wooden carriage and tilting its head down, drinking water.", "A grey horse, standing by a wooden chariot, is drinking water from a bucket" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 182, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b8faffb825d4e678609a06cca35ea33", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green and white trophy of a competition named CITY QUIZ is on a wooden stand and is rotating", "A green and white trophy written city quiz placed in brown wooden platform spins", "A 3D model of a white and green award called CITY QUIZ, attached on top of a brown wooden base, is rotating all round.", "A trophy of green and white colors with a CITY QUIZ logo is spinning in circles on a wooden stand.", "The green figure, standing on a wooden stand, rotates on its axis." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 183, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b56df44b95c44fa91bf20cc847b18d0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown skinned man with short black hair, is wearing black shirt and black pants, a brown blazer and white shoes is walking straight.", "A 3D model of a male with short black hair cut wearing brown coat,black shirt and trouser and white shoes is walking straight.", "A short-haired man wearing a brown coat, black shirt and black pants walking straightforward.", "A dark-skinned gentleman with dark hair, dressed in a smart suit with a dark colored shirt and pants is walking in a relaxed manner.", "Dark man with brown hair short in a brown suit goes straight relaxing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 184, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b73b36ba2184bd1bde5b42810752b69", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A military soldier dressed in a green uniform is armed, sitting on one knee and looking left and right.", "A military soldier wearing jungle green uniform ,one knee down holding a black gun looking left and right.", "A 3D model of a combat man kneeling on right leg, while holding a gun as he points forward, while his head turning side to side.", "A strong soldier man, wearing green camo uniform and a camo helmet, is standing on one knee, holding a rifle and looking around.", "Military man in uniform standing on one knee, holding a shotgun, taking aim and preparing to shoot while looking around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 185, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b83f5e5841241f68ce1d85525b0a2bd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Yellow giraffe with brown spots walks straight ahead", "A yellow and brown stripes giraffe is running quickly.", "A 3D model of a tall yellow with brown patches giraffe running.", "Yellow giraffe with brown spots is moving forward", "Yellow giraffe with brown spots is calmly walking." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 186, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9b908a8d401e45ca94852f9bf3a0e7e7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A yellow bee with black stripes moves its antennae then wings", "A yellow bee with black stripes flaps its white wings, moving its antennae while it breathes.", "A yellow bee with black stripes flutters its camouflage white wings, moves its antennae.", "A bee coloured in yellow with black stripes flutters its antennae and then its wings", "A bee with big wings stands and wiggles its antennae then wings." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 187, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9bd083d89a1d4d809e452f342cdb8e03", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with long braided hair and a black beard is bare-chested and wears a kind of long skirt and brown boots is dancing with steps from right to left.", "A 3D model of a man wearing tradition gray and white wrap with black gloves doing a capoeira dancing moves.", "A traditional man with long hair dances, wearing a brown wrap with gray patches around his waist and draped over his shoulder.", "A man dancing from right to left has long braided hair, a black beard, is bare-chested, wearing a long skirt-like outfit, and brown boots.", "A man with grey hair in a ponytail, wearing a long skirt, high boots, moving with a wide step to the left and to the right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 188, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9c5f945882364f0ba26f2302ae6b23ce", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with brown hair, a moustache and sunglasses wears a green coat, black pants, a white shirt and a black tie walks straight then turns raising his right hand up.", "A 3D model of a male with black glasses,long gray coat,white shirt,black tie,black trouser and shoes walks straight and then turn throwing his right hands away.", "A bearded man with mid-length brown hair, wearing a green trench coat, white shirt, black tie, pants, shoes, and sunglasses walks straight, then turns right while throwing his right hand up.", "Wearing a green coat, black pants, white shirt, and black tie, a man with brown hair, a mustache, and sunglasses walks straight before turning and lifting his right hand.", "A man with a short haircut, glasses, grey trend, dark trousers, walks straight ahead, turns around sharply and spreads his arms." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 189, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9c7cbb6c2ab14c6c98beb57d091bcf8e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a round brown table with springs that turn left and right, with a red ball on top.", "A gray silver rounded shape stool rolling left and right while placed a red ball on top.", "A 3D model of a dark brown round shaped table with rolling stands while holding a red ball like at the top.", "3D model of a circular, brown table with a red ball on top, and left and right-moving springs.", "3D model of a brown metal rig with two discs and rods in the middle, wobbles from side to side red ball on top." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 190, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9c42450fee7f48f1921ddd9176af19cf", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a red car transforms into a giant robot figurine.", "A red saloon car vehicle dismantles to a standing giant robot ", "A 3D model of a red saloon car reconstructing itself into a giantic robot that is standing.", "3D model of a red car changes to a gigantic standing robot", "3D model of a red car that stands upright and turns into a transformer robot." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 191, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9c2285567c6c4037b6ec477622939409", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Three-piece black foot pedal with small black objects falling down", "A black pedal like shaped objects with small element falling down.", "A black Presser Foot for Industrial Walking Foot Machines with small object falling down.", "Black foot pedal in three pieces with tiny black objects tumbling down", "Four black parts of smooth-shaped forms lying flat on the surface, from which small parts emerge and fall downwards." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 192, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9ca6ec00e97746698024beecf4ef7874", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A woman with black hair in a bun, wearing a black jumpsuit and heels, with her right eye covered, kneels on the floor in fighting position.", "A female cartoon with black hair,gray garment and heels squatting placing left hand on top of left knee and looks down.", "A female cartoon character with black hair, wearing a gray outfit and high heels, is squatting. She places her left hand on her left knee and gazes downward.", "A woman with black hair pulled back into a bun, dressed in a black jumpsuit and heels, covering her right eye, kneels on the ground in a combat stance.", "A woman with a ponytail on her head, in a black suit, on heels with her eyes covered, squats on the floor in a fighting stance." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 193, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9cb8cd9e5d1447a6b22016e4d93c3b6f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man in a red swimsuit, blue swimming cap and goggles, swims the crowl", "A boy wearing red short,blue cap and glasses is swimming.", "A 3D model of a middle-aged man, dressed in red shorts, a blue cap, and black glasses, swimming.", "A man swims the crowl wearing goggles, a blue swimming cap, and a red bathing suit.", "A 3D model of a man in a blue cap, red swimming trunks depicts swimming with wide arms." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 194, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9cb90374f8fc48dea78bcb64f86bc778", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a child in a red cap, red sweater and pants, black shoes and burgundy backpack dancing and spinning on himself", "A red cartoon boy with black boots carrying maroon back bag is dancing salsa.", "A red animated character wearing black boots, red cap and carrying a maroon backpack is dancing.", "3D model of a young child dancing and spinning on himself while wearing a red cap, red sweater, red pants, black shoes, and a burgundy backpack", "3d model of a little boy without a face, in a red cap, red suit, burgundy backpack dancing hip hop" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 195, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9cf16e5afd834f909a4796d1f9d4ec0c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Lego in the form of a black dragon with wings that move up and down", "A gray Minecraft Ender Dragon made out of lego pieces is flying.", "A charcoal-colored Ender Dragon from Minecraft soaring through the air.", "Lego created in the shape of a black dragon with movable wings", "The black coloured lego dragon flaps its wings" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 196, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d0bbe4e982444b2b51d1046ec9b1e37", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown cartoon character with turquoise hair and sleeves, and red eyes is running", "Skel walk animation wearing black and blue clothes with a shoulder bag is running. 
", "A cartoon in black and light blue clothing, with a shoulder bag, is depicted in a running animation.", "A brown cartoon figure with red eyes, turquoise hair, and sleeves is sprinting", "Threatening mystical mutant with red eyes, turquoise fur runs fast" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 197, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d0d63528aa249af96f103501bf94e40", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A yellow robot with black support moves by extending its sleeve", "A yellow and black hand robot streches.", "A 3D model of a yellow and black robotic hand reaching out.", "A 3D model of a yellow coloured robot standing on a black support moves its piston", "3d robot of yellow colour with black elements moves its piston" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 198, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d02f12a468b46acb3b5ca6031a4fe5b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black dragon with a brown chest and large wings moves his tail and wings", "A back dragon with yellow belly and wings ,wagging its long tail, flaps its wings while standing on a round black platform.", "A black dragon with yellow underbelly and wings is positioned on a circular black platform, flicking its long tail and flapping its wings.", "A large, black dragon with a brown chest and tail moves its wings and tail.", "A black fury with big wings, raises its head up and flaps its wings." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 199, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d3a3e42c0054c35aa39c3ee07388d16", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black dinosaur with a long tail walks straight and opens its mouth", "A black dinosaur opens it mouth wide while walking.", "A dark gray dinosaur walks with its mouth wide open.", "A long-tailed, black dinosaur walks straight and opens its mouth.", "A black dinosaur with a big tail walks on two legs and growls." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 200, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d3be7ffd8e64e308aab640d21a4d7a1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An anime character with short blond hair, gray hands and feet, and a wagging tail, dressed in a yellow dress and brown pants, pretending to play archery", "A 3D model of cartoon cat girl with brown hair ,pink crop top and short throws her right hand pulling it back.", "A 3D model of a cartoon cat girl with brown hair, with long tail,wearing a pink crop top and shorts, extends her right arm and then pulls it back.", "An anime figure with short blond hair, brown pants and a yellow dress, gray hands and feet, and a wagging tail, posing as an archer", "An anime character with ears, yellow clothes with fur and a tail, opens his arms, puts them behind his back and imitates archery." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 201, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d3f3674bcf849e099d1d0da4de33e40", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with black hair moustach and beard, is wearing glasses, a black shirt with white stripes, black pants and shoes, walks straight and takes his phone out of his pocket.", "A 3D model of a man wearing blue checked shirt,black pant and shoes picks something from his pocket while walking straight.", "3D model of a short haired man wearing blue checked shirt, blaack trousers,black sunglasses and black shoes walking as he stretchs to pick something from his right side pocket.", "A man with a black beard and mustache, glasses, black pants and shoes, a black shirt with white stripes, and a straight gait pulls his phone out of his pocket.", "A respectable man with a short haircut, beard, glasses, shirt and trousers, walks straight ahead and takes a phone out of his pocket." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 202, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d7e445a4b17475ea8a99c3e15020668", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a figure wearing blue and grey armour, has large blue eyes, turns her head to the right and then looks ahead.", "A 3D model of a blue metallic cartoon from Spiral Knight looks right and left", "A 3D model of a blue metallic cartoon character from Spiral Knights looks both to the right and to the left.", "3D model of a woman with big blue eyes, wearing blue and grey armour, turning her head to the right, and then looking forward.", "A robot character in a blue suit, with a helmet on his head and glowing turquoise eyes, looking around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 203, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d8ca2dca6ff4978a39545a8909d7b6c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man in a yellow rabbit costume with big eyes makes boxing movements", "A 3D model of a yellow rabbit standing with its hind legs doing boxing moves.", "A 3D model of a yellow cartoon rabbit standing on its hind legs, performing boxing movements.", "A man with large eyes and a yellow rabbit costume performs boxing moves.", "A man in a yellow bunny suit, with crazy eyes, a pink bow around his neck, showing boxing moves." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 204, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d80b98ac3d54dffa3134a055430a302", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A skeleton of red muscles makes boxing movements", "A red plasticine human fighting.", "A red clay figure is engaged in combat.", "Red muscles on a skeleton move like boxing pins.", "A red-coloured alien creature with no skin, only muscles visible, showing boxing moves." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 205, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d7279dd0cc442f99975bfd1acfbb3bb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a white robot man walking straight ahead", "A white skinned cartoon is walking upright.", "A 3D model of an untextured male cartoon character walking straight.", "3D representation of a white robot man moving forward", "3d model of a white male robot that walks straight with a wide stride" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 206, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9d5567162cc44e70a3b6cb7279e09d1e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A red car with four doors that open and close in the same time", "A 3D model of a red saloon car opens its bonnet ,boot and front doors and closes again", "3D model of saloon car opening all its doors including bonnet and boot and closes them. 
", "A red vehicle with four simultaneous opening and closing doors", "A red car stands there and opens all the doors, boot and bonnet at the same time and closes them again" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 207, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9dc47a6134964289af8ad3245183e118", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A pink fish with grey head, has a tappering form, opens and closes its mouth", "A 3D model of pink and white fish is swimming.", "A 3D model of a pink and white fish gliding through the water.", "A tappering fish with a pink body and a grey head opens and closes its mouth.", "A pink-coloured fish in the shape of an eel opens its mouth wide" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 208, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9e19b8cd9f924e92b947ba8cc42acc10", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A big man wearing black suit, a white shirt and a black tie and shoes puts his hands in his pockets", "A man wearing black suit with a black tie,white shirt put his hands inside coats pockets", "A 3D model of a man dressed in a black suit, white shirt, black tie, and black shoes, with both hands resting on his waist.", "A large man puts his hands in his pockets while sporting a black suit, white shirt, black tie, and shoes.", "a black and white man in a suit with a big belly stands up straight and puts his hands on his hips" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 209, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9e74e8d998c645239647819c3b425ad7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young man with brown hair, blue T-shirt, black pants and white shoes is doing push ups", "A 3D model of a young man wearing blue t-shirt,black pant and white shoes is doing press-ups.", "A 3D model of a man dressed in a sky blue t-shirt, black pants, and white shoes performing push-ups.", "A young man performing push-ups has brown hair, a blue T-shirt, black pants, and white shoes.", "A man with short brown hair wearing a blue T-shirt, black jeans, white sneakers, doing push-ups" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 210, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9fa8c07df4e1408cbb24bb48bc902fc2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A metal fan rotates from right to left on a brown wooden desk", "A gray fan placed on top of a brown wooden table rotates left and right.", "3d model of a gray fan rotating while placed on top of brown table that has black stands.", "3D model of a brown wooden desk with a metal fan rotating from right to left.", "a grey metal fan stands on a wooden table and spins the base in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 211, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9fc71f190a6645c89c8160bf5d8190c4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Children's game with blue, red and yellow circles on a wooden platform with three sticks. The circles change places towards other sticks. A white box with the logo is next to the wooden platform.", "A light-brown wooden platform with wooden sticks and three multi-colored circles that are moving from one stick to another.", "A white box displaying a logo is hanging next to a children’s board game featuring a wooden setup with wooden sticks has its blue, red, and yellow circles moving from one stick onto the next one.", "Children's game featuring three sticks and three circles—yellow, red, and blue—on a wooden platform. The circles swap positions on other sticks. 
Adjacent to the wooden platform is a white box bearing the logo.", "Children's game with sticks and circles on a wooden installation, near which there is a white box with the logo, circles of blue, red and yellow colour change their position to the next stick" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 212, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9fcdf6e8664e4011a03d26d68c2fa7b5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model yellow kangoroo with pink hair with two ponytails is jumping", "A 3D female kangaroo with pink hair jumping while running.", "A 3D model of a female kangaroo with pink hair leaping as she runs.", "A 3D model of a yellow kangoroo with two ponytails and pink hair is leaping.", "Yellow kangaroo in the form of a girl, with pink hair-tails on her head, jumps high" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 213, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9ff38817962b44ed95c75de191c3821e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A small red dragon with green eyes stands on a rock and moves its tail and wings.", "A pink Chameleon wagging its tail while breathes is standing on a gray rocks. 
", "A 3D model of a tiny pink and red dragon with green eyes, standing on a rock and swaying its body.", "Perched ontop a rock, a small red dragon with green eyes wags its tail and flaps.", "A small red dragon with green eyes, sitting on a rock and moving its body" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 214, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/11d4334bf42b495e91128a050fe93ba2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a young blond man dressed in green suit and black shoes, wearing an eye mask, walking straight ahead and then stops and raises his left hand.", "A 3D model of a male with white hair,blue garment and black shoes walks then stops and waves.", "3d model of a man with white hair, hunter green suits, glasses , walking boldy then stops to wave.", "3D model of a young, fair-haired man wearing a green suit, black shoes, and an eye mask. The man is moving straight ahead, stopping, and raising his left hand.", "A man with white hair, emerald suit, glasses, walks confidently, stops and waves" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 215, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/12ad1547bf74421781481cec3dbf18ab", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A nun wearing glasses walks with a large hammer in her hand", "A 3D Model of female evil nun wearing black clothes holding axe with her right hand walking.", "A 3D model of a female sinister nun in black attire, carrying an axe in her right hand, as she walks.", "A scary nun strolls along holding a big hammer.", "An evil woman in the guise of a nun, holding a large hammer, walks menacingly forward" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 216, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/12b9fe42168a406c98ccf330b9998093", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A character of emerald color has long braided hair and a beard, very muscular, is shirtless and wears a kind of skirt, he hits his chest with his hand ", "A 3D model of a blue mascular man wearing taditional brown and white skirt beating his chest with his right hand.", "A game character dressed in blue, resembling a tall man with a ponytail and wearing an ethnic skirt, is striking his chest and yelling.", "An emerald-colored figure with long braided hair and a beard, who is also very muscular, wears a sort of skirt and is shirtless, hits his chest with his hand. ", "A blue-coloured game character in the form of a large man in an ethnic skirt with a ponytail on his head, beating his chest and shouting" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 217, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/12bd08d66fe04a84be446e583d6663ac", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D star wars character wearing white is dancing with his hands", "A 3D white cartoon star wars is dancing in patterns.", "3d cartoon wearing white Starwars Helmet-Anovos dancing with his hands.", "A 3D white-clad Star Wars character is making hand gestures while dancing.", "Star wars character white coloured dances with his hands" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 218, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/13fa2131ada14963bd095ea39fe39c02", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a brown dinosaur with long tail opens his mouth and moves his arm", "A 3D gray silver dinosaur standing with its hind legs ,mouth wide open moves its upper body up and down", "A 3D model of a brown and silver dinosaur standing on its hind legs, with its mouth wide open and its upper body moving up and down.", "A brown dinosaur with a long tail opens its mouth and moves its arm.", "3d model of a brown dinosaur that opens its mouth wide and moves its arms" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 219, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/14afeda06f604fd68940bb629a4235e9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man in a long brown coat, black pants, black shoes, black gloves and a black beret is ready to fight", "A man wearing brown long coats,black cap and black boots performing boxing moves.", "A 3D model of a man dressed in a sleeveless long brown coat, black cap, and boots, as he performs box exercises.", "A man is performing box while dressed in a long brown coat, black jeans, black shoes, black gloves, and a black beret.", "Man in a brown trench coat with metal elements, with a visor on his head, boots, boxing on the spot" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 220, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/14cc06375f994f10bab87baf0cda2f55", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green long crocodile jumps and opens his mouth", "A green crocodile picks something with its big mouth.", "3d model of an olive green crocodile running as it catches something.", "A long, green crocodile leaps and opens his jaws.", "A green crocodile with a long tail opens its mouth aggressively and steps forward" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 221, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/14f901dfbeda4779af0fca7e27123d66", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man in black clothes and a brown vest, wearing a black hat, is running at high speed.", "A army man wearing black cap,black shirt and jungle green pants and coat is running.", "A 3D model of a combat soldier dressed in a black cap, black shirt, green pants, and a jungle green coat, running.", "A man running at a fast pace is dressed in black clothing, a brown vest, and a black hat.", "3d model of a man in black overalls, wearing a bulletproof vest, wearing a black cap actively running" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 222, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/24b0ef19fb344e8aa1cc4e79e5a81564", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey remote control folds in two and jumps out", "A 3D model of a gray remote jumps folding and unfolding itself.", "A 3D model of a gray remote control that alternates between folding and unfolding.", "A grey remote control squeezes then pops out.", "Grey metal remote control with buttons, squeezes and jumps" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 223, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/30ddd6d6d8a6491d8b52975c19968099", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A large tree with light brown leaves stands on a large rock, with a swing hanging on th branch moving, and leaves are falling from the tree", "A 3D model of a orange fantasy tree swaying in the wind", "A 3D model of an orange and yellow patched fantasy tree swaying gently in the breeze.", "A big tree with light brown leaves is leaning over a large rock, and leaves are falling from the tree. A swing is dangling from one of the branches.", "A tree with orange leaves is standing on rocks with a swing on the branch, swaying, and leaves are falling" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 224, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/036bfedf83644943ae45cf42cf8c4b4e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model wearing a big black hat, black dress and black high boots walking straight", "A 3D model of a black cartoon girl walking .", "A 3D model of a cartoon girl wearing black dress and boots strolling.", "3D model walking straight in a black dress, black high boots, and a large black hat.", "3d model in a black suit with a skirt, with heels going straight up" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 225, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/41fb721bef284546a224cf612e446afc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a metal-grey box opening from the middle and rising to the top ", "A 3D model of a gray tool box opens its lid ", "A 3D model of a gray toolbox lifting its lid.", "3D model of a gray-and-metal box that opens from the middle and the upper part rises to the top.", "2 metal rectangular elements detach from each other and levitate" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 226, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/47e1f4abe10e4ea9909708529b67a488", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a soldiers arms wearing blue sleeves, black gloves, holding a big gun and moving slowly", "A 3D model of a two half hands wearing blue shirt holding a black gun moves.", "A 3D model of two halves of a pair of hands, dressed in a camouflage blue shirt and holding a black gun, in motion. ", "3D model of a soldier's arms donning black gloves and blue sleeves, holding a large gun while moving slowly", "3d model of arms in blue camouflage with weapons, moves arms left and right" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 227, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/50b68816a4f44e01b6e00399137d25d8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a wooden drawer that expands and closes", "A 3D model of a marron and brown drawers opens and closes.", "A 3D model of maroon and brown drawers that slide open and shut.", "3D model of a brown coloured wooden drawer stretching and pulling back.", "Brown coloured wooden pump extends and retracts" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 228, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/50e060a0d57e4e378a87d34256270fe8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey metal box that opens and contains matches, a blue ribbon and a sticker on the box saying 'Survival is simple just don't die'.", "A 3D gray silver box with objects inside opens itself ", "A 3D model of a gray and silver box with contents inside that opens on its own.", "Self-opened grey metal box with a sticker that reads \"Survival is simple, just don't die,\" has objects inside", "Metal box with stuff in it, stickers on the lid, opens and closes" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 229, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/061f3043eceb4e988ca89cb27d687356", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a metallic object with a yellow stone standing in the air", "A 3D model of a green metallic object with a black basement", "A 3d model of a Ancient Rock Garden with black basement.", "A 3D model of a metal object featuring a yellow stone suspended in midair", "3D model of a metal construction with a hemisphere, inside with a levitating stone of yellow colour" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 230, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/97b8cc2493de46b9826691e9a7765df4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a brown wardrobe with multiple doors that open", "An empty brown wardrobe opens its drawers", "3d model of an empty brown wardrobe that is oprning its doors.", "3D model of a brown wardrobe with several doors that open in the same time", "3D wardrobe designer brown colour opens all its doors" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 231, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/98f52fa4dc3242adbbe6caf3101b64d4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Sony brand black cassette player with lid that opens and closes", "A 3D model of a black sony speaker opens wide and closes back", "A 3D model of a black Sony speaker that opens wide and then closes again.", "A black cassette player with an opening and closing lid manufactured by Sony", "Sony cassette player opens its lid" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 232, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/119c5e10733142b197aa53b86f6aeb04", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey car with five doors and the bonnet that opens all in the same time", "A 3D model of silver saloon car with red backlights ,opening its bonnet,front doors and boot", "A 3D model of a silver saloon car with red backlights, with its bonnet, front doors, and trunk opening.", "A grey car with five doors and a simultaneous opening bonnet", "The grey car opens all its doors, bonnet, boot" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 233, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/201b107b351c45288c0740797198606d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a character dressed in a silver-black jumpsuit with small orange dots, carrying daggers in his hands, dancing with his hands and feet.", "A 3D model of a game character wearing gray-brown garment dancing.", "3d model of a combat soldier cartoon wearing gray- brown uniform dancing all round.", "A 3D model of a man holding daggers in his hands and dancing with his feet and hands while wearing a silver-black jumpsuit with tiny orange dots.", "A tall man in a dark red suit with daggers on his hands dances with his hands and feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 234, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/216f855f4f9d432aa29e92c1d1b08378", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green object with grey streaks that rotates ", "An evergreen mountain rotating.", "A 3d model of a Mountain Coaster caartoon rotating", "A rotating green object with streaks of grey ", "An elongated green coloured object with ribbed indentations rotates" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 235, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/433f5b2d4e1d4d1794d63079cf0d700e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of grey orange metallic object that moves in different directions", "A 3D model of a purple and orange object spins ,folds and unfolds.", "3d model of a orange grey robotic hand folding and unfolding.", "3D model of a metallic, grey-orange object that moves in various directions", "Metallic plant of purple colour with orange inserts, moves its base in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 236, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/823ca185916d48d0972f8694aceb3829", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a design consisting of several fruits and vegetables in different colors that rotate in a spiral.", "A 3D model of a multicoloured chrismas tree spins.", "3d model of an electric multicolored christmas tree rotating on its own axis.", "A 3D model of a pattern with multiple colorful fruits and vegetables rotating in a spiral pattern.", "3D coloured elements in the form of fruits, vegetables rotate in the form of a spiral" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 237, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/824cca31e9d747ea89f6c63e4d710d68", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a transparent-roofed house, with exterior walls in light blue, and interior walls in gray, in which a wall collapses", "A 3D model of a built house with light blue walls,green bed one side of wall falling down.", "A 3D model of a glass greenhouse with light blue walls and a green bed, featuring a purple wall as another wall collapses.", "3D model of a transparent-roofed home with light blue exterior walls and gray interior walls that has a collapsing wall", "3d model of a blue-coloured house with a transparent roof, in which the wall moves down and up" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 238, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/908b13271d7d48ac880df8b6f4ed1f30", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a secure door that opens from right, left and from the top ", "A 3D model of a door animation opening left,right and upwards.", "A 3D model of a gate animated to swing open to the left, right, and upwards.", "3D model of a metallic secured door that opens from the top, left, and right ", "grey metal unit with dark grey elements in the form of gates, opens side and top doors" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 239, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1361d022a4b3434bafec50440f5a748e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown metallic syringe is pressed on the handle", "A 3D model of a large metal syringe closes and opens its handle", "A 3D model of a large metal syringe with a handle that opens and closes.", "The handle of a brown metallic syringe is pressed.", "a metal old rusty syringe presses on the entrance handle" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 240, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/01782ab4a2994d0a89e58ef96e951958", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a blond woman wearing brown top and a long brown skirt, break dancing", "A 3D model of a princess with long brown hair and long dress doing karate moves", "A 3D model of a female cartoon character with long brown hair, dressed in a dress with a green wrap around her waist, performing karate moves.", "A 3D model of a blonde Gypsy breakdancing woman while wearing a brown top and a long brown skirt", "a woman with white hair, dressed like a Gypsy, break dancing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 241, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2984a62bbc784315ac351359b3cfdb8d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Grey-Brown shotgun moving up and down", "A 3D gray silver gun with brown handle shoots", "A 3D model of a gray and silver gun with brown patches in the act of firing.", "A 3D model of a shotgun grey and brown coloured, swaying sideto side", "Metal shotgun with a wooden handle, levitated in the air and wobbled in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 242, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7743a10d48d74c8e814fa5b6759e1ddb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A skull wearing a big black hat moving his head and jaw", "A 3D model of a scary skull wearing gray big cap is swinging its parts.", "A menacing skull adorned with a large gray cap, with its segments swinging about.", "A skull moving his jaw and head while having a large black hat on his head", "An evil skull with glowing green eyes, skewed jaw, wearing a pirate hat, wagging his head and jaw" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 243, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/25234b25da3a4bb8bcc2fa1c9d7ae726", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a wooden door openning and closing with an orange background", "A 3D orange gate opens its brown door and closes", "3d model of an orange brown gate opening and closing", "A 3D model of a wooden brown door openning and closing ", "3d mock-up of designer doors in orange colour with wooden inlays and monograms on top, open and close the door" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 244, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/58729c7672ae4e01a6103617c2f4f55e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a secure round metal door that opens ", "A 3D multi-coloured air lock door separates at the center one goes left and the other one right.", "A 3D model of a multi-colored airlock door that splits in the middle, with one half moving left and the other moving right.", "3D model of a secure round metal door that splits, one half goes left the other goes right", "round metal doors that look like safes open the doors" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 245, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/63152c5a03414a1e8cef4eee12f8e424", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A construction worker dressed in a red shirt and a black overall with a yellow helmet is dancing and squatting", "A 3D model of a engineer man wearing orange cap,red shirt black overal and black shoes is dancing .", "A 3D model of a construction worker dressed in an orange cap, red shirt, black overalls, and black shoes, dancing.", "A construction worker is squatting and dancing while wearing a yellow helmet, black overalls, and a red shirt.", "a man in a labourer's uniform, with a red shirt, yellow helmet, dancing, wiggling his hips, squatting and moving his feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 246, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/615975a697c0435e8d1546405394071c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black shotgun with a brown wooden handle is pointing straight ", "A 3D metallic gun with brown handle point different directions", "A metallic wooden gun pointing straight forward.", "Pointing straight ahead is a black shotgun with a brown wooden handle. ", "black coloured shotgun with brown wooden inlays, with a round ammunition drum stands upright" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 247, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/642643c436914386b7f4c662246a9cd9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D hand model dressed in black gloves carrying a gun removing bullets and putting them back in their place.", "A 3D model of a two half black hands holding a gun removes bullets and adjusts.", "A 3D model featuring two black hand halves holding a gun, removing the magazine, and making adjustments.", "3D hand model holding a gun while wearing black gloves, taking out and replacing bullets.", "3d model of hands wearing black gloves, holding a black weapon and reloading it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 248, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/654756c010144568abf7060d1fd2a51d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an earthenware vase, oval shape, turns in on itself ", "A 3D model of a brown pot spinning", "3d model of a light brown clay pot rotating.", "An oval-shaped earthenware vase rotating on itself ", "clay vase, round, wrapped in one place, turned in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 249, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/736021a46f1249e283583be47c1535a9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a grey hairy cat with head down and tail wagging", "A 3D gray cat moves its head and tail downwards sleeping", "3D gray cat lowers its head and tail, settling down to eat something.", "A 3D model of a hairy, grey cat with its tail wagging and its head down to eat", "3d model of a fluffy grey-brown cat with a big belly eating something" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 250, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/844295d5d2b44ce2878df657f65961f7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a fish pond with blue walls, and brown ground, a fish swims next to a creature that looks like an animal that is lying down.", "A 3D model of a fish pond with blue walls and yellow fish swimming around animal like creature lying down.", "A fish pond with blue walls, where yellow fish swim around a creature resembling an animal that is lying down.", "3D model of a fish pond with brown ground and blue walls, a fish swims beside what appears to be a sleeping animal.", "a blue terrarium with a tree inside and sand with something flying in it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 251, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9082454a51d74961a20e690a17392c2d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Half-circle wooden door that opens and closes", "A 3D model of a brown wooden semi-circle which splits into quarters", "A 3D model of a brown wooden semicircle that separates into four parts.", "A 3D model of a wooden door half-circle-shaped that opens in the middle", "Semi-circular wooden doors with patterns, opened" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 252, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/15532831b8704397b27517f42e64fa7c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a drawn cartoon character wearing a T-shirt and a Jean pants, is jumping barefoot and moving her arms", "A 3D model of a female jumping bare foot left and right side.", "3d model of a lady grey in color jumping with no shoes on.", "A cartoon figure dressed in a T-shirt and jeans is jumping barefoot and waving her arms.", "A cartoon woman in a white outline, jumping and moving her arms" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 253, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/98365778a9fb48f5bac4fe6a7667c621", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cannon on a metal platform rotates from left to right", "A 3D model of metallic robot platform placed a canon on top rotates.", "3D model of a metallic robot platform with a cannon on top, rotating.", "A metal platform with a rotating cannon moves from left to right.", "A blue coloured weapon with two glowing sensors that stands on a square rig. 
The weapon spins in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 254, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/368857647d44434382ccdc5219682584", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a white structure unfolding on the top of a green platform ", "A 3D model of a white coloured object unfolds itself", "A 3D model of a white object unfolding.", "3D model of a white building that is unfolding atop a green platform ", "A 3D model of a white coloured object on a green platform unfolds by itself" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 255, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a3f110c0d2944a2dbff78709eb8f3984", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Three golden rings that intertwine as they turn in different directions", "A 3D model of a golden three rounded rings rotating all round.", "Three golden rings rotating in all directions.", "Three golden rings that merge together as they rotate in various directions", "Three gold rings that are intertwined rotating in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 256, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a19ee389bd624102919aa4a8cee37cdf", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a green and brown metal object that opens its mouth", "A 3D model of a green and brown gear of war grenade opens its mouth.", "3D model of a green and brown grenade , opening itself.", "3D model of an object made of brown and green metal that opens its mouth", "A metal object with a weave, with a green round tip, standing upright" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 257, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a34114416c314a24b247891781218553", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a female character with short hair, wearing a red croptop, grey jacket and grey pant, holding a gun in her right hand and moving it a little bit", "A 3D model of a female wearing red top,black jacket, brown baggy pant and black gumboots holding big lenses camera with the right hand. ", "3D model of a woman dressed in a red top, black jacket, brown baggy pants, and black gumboots, holding a large camera with her right hand.", "A 3D model of a short-haired woman sporting a grey jacket, grey pants, and a red crop top is holding a gun in her right hand and twitching it.", "Female game character with white hair, standing in a costume, with a gun in her hand, moving a little bit" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 258, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/adda0d31898c4fcd82f3ac8e12ae987e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3Dmodel of a white photo frame with the image of a little girl pulling a suitcase. 
the frame falls on its side.", "A 3D model of a flame portrait of a black girl picture with suitcase rotates", "A 3D model of a flame-themed portrait of a black girl, with a suitcase, rotating.", "3D model of a white picture frame featuring a young child pushing a suitcase. the frame topples over.", "The frame is white, the image turns and falls on its side" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 259, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ae6a7424569b4e30b09bb18f95728213", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a brown metal box with a blue roof. a large window appears and a sliding wooden door opens. ", "A 3D model of a multicoloured house opens its brown wooden door.", "A 3D model of a multicoloured structure like a house opening its brown wooden door.", "A 3D model of a brown metal box with a blue roof, and wooden sliding door opens.", "grey metal box with a square white grid, with a wooden floor inside that moves to the right" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 260, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e392c51356f24febb61ae27d1864628c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black and white MIDI keyboard controller with red and green lights, is playing music.", "A 3D model of a black and white piano,with red light keyboard,is playing music", "A music keyboard with black and white keys, featuring multiple red pads and controls, is generating sound.", "A musical keyboard with white and black keys, along with several red pads and controls is playing music.", "3D synthesiser in black with red glowing buttons.Push buttons" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 261, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e418a9d5d1d24a058acbd81cbc854b55", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a two-part gear assembly where one of the parts is rotating.", "A black and white two separate series gear where the front part is spinning", "A 3D representation of a black-and-white mechanical gear system with two distinct components, one of which is spinning.", "3D model of a white and black mechanical gear assembly of two separate parts, one of which is rotating.", "grey metal mechanism, with black inserts, with a gear that rotates around the axis" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 262, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e591fc62545a455f917d86f94bc2bfe5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a NASA astronaut suit with a helmet and an American flag patch on the sleeve, where the front side of the helmet moves up and down", "A white astronaut uniform with American flag on the left shoulder,the face helmet moves downwards", "A 3D depiction of a white space suit featuring an American flag patch on the shoulder, with the helmet's front visor closing.", "3D model of a white astronaut suit with an American flag patch on the shoulder, where the front of the helmet is closing up.", "3D spacesuit model with the US flag on the arm, on which the helmet protector opens" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 263, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e598b89f5ba54a11bdbb7441fb761f9f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an old man with glasses wearing a black suit with a red necktie is dancing.", "An old man with glasses wearing black suit ,white shirt and a red tie is dancing", "A 3D representation of an elderly man with spectacles, dressed in a black suit and red tie, dancing.", "An old man wearing glasses, a black suit and a red tie is dancing.", "An old man with grey hair, in a black suit, dancing with his hands and feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 264, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e693a7b4f7dc49a3aaee4048f986ed14", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A realistic 3D shell with a slightly curved shape, light beige on the outside and darker bluish-gray on the inside, is opening up.", "A 3D model of a beige oval shaped-like shell opens up", "A lifelike 3D seashell with a gentle curve, light beige on the exterior and a deeper bluish-gray on the interior, is unfolding.", "3D model of a curved beige shell is opening up.", "3D model of a white coloured seashell, which opens and has an inscription glued on the inside" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 265, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e804b9405dc74f0bb96c732fca946076", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a muscular, monster-like character with rocky, cracked skin, wearing torn jeans, purple tank top and boots, is standing still with his chest slightly moving with steady breathing.", "A 3D model of a muscular monster ,wearing purple vest,gray pant and boots standing with chest out breathing", "A 3D depiction of a brawny, creature-like figure with rugged, fractured skin, dressed in ripped jeans, a purple sleeveless shirt, and boots, standing motionless as his chest rises and falls with controlled breaths.", "3D model muscular monster character with yellow cracked skin, wearing a ripped purple tank top, torn jeans and boots, is standing still and breathing.", "The game character is big with wounds all over his body, white eyes, aggressive breathing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 266, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e2285f2f17fc4cc9a8fc2010ad85e794", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this 
multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Red, wheeled mechanical device with two wheels at the base and a small supporting leg, is swinging and its upper part slightly bounces up and down.", "A red two wheeled vehicle with a small front wheel ,is raising up its top part up and down", "A crimson, wheeled apparatus with a pair of wheels at the bottom and a tiny stabilizing leg is swaying, with its upper section subtly bouncing.", "3D model of a red log splitter where the upper part is moving up.", "3D model of a red-coloured wheeled machine that swings up and down by means of a rod" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 267, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e049593432534868a2be22868a3fd76a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A 3D model of a living room with a TV, black sofa, whit a coffee table, a modern fireplace and three white triangle shelves with a black ball on top that suddenly falls to the floor, making the shelves wobble.", "A structure with white equipments and shelves, one of the shelf is placed a black ball on top which falls down kicking another white object falling down also.", "A setup with white apparatus and shelves, where a black ball placed on one shelf rolls off, triggering another white object to fall as well.", "3D model of a modern room with gray walls, wooden floor and minimalist furniture, and a black ball that suddenly falls from a set of triangular shelves, making them swing.", "3d model of a game room with a fireplace, paintings, game console. 
From the paintings rolls a heavy metal ball, and knocks down the tube" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 268, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/eb410b7de4964bc49a1aa76b1c8bf8a9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D black mechanical device supported wide legs, with detailed patterns, claw-like extensions, and a small purple element at the center, with its top sections appearing to open up.", "A 3D model of a gray machine with its upper top part splitts into two at the center one moves left and the other right", "A 3D black mechanical apparatus with broad supports, featuring intricate designs, claw-shaped appendages, and a small purple component at the center, where the upper parts seem to be unfolding.", "3D model of a black patterned machine with wide legs, the upper parts split to open.", "3D model of a black metal structure with sharp tips on top. The upper parts can be slid to the left and right" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 269, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ed092246bca44978add6e33971389fc1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black round emblem with a golden silhouette of an angel on both sides, where on one side the silhouette has bat wings and holds a scythe, while on the other side the figure has angel wings, and the emblem is bordered on both sides with the phrase \"Dove loro parla, ogni lingua tace\".", "A round black and golden badge logo ,with gold dove drawn at the center rotates all round", "3d model of a black, double-sided circular emblem; one side features a golden figure with bat wings and a scythe, while the other displays angel wings, encircled by the phrase 'Dove loro parla, ogni lingua tace'.", "A black round double-sided badge, one side shows a golden silhouette with bat wings and a scythe, the other side has angel wings, with the phrase 'Dove loro parla, ogni lingua tace' around the border.", "3d model of a gold coin, with a black seal, the image of death with a scythe, with wings and inscriptions. Spins around the axis in one place on the same place" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 270, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fb4543fef88a466a989f4e11b8820db3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a colorful patterned sphere floating on a circular base, with a matching patterned vertical panel and a cone-shaped attachment; the sphere swings vertically along with the cone and interacts with the vertical panel as the base is spinning.", "A 3D multi-colored patterned sphere rotates on a spinning base and is linked to a cone and a vertical panel, all featuring the same pattern. 
The sphere moves up and down as it spins.", "3D decorative orb on a rotating stand, linked to a cone and aligned with a vertical panel, both bearing identical designs, with the orb oscillating up and down.", "3D patterned sphere on a spinning base, connected to a cone and facing a vertical panel, both with the same pattern, where the sphere moves up and down.", "3d model of the construction, with a wall, a triangle in the air, a circle in the air, and all these elements in multi-coloured print, with numbers under the balloon white installation with a black inset.The circle and the triangle move simultaneously up and down, From the sides of the circle fly out elements" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 271, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fc6a07fe83cd482aa20adbfe833441c8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a green rectangular box with an open top, featuring a rectangular opening on one side and a silver robot figure standing inside the box, waving with its arms raised alongside a tall white pipe, while a small pink shell moves toward the open side.", "A green rectangular box with an open window one side ,inside the box is a robot sitting raising its both arms waving continously", "3D depiction of a green container with an open top and a side cutout, featuring a silver robot lifting its arms beside a tall white cylinder, as a small pink object moves toward the side cutout.", "3D model of a green box with an open top and a side cut-out, in which a silver robot raising its arms next to a tall white tube, while a small pink element moves towards the side opening.", "A green room with a window without a door. In the room there is a wooden cabinet, a fireplace and a robot. 
The robot is waving its arms upwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 272, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ff1d25ded4fd4f4e8ce1694f3fdaa8ed", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black wire mechanical structure with several coils and loops, mounted on a wooden base, as the wire rotates the components of the structure shift positions, with the coils and loops swinging.", "A 3D model of a black marble machine structure with multiple coils and loops rests on a wooden base. The model demonstrates physical movement mechanism when it is rotated using a handle.", "3D representation of a black wire structure with multiple coils and loops, mounted on a wooden stand. The model demonstrates physical movement when turned using a handle.", "3D model of a wire structure on a wooden base, with interconnected loops and coils that move sequentially.", "3D model of a black wire construction with several coils and loops that stand on a wooden base. The model shows the physical movement when rotated by a handle" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 273, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dddbd4087b2941c585ed4c9f343ae2ba", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a pink cartoon starfish with big eyes, wearing green shorts with purple floral pattern, smiling and dancing.", "A 3D model of a cartoon from patrick star with big eyes,wearing green and purple short dancing left and right", "3D depiction of a pink cartoon starfish with large eyes, dressed in green shorts with a purple floral pattern. 
The character smiles and dances.", "3D model of Patrick Star from SpongeBob SquarePants cartoon, is dancing.", "A 3D model of a Sponge Bob character, Patrick, wearing yellow beach floral-print shorts, is smiling and dancing a happy dance." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 274, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/de3c8c1ed7ab471790d806d6f0e35d72", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a human figure holding a purple and blue sword in both hands, pointed upward, while walking in a combat stance.", "A 3D model of a human figure in a combat stance, wielding a purple and light blue sword with both hands held high. The figure is depicted as if in mid-step, ready for battle.", "3D illustration of a white-colored figure carrying a purple sword, moving in a slow, sideward stride.", "3D model of a white human figure holding a large purple and blue sword with both hands, moving in a fencing position.", "3d model of a white coloured character holding a purple sword, walking in a slow sideways gait" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 275, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/deccf817a75647b8b5146bc0d50e8ae5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a modern streamlined train in blue and gray colors standing with passengers waiting next to it.", "A 3D model of gray and blue train ,stopping with passengers standing next to it", "3D depiction of a sleek, contemporary train in blue and gray, with passengers waiting beside it.", "3D model of a gray and blue streamlined train standing with people next to it.", "3d model of a grey train, with a blue stripe, which stands on a grey platform, on which stand people" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 276, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/df94fc680c404ffeb0a7b864958600a5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a white cartoon dog with black ears, a red collar, and a black spot on its back, walking with a cheerful expression.", "A white cartoon with black ears and its back and red neck walks jumping happily", "3D representation of a white animated dog with black ears, a red collar, and a black patch on its back, trotting happily.", "3D model of a white cartoon dog with black ears and black back wearing a red collar, is walking happily.", "A cartoon character of a white coloured dog with black ears, a black heel on his back, a smile on his face, walking straight with a cheerful gait" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 277, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dfa0e22750b44e3f8ff856d62f05c329", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a mechanical device with large blue rotating wheel and a gray frame, mounted on a black base.", "A 3D model of a mechanism featuring a light blue wheel mounted on a gray assembly with a handle, all set on a black platform. Turning the handle makes the wheel rotate in a circular motion.", "a mechanical apparatus featuring a large blue spinning wheel and a gray frame, all set on a black base.", "3D model of a machine with a blue wheel, mounted on a gray frame with a black base, the wheel is connected to a crankshaft making it rotate.", "3D model of the mechanism with a turquoise wheel, grey setting with a handle, on a black platform. The handle turns the wheel and it rotates in a circle." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 278, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dfe0231a096d48e0b8e5e3d0ba3de1bb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a female character with black hair, wearing a layered black dress with off-the-shoulder sleeves, tights, and heeled boots.", "A 3D model of a woman with black hair,wearing black off shoulder circular dress and black high heels boat standing still", "3D depiction of a female figure with dark hair, dressed in a tiered black gown with off-the-shoulder sleeves, tights, and high-heeled boots.", "3D model of a girl with black hair in a low bun, dressed in a dark, layered gothic dress, black thigh and black stockings, is standing still.", "3d model of a girl with black gathered hair, black dress, high heeled boots" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 279, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dfe277b5639a45289a36412c84465248", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a female character with long brown hair and a brown shiny accessory on the back of her hair, wearing a black top, denim shorts and dark tights with blue shoes, is sitting on a wooden bench, swinging her head and legs.", "A 3D model of a girl with long hair,black top,denim short and shoes sitting on top of a desk swinging her legs and head", "3D model of a woman with long brown hair and a shiny brown hair accessory, dressed in a black top, denim skirt, dark tights, and blue shoes, sitting on a wooden bench and swinging her head and legs.", "3D model of a girl with long brown hair, wearing a black t-shirt, jeans shorts, dark stockings and blue shoes, sitting on a wooden bench while swinging her head and legs.", "A girl with long hair, wearing a short grey t-shirt, blue shorts, black tights, sitting on a black table and wiggling her legs up and down and her arms" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 280, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e0fcd97d07034656ad4e9baac7114e43", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an armed male character with a blocky design, wearing a dark jacket, blue pants, and black boots, holding and reloading his rifle.", "A 3D model of man robot with glass,wearing gray jacket,red shirt,navy blue pant and black shoes holding a gun loads and unloads it.", "3D model of a blocky armed man in a dark jacket, blue pants, and black boots, reloading his rifle.", "3D model of an armed blocky male character with black hair and glasses, wearing a gray jacket, blue pants and black boots, is standing and reloading his rifle.", "3d model of a robot in the form of a man, with black hair, glasses, jacket and jeans, holds a weapon in his hands, and reloads it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 281, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e0fed94af7104e5e8e72e0755042619d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an ugly monkey with brown color and a lighter brown face, belly, hands, and feet, standing on its feet and dancing.", "A 3D model of a brown cartoon monkey with beige belly,face,hands and leg,standing with its hind legs dancing.", "3D model of a brown monkey with a lighter face, belly, hands, and feet, dancing on its feet.", "3D model of a brown cartoon monkey with light brown face, belly, hands, and feet, dancing on its feet.", "The monkey is brown in colour, with beige spots in the front, stands on two legs, dances actively with his hands and feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 282, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e1a34e8d4f7c47bebc1f61380c211f68", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green and yellow fish with black spots along its sides, moving its tail horizontally", "A 3D model of yellow fish with black spots swimming as it wags its tail", "A green and yellow fish with black spots along its sides, wagging its tail horizontally.", "Realistic 3D model of a green and yellow fish with small black spots, swimming and moving its tail horizontally.", "3D model of fish green colour,With yellow spot,small spots on the body,swims calmly straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 283, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e2e70102881c4d7491b793e5b3c0f53c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of the Irish flag fluttering in the wind.", "A 3D model of a Iranian flag with orange ,white and green fluttering in different directions", "3D model of the Iranian flag waving in various directions.", "3D model of an Irish flag waving in the air.", "3D model of the Iranian flag, which wriggles in different directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 284, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e4bffd85e16b4c87b38e767c9f6dcc10", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a deer character in a pixel art style, wearing a white shirt and a blue vest, is standing on its feet and holding a yellow disk with its left hand.", "A 3D model of a cartoon deer with long horns,wearing white t-shirt,blue vest and brown pant standing holding a yellow chip coin with the left arm.", "3D pixel art deer character in a white shirt and blue vest, standing and holding a yellow disk in its left hand.", "A pixeled deer cartoon character with a white shirt and a blue vest, is standing still and holding a yellow round chip with its hand.", "3D lego model of a deer holding a flat circle and looking at it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 285, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e5e1cfb0ca204dcc9b0cc67e345683a8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a woman with short black hair wearing sunglasses, dressed in traditional clothes including a patterned skirt, shoulder wraps and yellow boots, performing a traditional dance.", "A 3D model of a woman with glasses,wearing traditional brown and white wraps yellow and black boots breakdancing", "3D model of a woman with short black hair and sunglasses, in traditional attire and yellow boots, performing a dance.", "3D model of a woman with short black hair and sunglasses, wearing traditional brown and white costume and yellow boots, is dancing slowly.", "A dark-skinned woman with a short haircut, wearing a traditional costume, stands upright and dances while making fluid movements with her hands" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 286, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e5e952b1d18d48208dbebb76bbe2c54d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D Model of a green, reptile creature with a long tail, spikes along its back, and antennae on its head, standing on a square, textured platform.", "A 3D model of a green dragon with spots,standing with its hind leg on a gray platform wagging its tail.", "3D model of a green reptile with spikes and antennae, standing on a textured square platform.", "3D model of a green reptile with spikes and antennae, standing on a square textured platform and slowly moving its tail.", "3D model of a green dragon, with spots, a large tail, standing on a flat floor with his arm at his side and wagging his tail." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 287, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e5e5048f3cef49158d2619252204fc0b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a cute fantasy themed female character with lavender hair styled in a large bow, dressed in a yellow gown dress with pink and blue straps, holding a pink and gold umbrella, she closes the umbrella, jumps and turns while throwing the umbrella in the air, then catches it.", "A 3D model of a multi-colored cartoon princess holding a pink umbrella wide open,turns and throws it in the air,picks it up ,folded", "3D model of a fantasy female character with lavender hair in a bow, wearing a yellow gown, pink and blue straps, holding and tossing a pink and gold umbrella, then catching it", "3D model of princess character with purple hair, wearing a yellow dress, closes her pink umbrella, spins, throws it in the air, and catches it.", "a female character with white hair, a gold suit, a pink umbrella in her hands, spins it round, tosses it up, and picks it up, making a U-turn" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 288, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e6b53f09a6cd441fa99ac84ae18bdbc6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a ferocious, dark dinosaur, moving in a menacing way and opens its mouth.", "A 3D model of a gray dinosaur wagging its tail moving in a scary way defending itself while opening its mouth", "3D model of a fierce dark dinosaur, moving threateningly with its mouth open.", "3D model of a scary black dinosaur that opens its mouth and moves aggressively.", "A dinosaur, black in colour, hits something with a twist of its tail aggressively several times" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 289, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e8ca2615b4304c3bacb780b0685d8a05", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a green winged, mantis-like creature, moving its sharp arms in an attacking position.", "A 3D model of a green cartoon looking like mantis ,with wide wings moving its arms defending itself", "3D model of a green-winged, mantis-like creature, raising its sharp arms to attack.", "3D model of a green cartoon praying mantis standing and defending itself with its sharp claws.", "Green cartoon dinosaur with large wings, two sharp dagger-shaped arms, defending itself and waving its hand." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 290, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e8e528158fee44d3bc681410df516e5f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a female character with short silver hair, wearing a blue crop top, black leggings, and blue sneakers, is exercising.", "A 3D model of a female with silver short hair,wearing blue crop top ,black trouser with white stripe and black shoes performing capoeira moves", "3D model of a silver-haired female in a blue crop top and black leggings, working out.", "3D model of a woman with short grey hair, wearing a blue crop top, black pants and dark blue shoes, doing a light aerobic workout.", "A woman with short white hair, wearing a blue short top, black tights, making chaotic movements, putting her arms out to the sides and pacing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 291, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e09cd67ac6384a25bca6a8d28408f203", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a female character with long brown hair tied up in a high ponytail, wearing a fitted blue and black jumpsuit with red straps and gray shoes, standing in a relaxed position.", "A 3D model of a female with brown long hair, wearing black and blue sport clothes with red stripes and brown shoes standing breathing.", "3D model of a long brown-haired female in a blue and black jumpsuit with red straps, standing casually.", "3D model of a girl with long brown ponytail hair, wearing a blue and black jumpsuit and gray shoes, is standing still.", "A woman with long brown hair in a ponytail, wearing a tight-fitting blue suit with black inserts, standing still and breathing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 292, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e20c75d76db347d993ab8876c1fddd49", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A red balloon attached to a gray mechanical setup, with a hose connected to it and a vertical stick next to the balloon; when the stick moves down, the balloon inflates.", "A 3D model of a red balloon connected to a gray mechanical setup with a hose,there’s a vertical stick next to the balloon, and when the stick is pushed down, the balloon inflates.", "Red balloon connected to a gray machine with a hose and a stick; when the stick lowers, the balloon inflates.", "A red balloon attached to a grey machine by a tube, the balloon inflates when the nearby stick moves downwards.", "A white coloured machine with a balloon pump that inflates a red coloured balloon, which is attached to a pipe" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 293, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e21a5211b1634d14938b911a1cd430c4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a robotic character with a rugged industrial look, in yellow, black and red colors, standing with its legs wide apart and moving its arms slowly.", "A 3D model of a yellow and red metallic robot,standing with legs wide open moving its arms in patterns ", "3D model of a rugged yellow, black, and red robot standing with legs apart, slowly moving its arms.", "3D model of an old yellow, black, and red robot standing with legs apart, slowly moving its hands.", "A yellow robot with red elements has taken a martial arts stance and is making movements with its arms" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 294, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e39ec3c45acc40b79db8bf133e1dfc77", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A small tree with a thick trunk, several branches and green leaves, planted in a brown pot, one of the branches breaks and falls into the pot.", "A 3D model of a growing tree in a brown pot with soil, one of its green branches breaks and fall inside the pot", "A small tree in a brown pot has a thick trunk and green leaves. A branch breaks off and falls into the pot.", "3D model of a small green tree in a brown pot, with a branch breaking and falling inside pot.", "3d model of a tree in a brown pot, from which a branch fell into the pot" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 295, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e59ea16e52254c6189e3759dfce4dc93", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a pterosaur with brown wings, gray neck and a red bump on the head, flying with its mouth open.", "A 3D model of a brown Quetzalcoatlus,with big wings and mouth wide open is flying", "3D model of a pterosaur with brown wings, a gray neck, and a red crest on its head, soaring with its mouth ajar.", "Realistic 3D model of a brown-winged pterosaur flying with its mouth open.", "Pinkish-brown flying dinosaur, opens its mouth and flies with its wings" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 296, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e95aac5be4fb40b3abc66eca0a59c5ab", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an old-fashioned rotary telephone with a curved dark green cylindrical case, a rotary dial, a brown base and handset attached to the case with a coiled wire, where the rotary dial rotates anti-clockwise.", "A 3D model of an old-fashioned rotary phone with dark green, curved body, a rotary dial on top, a brown base, and a handset connected by a coiled cord, the dial turns counterclockwise.", "3D model of a vintage rotary phone with a dark green, curved cylindrical body, featuring a rotary dial, a brown base, and a handset connected by a coiled cord, with the dial turning counterclockwise.", "3D model of a vintage rotary phone with a dark green curved housing, a brown base and a handset, with a rotary dial that rotates in an anti-clockwise direction.", "A horn-shaped, green-coloured telephone with a round insert at the end, at the bottom of which lies a handset on a stand. 
The numbers on the insert turn around" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 297, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e98b692369f44fc7af0f28846d8e0488", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a grey moth, with large, patterned wings and six legs, is moving left and right on its legs.", "A 3D model of a gray and brown moth with spots,moves its legs right and left", "3D model of a grey moth with patterned wings and six legs, shifting side to side.", "3D model of a grey moth with large black-patterned wings and six legs, shifting sideways.", "A grey-brown insect with patterns on its large wings and large legs, walking from side to side swaying" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 298, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e318f536fd0249efae3b5e30c2a13159", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a medieval knight wearing chainmail, a white tunic with a red cross and a belt, stretching out his arms.", "A 3D model of a man wearing gray overall and white tunic with a red cross in front standing stretching his arms out ", "3D model of a medieval knight in chainmail and a white tunic with a red cross, extending his arms.", "3D model of a knight in a grey chainmail, a white tunic with a red cross, and a sword on his black belt, is stretching his arms out.", "A man in an ancient outfit, with a long white dress on top, a sword on his belt, stretching his arms in all directions" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 299, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e361e8af37d943d599de55785585b51f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a soldier in a black armor, including a helmet, vest, and equipped with a firearm, walking cautiously.", "A 3D model of a soldier wearing black uniform holding a gun walking slowly looking around fencing.", "A soldier in black armor and helmet, carrying a firearm and walking carefully.", "3D model of a soldier in black armor and helmet, holding a gun and walking carefully while looking around.", "A man in a black uniform, helmet, gun in hand, walking and looking around" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 300, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cc4eef17bff24cb58358cfa1b7e045a4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of fusifrom shaped silver fish moving its tail", "A 3D model of a gray silver fish swimming while wagging it tail.", "A 3D model of a streamlined silver fish swimming with its tail swaying back and forth.", "A fish with green skin and black dots with a white belly is moving its tail left and right..", "3d model of a long green coloured fish in a point, swimming and wagging its tail" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 301, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cc63f3c02d46486fb1243e3c06072a94", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a woman dressed in a black and gray jumpsuit who wears black heels and has her black hair tied back, her arms are raised at first, then she walks diagonally.", "A 3D model of a lady with long black hair,maroon and brown jumpsuit and high heels boots catwalking.", "A woman with her long black hair pulled back, wearing a maroon and brown jumpsuit paired with high-heeled boots, strides diagonally in a catwalk manner.", "A dark brown haired woman wearing a maroon and dark beige leather suit and black heeled boots takes three steps forward, two to the right, then two last ones to the left.", "A woman with black hair in a ponytail, burgundy suit with beige inset, arms out to the sides, arms down, and walks with a left to right bend" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 302, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cc341b971b704c5ebdc4a0a627917ef6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a man wearing a gray and gold suit with a microphone headset on his head, waves his right hand, stops then start waving with the same hand ", "A man with headphones,wearing gold and white suit and brown shoes waves,stops and then waves again.", "A 3D model of a man in a gray and gold suit with a headset, waving his right hand, pauses, then waves again.", "a man wearing headphones, a gold and white suit and gold shoes waves, lowers his hand, then waves again.", "A man in a gold jacket, white headphones, white trousers, standing straight and waving his right hand" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 303, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cc521221251a4c87b0cb429f98de882c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a brown and black driller, held by a metal stand, has a rotating drill bit ", "A 3D model of a brown and black bottom driller with a stand.", "A 3D model of a brown and black bottom driller mounted on a stand, featuring a rotating drill bit for enhanced functionality.", "3D model of a yellow and black construction machine an a black frame with a black rotating cylinder at the bottom.", "3D model metal construction similar to a conveyor with generator connection" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 304, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ccb34ca411cb4a098f58b7e74be77bac", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a black dragon flying, moving its wings and tail", "A 3D model of a brown dinosaur flapping its wing while flying.", "A 3D model of a black dragon soaring through the air, with its wings and tail in motion.", "3d model of a dark red dragon beating its large wings while flying.", "A black Valkyrie with an aggressive look, levitating, flapping her wings" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 305, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ccde5dc68c9746b39378eb39c7ee0ed7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A nun wearing black-framed glasses and turning her head right then left", "A nun wearing glasses and black clothes looks around", "A nun with black-framed glasses, looking to the right and then to the left.", "A nun wearing glasses turns her head to her right then to her left.", "A woman with glasses in a nun's costume stands up and turns around" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 306, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd2a2d7fe1134a40a4fe65eb422fe736", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black eagle with a gray head and tail flies by flapping its wings.", "A black eagle with white head and tail flapping its wings while flying.", "A black eagle with a gray head and tail soars through the sky, flapping its wings as it flies.", "3d model of a bald eagle beating its wings in the air.", "3D model of a flying eagle" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 307, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd3dff9f2ebe4dc8a8e3644e5266b210", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a black projector that rotates and emits a red light", "A 3D model of a black smart lighting rotates", "A 3D model of a black projector that spins and projects a red light.", "3D model of a black movement detector emitting red lasers.", "Black coloured spotlight, with a spotlight that moves in different directions, with red beams of light" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 308, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd4d902eec314a419adde15d9e985877", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown wooden cutter with self-retracting blade", "A knife with yellow handle folds itself", "A brown woodchopper that automatically retracts.", "3D model of a utility knife retracting into itsself.", "yellow stationery knife, blade sticking out" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 309, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd5f0d68b94a4afc8a0513a0f2680914", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a blue cartoon character, large in size, with antennae, wearing a black dress with red sleeves, sunglasses, who reads something on a wooden panel then falls to the ground crying.", "A big cartoon with antenna wearing black vest ,on red sweater shirt standing infront of a board falls down lifelessly", "A 3D model of a large blue cartoon character with antennas on his head, dressed in a black dress with red sleeves and sunglasses, examines a wooden panel, then falls to the ground in tears, collapsing in front of the panel.", "3D model of the character king kai from dragon ball Z laughs while looking at a wooden board, then falls on his back from laughter.", "A round creature with a long black moustache, in a black suit, with yellow inserts and red, praying by the table, and falling, gasping for breath" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 310, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd690fd75e0e4e67989c7f194f975247", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a square-format cylinder in different colors, with a soccer ball in the middle, and the cylinder rotating in a spiral.", "A 3D model of a twister,with red,gray,brown,yellow and black colors spinning.", "3D model of a colorful square cylinder with a soccer ball inside, rotating spirally.", "brown, black and yellow squares are spinning around a dark pyramid with a soccer ball within it.", "Construction of 6 curved squares, with an object inside, rotating clockwise in different directions." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 311, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cd0235388b9a4f2eb2c19d19ee5cc05d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a soldier in silver-brown metal armor holding a long sword in his right hand", "A 3D model of an ancient soldier wearing a garment,boots and helmet holding a sword", "3D model of a soldier in silver-brown armor holding a long sword.", "a soldier wearing a gold and gray helmet and body armor with spiky shoulder protectors, black pants and black boots is standing at ease while holding a katana.", "A character in an ancient military uniform, sword in hand, standing still and breathing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 312, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ce2a6a9fbc544bd180dc067a7fe2d94d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Azerbaijan flag that moves with the wind", "A 3D model of azerbaijan flag with light blue,red and green colors being blown by wind", "3D model of the Azerbaijan flag in light blue, red, and green, fluttering in the wind.", "3d moder of the Azerbaijan flag waving in the wind.", "3d model of Azerbaijan Fraga,blue,red,green colour in the middle with logo,develops from the wind." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 313, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ce6cd84906ea4c95943ea647d90ef887", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a brown paper folded into the shape of a pinecone that falls as it spins", "A 3D model of a orange folded paper rolling down.", "3D model of a spinning brown paper pinecone falling.", "low poly model of an orange pine cone rolls and floats in space at irregular paces.", "a gold-coloured cone wrapped around an axis" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 314, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ce50fe2e6a654a3bafab950c0f101e59", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a moving gear knob starting from the right, passing through four corners and then returning to its initial position in the middle.", "A gray gear with yellow battons changing gears.", "3D model of a gear knob moving from the right, through four corners, and back to the center.", "3D model of a gray gear know with yellow buttons moves clock wise in a square pattern.", "Grey upper part of the gearbox with orange buttons, shifts gears" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 315, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ce191e0410294ccfadf1f105b51874db", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a gray-faced figure wearing a yellow sweater and boots, black pants and a black helmet, lying on the ground, then jumping up and getting to his feet. 
", "A cartoon with black helmet,yellow sweater,black trouser and yellow boots lying down with back ,jumps while standing with it two legs and throws it hands.", "A 3D model of a gray-faced cartoon in a black helmet, yellow sweater, black pants, and yellow boots lies down, then jumps up, standing on two legs and waving its arms.", "A robot wearing a yellow sweater, black pants, yellow boots and a black helmet with glasses laying on the ground does a kick up, then assumes a fighting stance.", "3d model of a toy character in a yellow suit, with a helmet, in a lying position jumps and stands on his feet, does somersaults" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 316, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cf76d8c5375b4d168b879a6e0bfe5433", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of arms with gray sleeves carrying a gray pistol that loads it, fires two bullets, then unloads it.", "A pair of hands wearing gray sweater are holding a gray gun and shoot.", "3D model of arms in gray sleeves handling a gray pistol, loading, firing two shots, and then unloading.", "A pair of arms in gray long sleeves loads a gun, takes two shots, then unloads the gun.", "3D model of the hands that hold the gun, reloads it and shoots it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 317, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cfaf110f50054a15abc33e7d7fe109f4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young sportsman wearing a yellow jersey with the number 30 on it, black shorts and black shoes, has his knees bent and both hands raised.", "A 3D model of a male wearing sports t-shirt and black short and shoes bends ready to play.", "A young athlete in a yellow jersey with number 30, black shorts, and shoes, bends his knees and raises both hands.", "A soccer goalkeeper wearing a yellow tshirt over black long sleeves, yellow gloves, black shorts with the number 30 on them and black socks is anticipating a shot.", "3d model of a man in a yellow t-shirt with a print, black shorts, in the form of a football player, stands in defence" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 318, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d0bd82b23fa74569ac47c11bb240928a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a yellow emoji with closed eyes that sticks out its red tongue and moves from right to left", "A ball-like shaped yellow head ,closing eyes with mouth wide open tongue out moves right and left.", "A yellow ball-shaped head with eyes closed and mouth open, sticking out its tongue, moves right and left.", "3D model of a laughing emoji with closed eyes bounces around as it sticks its togue out.", "A yellow balloon with a smiley face, bobbing and sticking out its tongue" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 319, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d1ca4cc2070743979a68757788abc1e1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young man with long blond hair, wearing baggy clothes and a grey cap on his head, dances hiphop.", "A 3D model of a man with purple cap,wearing white shirt,black jacket,gray trouser with patches and black shoes dancing.", "3D model of a man in a purple cap, white shirt, black jacket, patched gray trousers, and black shoes dancing.", "blonde man with a purple beanie, sunglasses, a white shirt, a black vests, black baggy pants and black shoes is doing a hip hop dance.", "Man with white hair, purple hat, black clothes, dancing hip hop." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 320, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d1d6a443e30041818a5f32b02b3c171e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a scary character with an onion-shaped head, wearing a dark-colored sweater and pants and a gray bib, holding two bloodstained axes in his hands", "A 3D model of a cartoon, with a big head, wearing a gray sweater with blood stains, holding bloody an axe in both hands and running. 
", "3D model of a creepy character with an onion-shaped head, dark sweater and pants, gray bib, holding two bloodstained axes.", "3D model og a cartoon man wearing a black mask, a black sweater, gray pants, black shoes and a gray apron stained with blood is running with 2 bloody cleavers in his hands.", "A character of grey colour, with a bulb and grazes on his head, in a human body, in blood, holding 2 knives, and running" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 321, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d1e54824e8bb425c91b125595d0a2c7b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a man wearing yellow overalls, short red hair and large black glasses, trying to sit on a toilet. ", "A man wearing yellow overall with red stripe on the neck,arms and legs is sitting slowly on the white toilet bowl", "A man in a yellow overall with red stripes sits slowly on a white toilet.", "A stuntman wearing a yellow and red suit with a toilet sink printed on its chest sits on the seat of a toilet sink with plane propellers attached to each site.", "3D model of a man in a yellow suit, opens his arms, sits on a toilet with a closed lid, with engines" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 322, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d4c933eca2ef4a84aacbd609d11c09a1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a green robot made with legos playing jump rope", "A 3D model of a jungle green robot is skipping rope", "3D model of a green LEGO robot jumping rope.", "3D model of a green robot playing with a jumping rope", "A green lego robot leaps over a brown jump rope" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 323, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d6ae0f42026a40518f04d104234b62f5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an avatar wearing gray and purple armor, and a purple cape stretching.", "A 3D model of a female ghost wearing purple and gray armor is stretching out.", "3D model of a female ghost in purple and gray armor stretching", "3d model of an alien monster wearing maroon armor and a violet cape stretches its shoulders.", "an alien green creature with a cherry-coloured cloak and gear, kneading his arm" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 324, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d6c2c6cc12994ef29431b21a6f7a1860", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a very slim girl with long blue hair, wearing a purple dress, black boots and sunglasses, walking straight ahead.", "A female with long black hair ,purple dress,glasses,black shoes is walking straight in different directions", "A lady with long black hair in a purple dress, sunglasses, and black shoes, walking straight.", "A slender cartoon girl with dark blue hair wearing a purple dress and black shoes is walking.", "A cartoon girl tall with black hair in a dress with big eyes walks with a calm gait" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 325, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d6d457951ec74dc8b4994062ae002ed0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young man walks straight ahead wearing a grey hoodie, black pants and white sneakers.", "A 3D model of a man wearing white hoodie,black trouser and white shoes waking straight in different directions", "3D model of a man in a white hoodie, black trousers, and white shoes walking in various directions.", "A yound man wearing a white hoodie, black pants and white shoes is walking.", "A dark-coloured young man with black hair wearing a white hoodie and dark trousers walks in a relaxed gait" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 326, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d7e1ab755c264455a116085daba3041c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a cartoon character wearing a blue shirt, a vest and black pants as well as a black hat that stands in a fighting position.", "A 3D model of a cartoon soldier wearing military uniform performing karate moves", "3D model of a cartoon figure dressed in a blue shirt, a vest, black trousers, and a black hat, positioned in a combat stance.", "A blue robot wearing cowboy clothes puts his gard up ad steps around", "A robot with a hat, body armour, big arms, spreads them out, steps back and starts boxing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 327, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d8ba803412ce4218ad6b83da6d46d7e4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A tall, fat man wearing a black sweater with stripes in the middle of different color, black pants and black slippers, is dancing while moving his whole body", "A 3D model of a man wearing brown sweater with stripes in front,black trouser and shoes break dancing.", "A large, stocky man in a black sweater with striped patterns, black trousers, and black slippers, is dancing and moving his entire body.", "A chubby man with a buzzcut wearing a black sweater with stripes on its torso, black pats and black shoes is throwing dance moves.", "Man in striped sweatshirt, black trousers, dancing with his arms doing the swing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 328, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d12b4d7a705b40cd8e96fe8ddcfa5ef9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a white star wars character sitting on his knees, raises his right hand and then puts it back in its place", "A white and yellow star wars sitting on his knees squatting,stretches hisright arm and back.", "3D model of a white Star Wars figure kneeling, lifting its right hand, and then returning it to its original position.", "A storm trooper with green patterns on their armor is kneeling and gives directions while pointing their finger.", "White robot with green inserts raises its right arm upwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 329, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d13eac8f4c4f44dfaa845c1af9f7d5c6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a grey ladder truck with wheels in motion", "A 3D model of a gray truck carrying ladder on top ", "3D model of a gray ladder fire truck with moving wheels.", "a gray truck carries a ladder while its wheels are moving.", "It's a long grey car with steps on the roof, driving straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 330, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d22e3dc957b142479e1723a379494eb3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a grey zombie, wearing brown pants, he has both hands raised in front of him and walks slowly in a scary way", "A 3D model of a zombie , wearing gray t- shirt,brown trouser with his arms above the shoulder walking slowly staggering", "3D model of a gray zombie in brown trousers, with both hands raised in front, walking slowly and menacingly.", "a blue zombie wearing brown pants and one brown shoe is walking mindlessly with his hands held up in front of him.", "A cartoon zombie in grey, with a protruding rib on the side, with a large jaw, walking upright, dragging his feet like a zombie" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 331, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d35d4c4065494d1687d06aa7353f6d79", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Two toasts are placed in a red and black toaster", "A 3D model of a red and gray bread toaster placed two slices of bread", "3D model of a red and gray toaster inserting two slices of bread.", "A pair of bread slices get inserted into a black and red toaster.", "Red toaster, with grey inserts on the sides, with 2 pieces of bread sticking out, press the lever to cook the bread" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 332, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d39c050aaa204333a0dbd8259066376d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a soldier wearing black armor and a black helmet, holding a sword in his right hand and a black shield in his left hand, jumps very high then gets back on his feet", "A 3D model of a soldier wearing gray garment holding sword on his right hand and shield on the left jumps high and stands", "3D model of a soldier in black armor and helmet, with a sword in his right hand and a black shield in his left, leaps high into the air and then lands back on his feet.", "A knight wearing black armor jumps up while holding a sword in the right hand, and a shield in the left one.", "a man in a dark, ancient metal outfit, with a sword and shield, makes a high jump" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 333, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d046ff910658476ab2a0605177976e96", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a long-haired girl, wearing a blue bustier, a blue mini skirt and black shoes, who takes dance steps", "A 3D model of a girl with long gray hair,wearing blue short off shoulder dress and black shoes dancing right and left", "3D model of a girl with long gray hair, dressed in a blue off-the-shoulder mini dress and black shoes, dancing side to side.", "gray haired girl wearing a blue corset, a blue skitr with black frills, and blue shoes is dancing while moving her shoulders side to side.", "A woman with long grey hair, wearing a blue corset, blue skirt, dances to the right and left, moving her arms and legs" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 334, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d52bd1b900054b5e8dc88c30caf0e9f6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a clown with big blue creepy eyes, dressed in a shirt and a cap of red and blue color, blue pants and big brown shoe, carries a red and yellow balloon in his right hand and a \"Balloons!\" sign in the left one, making weird movements before stopping.", "A 3D model of a cartoon character wearing multi-colored striped cap and shirt,blue pant, brown shoes holding ballon on right hand and a board written ballons on the left hand.", "A cartoon with large, eerie blue eyes, wearing a red and blue shirt and cap, blue trousers, and oversized brown shoes, holding a red and yellow balloon in his right hand and a 'Balloons!' 
sign in his left.", "3d model of a ginger robot toddler with large blue eyes wearing a red and blue striped propeller cap, a shirt with the same pattern blue jeans and brown shoes hold a balloon in one hand, and a sign that says \"Balloons!\" on the other vibrates as if he's having a seizure then shuts down.", "3d model of a plastic toy in the form of a boy, in a striped camper with a fan, T-shirt, blue trousers, in his hands he has a yellow-red ball and a sign, makes very fast sharp movements, like a system failure, and lowers his body downwards" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 335, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d52be89d681e4d669e3c7f0397de470f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Toilet that opens and closes, on top there is a green toilet paper", "A 3D model of a white toilet opens its bowls and closes placed light green tissue paper.", "3d model of a white toilet opening and closing its lid, with green tissue paper on top.", "A toilet with green toilet paper on top of it opens and closes.", "3D model of a toilet that opens its lids upwards, with green toilet paper on top, and closes the lids" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 336, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d53ce50912db4d078c771da0a78317bb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Two arms with green sleeves and black gloves, carrying a gun that discharges it then charges it again ", "A jungle green arms and black gloves holding a gun load and unload its bullets", "Two arms in green sleeves and black gloves, handling a gun that fires and then reloads.", "A pair of arms in green sleeves and black gloves reloads a gun.", "3d model of hands with khaki sleeves and black gloves, holding a gun, reloading it, and preparing to shoot" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 337, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d66dda6dffc4440aab2d61c8aef3802d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Four doors of a red car open at the same time", "A 3D model of a red saloon car opens its all doors wide", "A 3D model of a red sedan with all its doors fully open.", "Four doors of a red car open simultaneously.", "3d model of a red coloured car, with blacked out windows, opens all its doors at the door" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 338, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d081cb49d3b74565849f025111d9c003", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young woman with black hair wearing silver jumpsuit is lying on the floor and then gently rises", "A 3D model of a female wearing white jumpsuit ,back shoes lying down,sits and then stands up", "A 3D model of a woman in a white jumpsuit and black shoes, lying down, then sitting up before standing.", "A brown haired woman in a while shiny body suit laying on the ground sits up, then stands up.", "A woman in a white latex costume lies on the floor in the shape of a star and slowly gets to her feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 339, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d88c58195ab74842aea110040b319f31", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a man with very wide shoulders who wears a striped shirt, a black jacket, a khaki shorts and brown shoes, walking backwards and immitting that he has a gun in his hands", "A 3D model of a mascular man wearing dark gray ,jungle green short and brown shoes,walks backward pretending to be holding a gun ", "3D model of a broad-shouldered man in a striped shirt, black jacket, khaki shorts, and brown shoes, walking backward and pretending to hold a gun.", "A man with a wide build and disproportionately small arms wearing a black vest, beige shorts and brown shoes, walks to his right while pretending to hold a two handed firearm.", "A man in the form of a zombie with a broad back, wearing a jacket, holding an imaginary weapon and walking" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 340, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d89a1af2f6ad48d8967386ee24eac996", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive 
captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a metallic robot in red and gray color carrying a sword in his right hand and walking straight", "A red metallic robot holding a sword on his right hand,walking straight in different directions", "A red metal robot with a sword in its right hand, walking straight in various directions.", "An orange robot is walking while holding a katana in its right hand.", "A red robot with thin grey details, holding a sword, walking straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 341, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d090db20421048018ee1a977f91923e3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young woman with long hair dressed in a white shirt, black pants and white sandals has her right hand on her hip", "A female with long brown hair,white blouse and blue jeans ,right hand on the waist while closes her legs facing forward posing ", "A woman with long brown hair, wearing a white blouse and blue jeans, poses facing forward with her right hand on her waist and legs together.", "a brown haired woman wearing a white blouse, dark blue jeans and white sandals stands with her hand on her waist and legs crossed, then uncrosses her legs.", "A woman with brown hair, wearing a white T-shirt, blue jeans, standing straight with one arm at her side" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 342, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d96f74f0dc6f49559083c110fef37e2f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a faceless man dressed in a red and white jacket, black pants and black shoes spins on himself", "A dummy man wearing red jacket with white arms, black pants and shoes,rotates", "3D representation of an anonymous figure wearing a crimson and white coat, dark slacks, and black footwear, rotating on the spot", "A faceless man wearing a red and white vest, a white shirt, black pants and black shoes is slowly rotating", "3D model of a man in a red bomber, black trousers, spins on an axis" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 343, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d125f5ca39c04bc4b0f4743d319a9c67", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A long red metal stick with three metal blades on one of its ends, is spinning around while the blades are slowly opening up like an umbrella.", "A 3D model of a red metal stick with three with blade on top spinning", "A lengthy crimson metal rod with three metal blades at one end rotates while the blades gradually spread open like an umbrella.", "a metal staff standing upright spins while three blades with pink, blue and green tipts lift up.", "A metal staff, with coloured elements, revealing blades at the end" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 344, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d278a334bc864830b6228d45c92b85bc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a blue-eyed crane moving his lips as if he was talking", "A 3D model of a crane with blue eyes rolling its head as it moves it lips", "3D representation of a crane with blue eyes mimicking speech by moving its beak.", "3d moder of a hairless head with blue eyes is talking while moving.", "a baby's head with blue eyes talking and moving its head" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 345, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d0297fa807eb48c0a77e6ab6964da8bf", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black haired woman dressed in a black sleeveless dress and black shoes breakdancing", "A woman wearing black dress,dancing while doing somersaults.", "A woman with ebony hair, clad in a sleeveless black gown and black footwear, executing breakdance moves.", "a woman in a black dress, black leggings and black shoes is breakdancing.", "A black woman with black hair and a green dress, break-dancing and doing somersaults with her feet" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 346, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d381a17fb98d44208f0657720f00634e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a three-piece object that is being assembled", "3 metal part ,red,black and white assembles all together", "3D representation of a tri-part item undergoing assembly.", "3d model of three objects, black red and white being assembled.", "3 metal parts, one of them is red in colour, Folds up" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 347, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d497dc2a9ea94c0cb8254c03c4bd6da1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A woman with black hair in a bun, wearing a black jumpsuit and heels, with her right eye covered, kneels on the floor in fighting position.", "A woman with black jumpsuit with heels kneels down with the right leg while extending the left one ", "3d model of a woman with her dark hair styled in a bun, dressed in a black jumpsuit and high heels, with her right eye obscured, assumes a combat stance on her knees.", "an eye patched woman wearing a black vest, black shoes and black heeled boots kneels down.", "a woman in a black suit with heels, kneels down and extends her leg" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 348, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d516c512c3a74dee801cb81506f7d83e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a young brown man with brown hair dressed in jeans shorts, a gray cap and a yellow vest where there is a rope tied to the back, showing his fighting skills", "A cartoon with yellow coat,gray cap and blue short with a rope tied on the waist is fighting.", "A young man with brown hair and a tan complexion, clad in denim shorts, a gray cap, and a yellow vest with a rope fastened to the back, demonstrating his combat abilities.", "cartoon boy with tanned skin and blonde hair wearing a gray cap with a blue stripe, a long yellow vest with a bow and rope attached to its back and blue shorts looks arown, throws a right and left hook, then wipes his nose with his thumb.", "3d model of a cartoon character in a grey cap, yellow dress, with an artificially made tail in the form of a ball. Boxing and brings his hands together" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 349, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d570a58b63c6446e83eca133331c4d35", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A table in light brown wood that expands", "A brown wooden table is expanding its inbuilt tables", "3D model of a light brown wooden table that extends.", "light brown wooden table extends to cover more space.", "3D model of a light-coloured wooden table, from which additional shelves can be pulled out" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 350, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d575de00bfcc40728c352c29327d076c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A brown man with a large black mousache dressed in a white shirt, a black jacket, black pants and a gray-brown cap is standing in a fighting position", "A man with gray cap,wearing gray suit ,white shirt and black shoes is attacking while defending himself", "A man with dark skin and a prominent black mustache, wearing a white shirt, black jacket, black trousers, and a gray-brown hat, stands in a combat stance.", "man in a gray and beige cap, a black vest, a white shirt, black pants and black shoes lowers his guard to point to his wrist then puts his hands up again.", "Man in a grey cap, grey suit, standing in a defensive position, protecting his face with his hands, patting his hands on his watch." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 351, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d4060ec3101c47fc861e62e528aff65f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a superhero dressed in a tricolor suit -red, blue, white- touches the ground with his right foot twice", "A man with superhero multi-colored clothes looks down while lifting its right leg and the stomps", "3D depiction of a superhero clad in a red, blue, and white suit, making contact with the ground twice using his right foot.", "Captain america in his amercan flag themed suit looks down at his leg, then hits the round twice with it.", "a man in a superhero costume, with blue, white, red elements, looks down and stomps his foot a few times" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 352, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d5087a60c9a24cf5b7ddb8c33efce264", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a black spider with red dots moving in a scary way", "A 3D model of a black ,white and red spider is moving its parts of the body ", "3D representation of a black spider with red spots, moving in a frightening manner.", "a black spider with green ayes, red and black legs and a black abdomen with an eerie skull pattern moves its front limbs forward.", "a black spider, with glowing green eyes, attacks something with two legs" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 353, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d7194a77e5bc4051ac44ed2a576e67e2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a metallic robot with a hook at the end trying to grab a soda can on a table in front of him", "A metal robot hand with a hook tries to grab a soda from the table.", "3D depiction of a metal robot with a hook at the end of its arm attempting to grasp a soda can placed on a table in front of it.", "a yellow mechanical hook attempts to grab a soda can sitting on a red and black table.", "3D model of a metal robot with a hook that tries to reach an object from different angles" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 354, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d8550afa153b4e04965cd06c5afe154f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a gun shooting a bullet", "A blue gun shoots loaded shoots ,with other bullets placed down", "3D representation of a firearm discharging a projectile.", "3d model of an orange and blue un ejecting a bullet shell while two bullets are sitting next to it.", "A blue-coloured gun with a wooden grip, two bullets side by side, reloads, and a bullet comes out of it" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 355, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d13155dadb914e9cb8c2998f638ddc02", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a soldier dressed in black suit with a gas mask on the head, looking to the right and left then walking straight", "A 3D model of a soldier wearing jungle green uniform looks around lifting his hands and then walks straight", "3D depiction of a soldier in a black uniform with a gas mask, scanning right and left before advancing forward.", "3d model of a man in a SWAT uniform and a gas mask lifts his hands to waist level, looks around, puts his hands down then starts walking.", "A man in a blue military uniform, wearing a bulletproof vest, helmet, gas mask, looks around warily and walks forward" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 356, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d403156b1dde4958b9497e50abaf1062", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A lid of a concerve box spinning", "A metallic can lid flipping at an angle", "A can lid rotating.", "A tin can lid slowly rotating.", "A tin lid from a tin can, turned round and round at an angle." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 357, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d510115b74e84736af994e15781954de", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a cartoon character walking backwards", "A 3D model of a red and maroon cartoon walks in reverse mode", "3D depiction of a cartoon figure moving in reverse", "a distorted 3d model of taz from loney tunes is walking.", "A cartoon character inflatable balloon goes straight on" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 358, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/d983542fbecd4926aa07f1f8ec7e9b05", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Red ceiling fan lamp that turns", "A 3D model of a red wall fan rotates with 3 blades", "Crimson ceiling light fixture with a rotating fan.", "a red 3 bladed ceiling fan is spinning.", "3D Model of the red wall fan is spinning" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 359, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/da9cb8ac53274b9bbd9467b7d83c85fb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a boy dressed in a red t-shirt and blue overalls, he has a brown hat, black shoes and holds a big wooden bucket in his hands ", "A cartoon farm man with brown hat,blue overall and orange shirt ,holding a gray bucket", "3D depiction of a boy wearing a red tee and blue dungarees, with a brown hat, black footwear, and carrying a large wooden pail in his hands.", "3d model of a farmer wearing a strawhat, an orande tshirt, blue jeans overalls and brown shoes stands on his toes while holding a wooden bucket.", "3D cartoon farm boy with hat, denim overalls, holding a wooden bucket" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 360, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dad6928d3df845b9953534870ab5d8e5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a grey robot arm trying to catch something", "A gray robot arm with 3 blades stretches and goes down picking something", "3D representation of a gray robotic limb attempting to grasp an object.", "A gray mechanical arm opens up, thens tried to rgab something from the ground.", "A metal robot, with a rod at the end, on round balls, trying to pick up and drag something" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 361, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dae074abcac844599909191bbccf45ae", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Plane of the aerial campaign SF opens a door", "A 3D model of a white aircraft opens its door ", "Aircraft from the aerial operation SF opens a door.", "A white, black and red commercial plane opens its door.", "The aircraft is white, with blue red stripes on the tail, flying straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 362, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/db9f98c176704fb984dff88df49b3443", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Circles of different colors are untangled in the middle of a large multicolored feather", "A multi-colored feather, with rings in the middle enlarges", "Colored rings are unraveled at the center of a large, vibrant feather.", "circles of many colors making a ball expand at different paces within a multi colored feather.", "A multi-coloured feather in a round shape, with a circle in the middle that shimmers" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 363, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/db269f87897a4d60a2345fb48bd249a8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a white wooden wardrobe with doors closing", "A 3D model of an empty white wooden wardrobe closes its doors", "3D depiction of a white wooden closet with doors shutting.", "light brown wooden closet closes its doors.", "Light-coloured wooden cupboard, opens all its doors" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 364, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dba0c8ab83174c87938844575fded9e6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a pink flamingo who has a black hat and a gold chain on the neck, raises his right foot then the left one", "A pink flamingo wearing black cap and gold neck chain raises its right leg and then left leg", "3D depiction of a pink flamingo wearing a black hat and a gold necklace, lifting its right leg followed by its left.", "a low poly pink flaminco with a black cap and a gold chain lowers its head while lifting its right leg, then lifts its head high while lifting its left leg.", "3d model of cartoon flamingo pink colour, with black cap, gold chain, raises one leg and bends over" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 365, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dba4bb132fa54e4e8dc8b4cde210941c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of an angel with a golden halo, a golden color dress and white flapping wings", "An angel with a gold halo,yellow dress, white wings is flipping its wings", "An angel with a golden aureole, dressed in gold, and having white, fluttering wings.", "3d model of an angel with a golden, dangling hands and fluttering wings halo wearing a yellow dress.", "A figure in the form of an angel, in a yellow dress, with a halo, white wings, fluttering in the air" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 366, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dc7395609f9945b0af0300fc6bdf4e58", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a young woman with short hair, wearing a pink bra, blue shorts and black boots , is lying on the floor and moves backwards then falls to her left side", "A female with short blonde hair,pink small top , blue short and black boots lying down moving backward with arms and legs", "3D depiction of a young woman with short hair, clad in a pink bra, blue shorts, and black boots, lying on the ground, shifting backward, and then collapsing onto her left side.", "red haired woman wearing a pink tank crop top, blue shorts, and black heeled boots is lying on the ground and shifts backwards before falling on her head.", "A cartoon woman in a blue top and shorts, lying on the floor, and moving backwards on the floor with her arms and legs" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 367, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dcad605773944292a309a2bf964ba7be", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a sitting ninga turtle moving from right to left", "A 3D model of a turtle shaped-like man ,sitting moving right to left.", "3D representation of a seated ninja turtle shifting from right to left.", "3d model of the japanese kappa monster laughing while slapping his knee.", "An alien creature in the shape of a turtle, shaped like a man, green in colour, with a shell, sitting on a transparent chair and laughing" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 368, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dce5a2f431444496bffe27666606d7f3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a metal mill that turns with the wind", "A metal wind mill rotates its fan and wings", "3D depiction of a steel windmill rotating in the breeze.", "3d model of a plane prototype with retracting wings, a propeller on top, and skis on the bottom.", "A grey metal structure, like a metal tower with a fan on top and wings, standing on skis, moving the fan and wings" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 369, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dd4c573c3dea40c692b866498c83fe2d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A desk with three drawers of different colors, and whose first drawer opens and closes", "A gray desk with multi-colored drawers opens and closes the top drawer", "A table with three multi-colored drawers, where the top one opens and shuts.", "3d model of a desk with multi colored drawers opening, then shuttng down its top one.", "The table is grey, with three coloured shukhlads, the top one slides out" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 370, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dd499b5892fe4ee88862ac64f90892bf", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a young Muslim man praying, he puts on a green sweater, black pants and a black cap, the prayer mat is grey", "A 3D model of a muslim man with black hat,green sweater,black pant and white socks kneels as he prays", "3D depiction of a young Muslim man in prayer, wearing a green sweater, black trousers, and a black hat, on a gray prayer rug.", "3d model of a muslim man wearing a black cao, a green sweater, and green pants praying on a gray mat.", "3d model of a man in a green jumper, black trousers, black hat, kneeling and praying, leaning his head on the floor." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 371, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dd631c02928540f49bab339c6adbd4ff", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a cartoon character resembling an ant dressed in a green suit that moves her arms and head", "A light green Alien creature is standing while moves its head", "3D representation of an ant-like cartoon figure in a green outfit, moving its arms and head.", "3 model of a praying mantis monster with scythes for hands moving as it breathes.", "3D model of an alien creature with a tail, green in colour, arms with claws, tendrils sticking out of its head, standing and moving its claws" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 372, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dd28429d126f411ba6b6750ee01cf75d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a toy in the form of a yellow rabbit wearing a green overalls and playing cymbal", "A plastic toy kneeling ,wearing green cap and overall ,holding brown cymbals clapping", "3D representation of a toy shaped like a yellow bunny, dressed in green dungarees, playing cymbals.", "3d model of a yellow bunny with green overalls and a green party hat playing cymbals", "A plastic toy of yellow colour, with a green cap, in a green overalls, holding cymbals and clapping with them" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 373, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/dd506219f2fa4d7fbcfde25a88340dd1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a red and black dragon moving gently", "A 3D model of a red dragon moving its wings and tail", "3D depiction of a crimson and ebony dragon gliding smoothly.", "3d model of a dark red horned dragon standing and moving as it breathes.", "A burgundy coloured gargoyle with a large tail, wings, standing in a defensive position and waiting to be attacked" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 374, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ddae0aafb7344f75a0be5f88cbe80bdd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a blue bow that extends then launches a small arrow.", "A 3D model of a blue-orange sports bow pulls a string releases an object", "3D representation of a sapphire bow that stretches before shooting a tiny arrow.", "3D model of an orange and blue bow extanding its string, then launching a small projectile.", "A blue coloured sports bow with orange inserts, pulls a string and fires an object" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 375, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ddd1b1db2e074b7aa5b4cfcba761b340", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a boy wearing glasses dancing dressed in a grey hood, black pants, gray shoes, he puts on a red cap and a blue backpack", "A young boy with red and black cap,gray jacket and pant with blue back bag is breakdancing", "3D depiction of a boy with glasses dancing, dressed in a gray hoodie, black trousers, gray sneakers, wearing a red hat and carrying a blue backpack", "a young boy wearing a orange and black hat, lasses, a gray hoodie, black pants, gray shoes and a blue backpack is dancing", "A boy in a grey sweatshirt, orange cap, glasses, trousers and trainers dances a flexible dance with his hands" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 376, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c2b4724f532d4949b84a8a442cefc4c7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a cartoon character who has a green face, he walks straight ahead dressed in a brown sweater, gray pants and brown shoes, he has large green gloves on his hands and puts a headset on his head", "A cartoon male with green face,brown coat,yellow gloves ,gray trouser and headphones is walking straight forward", "3D representation of a cartoon figure with a green face, moving forward in a brown sweater, gray trousers, and brown footwear. 
The character wears oversized green gloves and a headset.", "a man in a yellow ski mask, an orange hoodie, yellow gloves, gray shoes with black knee protectors and brown boots is walking.", "a man in a yellow helmet, brown jacket, grey trousers, yellow gloves, walking straight ahead" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 377, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c40df34d0aaa44dd8757b1fb4e9d3306", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a man dressed in a brown suit and a blue tie, and has a big mustache, he is singing while moving both his hands", "An old man wearing a brown suit with blue tie and white shirt moving arms potrays singing mood", "3D depiction of a man in a brown suit and blue necktie, sporting a large mustache, singing and gesturing with both hands.", "a bald man in a black and brown suit, a blue sweater and tie and black shoes is moving his hands as he argues.", "An older man with a moustache, classic suit, actively talking and gesticulating with his hands" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 378, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c45ed76ee0da4e16bb975489c2dfc535", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a black magic box with a design of multiple green and purple diamonds, and something green comes out from the top", "A multi-colored with purple bottom box open its lid while a green object pops out rolling", "3D representation of a dark-colored magic box adorned with numerous green and purple gems, with a green object emerging from the top.", "a jack in the box toy with yellow, blue and purple patterns ejects its contents out of frame.", "3D model of a black casket with a multicoloured mosaic pattern, from which a green accordion pops out from the inside" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 379, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c563f36b2b5d430c93716c7214144829", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "3D model of a rectangular wooden box with a metal opening that opens on one side", "A rectangular shaped box opens one end wide and closes", "3D depiction of a rectangular wooden container with a metal latch that opens on one side.", "3 model of a black and brown box opening up, then closing.", "A rectangular long dark-coloured box, from which two doors open and close" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 380, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b606f795afb747f7ac1dc5234ff7d6ea", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a man in a dark gray long robe with a hood, in a plague doctor mask jumps up and pulls his legs up", "a man in a black cloak, a mask with a beak and black gauntlets makes a leap.", "A tall and mysterious character in a long-hooded cloak with a long beak, black gloves and shoes, jumping up and down. ", "A man in a long black trench coat wearing a mask with a beak and black boots, trying to jump and move his feet from the upwards.", "A figure wearing a hooded black robe, grey mask with a beak and black boots is jumping up and landing on feet." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 381, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b752e89d058d482480c47bfe3e2f2186", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "White Mercedes Sprinter with black windows, with black wheels and white discs and open trunk, open side door and folding seat comes out", "Minivan, white, with black seats, with red headlights, opens the side doors, rear doors, and extends the black seat", "A long van with black windows, windscreen, and doors on the sides and the back, opening and closing while showing its interior.", "A white mercedes van with black tinted windows with its back trunk open and the side door slides open, with the foldable chair opening up to be straight.", "A white van with its back doors open, large black side windows, a black stripe along the side, black roof-mounted components, and a sliding door open while a grey daybed is folding out." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 382, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b927ce627b6841a688067331853302d6", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Robot transformer with blue helmet, red body and blue legs, with a yellow belt and blue gloves, walks straight and waves his arms", "A tall robot with red-colored arms, a torso with a blue head and blue wide legs walks straight and moves its arms and legs back and forth", "A robotic character with a body of mainly blue and red, and prominent protrusions on the shoulders, walking straight while moving the arms.", "A transformer robot with red and blue body colour with a yellow belt walking straight forward with moving arms.", "A toy robot with dark blue lower legs and a grey can-like part attached to the side of a leg, a red upper body, dark blue hands and a head with a grey face shield, two light blue rectangles on the chest is walking swinging its hands." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 383, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b1394e96fb1b4ebdb96a76524224358f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark-brown metal canister is slowly shrinking until it's fully flat, as if it has melted down.", "A metal barrel of brown color without a lid on top, squeezed downward, and loses its shape and falls down", "A brown cylindrical drum with round protrusions on its surface is compressed downwards. ", "Brown metallic drum being compressed downwards and ends up flat", "A brown empty metal drum is buckling." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 384, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b42868589adc4958a3d78f332f2dde7c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An adult man with dark gray hair in a dark gray T-shirt, gray pants and black shoes wiggles his hips and sits down, then stands up waving his right hand to the side and his left hand hides behind his head.", "A tall man with a dark skin tone, long dark hair, stubble, black clothes, dancing, swinging his hips in different directions, raising his arms up", "A grown man in a black top and pants with black shoes, dancing by moving downwards and touching the knees with both arms, shakes his body while getting up and moving the arms.", "A fairly aged man in a black tshirt, grey pants and black shoes dancing and swinging his body downwards the stands up then throwing his hands upwards while still moving his body.", "A middle-aged chubby male figure wearing black pants, black shoes and a black T-shirt is dancing starting half standing forward bending with hands on knees moving left and right, then strengthening the body and throwing right hand in the air and then the left hand moving the body at the same time." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 385, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/968931db71db4c518077ff709e265a71", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A figure of a cartoon woman with a short brown haircut and red lips sits on an orange chair in a red blouse, blue jeans and red shoes, leaning her upper body forward and then lifting her upper body up", "A yellow human-shaped doll with short brown hair, red lips, a red sweatshirt, blue jeans, red shoes, raises and lowers its head and sits on a red chair", "A cartoonish character with an oversized head, short brown hair, and facial features who wears a brown top with blue pants and brown shoes and sits on a chair while resting the elbow on the thigh and moving the head up and down.", "A female with a red top,blue pants and red shoes,with short brown hair, sitting on the orange sit leaned forward and then sitting upwards facing forward.", "A female figure with short brown hair, wearing red shoes, blue pants and a red top is sitting on a red chair with her head facing down and then lifting it and moving it around, then nodding." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 386, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2553271e52a447af8c12318ac8205252", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A light grey fighter jet with a yellow cockpit and a dark blue vertical stabilizer flies straight, maneuvering left and right", "A gray airplane with 1 blue wing and a black tail, flies straight while tilting from right to left", "A streamlined aircraft with wings, a tail, and a fuselage showing the cockpit, moving from left to right.", "A grey fighter jet with a blue vertical stabilizer swaying right to left and later into the original position", "A grey fighter aircraft with a dark blue vertical stabilizer is rolling to the left and then to the right, finally getting back in the normal position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 387, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4722392f33aa4bfd9bcb41211b76423b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Dark grey bronze antique fountain filled with grey dirty water, with the top part of the structure spinning", "Fountain-shaped device, gray, two-tiered, filled inside, with a round object with 2 rings, the rings rotate chaotically around the axis.", "A multiple-level fountain with a broad round base that tapers toward the top and a round structure spinning at the top connected to the fountain by the thin rod. ", "A grey-colored two-layer fountain with an intricate top part made out of two intertwined circles spinning around.", "A grey fountain composed of two basins, a central column, rough and uneven edges and a pointed spire topped with a circular ring spinning around the axis." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 388, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7521101dbb714dc991acc78ca1d602a0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A thin young man with black hair in a white hoodie and blue jeans reaches forward with his right hand and presses his finger on something", "A tall man with dark hair, wearing a white sweatshirt, blue wide jeans, gray sneakers, standing upright, moving his right hand as if to press something", "A tall young brown-haired guy, wearing an oversized grey hoodie, baggy jeans and a pair of grey shoes, is bringing his right hands forward and moving it in a way as if he presses a button.", "Man wearing a white sweatshirt, blue pants and grey shoes with short black hair, is pointing his finger forward with his right hand.", "A young male figure with a short haircut, wearing blue pants, a grey sweater and grey boots, is pointing down with its right hand and then getting back to standing straight." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 389, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/506668927f434293b1c567a6ac8501fb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Dark brown wooden coffin with silver metal trim opens the top lid to reveal red cloth inside", "The coffin is black with a pattern on the shape of the top lid with the inscription, standing and at the same time the top lid opens, with red upholstery", "A black geometric coffin with designs on its body opens up to reveal its reddish interior. 
", "A black casket with brown interior, decorated outside with some silver drawings has its top lid open before it slightly bounces and closes it.", "A lid of an open black coffin with six sides decorated with web-inspired fragments is closing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 390, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/670791315b634cb0b1f59cbfe7cff3e4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Black little fluffy kitten with a white muzzle, white paws, yellow eyes, looks straight and wags its tail easily", "a black cat with white spots on its face, legs, neck, yellow eyes, standing on four legs, and wagging its tail a little", "A black cat with white accents stands on four legs and wags its tail.", "A black and white cat standing on its four paws is starring as it slowly wiggles its tail.", "A black cat with white paws, face, chest and tail tip is standing on its four paws breathing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 391, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1924052361fb4bd49b714d296a34d2ab", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green scaly dinosaur with brown spikes and a long tail defends itself by spinning around its axis to the right and left", "A green dinosaur, with large brown wings on its back, with a long tail, turns and waves its tail and neck in different directions, and moves back a little", "A large green dinosaur with a long tail and several spikes at its back, moving backward and throwing its tail to one side.", "Green dinosaur with brown spikes on its back and tail, moving itself round from left to right.", "A green stegosaurus with brown kite-shaped upright plates along the back and two spikes on the tail is crouching and then spinning around and then getting back in the original position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 392, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/9890304332ca44798a7ada99c2fc30d2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Fantasy brown dragon from video game waving its long tail and flying straight moving its tentacles", "a mystical brown creature with four legs, two long wings, a long tail, flying, and flapping its wings and legs", "A flying creature with multiple appendages and a long tail, moving with its wings and tail.", "A brown creature flying moving forward and moving its wings and tail, right to left.", "A dragon-like brown creature is flying flapping its brown, long wings and tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 393, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a2efd6d4227142c7851f8438d5766b29", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A square figure of a boy with black hair, yellow eyes, in a red T-shirt, blue jeans and white and yellow sneakers, runs straight and swinging with his hands", "pixel model of a man with brown hair, dark skin, red T-shirt, blue jeans, white sneakers, running fast, moving his arms and legs", "A blocky character with a large head wearing a red top and blue pants, white shoes with blue and yellow accents, running while moving the arms.", "A boxed boy with a red shirt, blue pants and yellow sneakers running facing forward with is hands swinging.", "A brown-skinned young male wearing blue pants and a red T-shirt is running swinging his hands." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 394, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a3e1931f151446808f78e1e78d1dbfbd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Male anime character figure with black haircut in white shirt, purple belt and black pants holds sword in right hand and break dances.", "An anime character with a high dark hairstyle, a white kimono, a blue skirt, a belt, and white socks is dancing, moving her arms and legs in different directions", "An anime character wears a long-sleeved top with blue pants and a sword held by a rope around his waist, holds a stick in one hand, and dances by moving the arms to the sides and the legs, to and fro.", "A male samurai anime, with a sword on his right hand., wearing a white shirt, black pants and purple belt dancing.", "A male anime character with a sword tucked behind the back and holding another sword in the right hand is breakdancing throwing hands to the sides." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 395, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a3ec034e322b40c9a755d164567ab92c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Metallic grey conveyor belt in a factory with green boxes, from which an old metal typewriter emerges, drives along the belt and hides under the machine elements", "A gray metal structure with a movable surface that moves a cash register along the platform, which appears from the bottom of the structure and moves along the surface to the box.", "An industrial structure with interconnected platforms and joints, with a metallic typewriter emerging from one end and moving along the platform to the endpoint, which is sealed.", "A grey screening machine, with a typewriter on the moving belt and goes to the screening box and it closes.", "A metal grey conveyor belt system with a sign that reads \"PEMBAUAN ASSET\" above it, where a typewriter appears out of one of the boxes at one of the ends of the system and is sliding on the platform till the big brown box in the middle of the belt and encloses the typewriter, the sign is spinning." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 396, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a4dcb07cf3e64106a58a128e3a199144", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Green metal machine gun from a video game with two black guns shoots to the left then turns and shoots to the right", "Green weapons are on the installation, firing from the installation and moving in different directions", "A green multi-barreled weapon with legs and a rotative lower body, shooting from one side to the other.", "A green military machine gun toy that is shooting form left then turns to right.", "A metal green-colored turret looks up and starts shooting rapid fire, goes back to its initial position and then turns to a different direction before starting to fire the shots again." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 397, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a5f422cff0f24adc96ccf061f12c6dc8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Sheriff Wood from the Toy Story cartoon in a brown cowboy hat, yellow shirt, blue jeans and brown cowboy boots dances the Macarena raising his arms up and down and swaying his hips", "A character from a cartoon about toys in a brown hat, brown boots, white vest, yellow shirt, dancing with his hands, moving his hips", "A cartoonish cowboy with a large brown hat, yellow long-sleeved top, blue pants, and brown boots, dancing by raising his arms to his shoulders and back to his waist, while simultaneously moving the legs.", "A cowboy toy story cartoon with a yellow shirt blue jeans and a brown cowboy hat dancing put his arms up and down while moving his body.", "A cowboy doll Woody from Toy Story is dancing macarena extending arms in front, then placing them on his head, and shoulders, and crossing the arms on the chest while rhythmically bending knees." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 398, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a8ef9a806eb14a198d3f542a83f43972", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark brown metal blacksmith's anvil on which lies a hot red-hot piece of metal and a dark gray hammer hits the metal", "An anvil of gray metal on which a split brown object is lying, on which a gray hammer is hitting several times", "A large hammer hits a hot red metal placed on an anvil.", "Dark brown hammer hitting a red hot metal that is on an incus", "A brown hammer is hitting a light brown wooden plank that is on the brown anvil." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 399, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a8fd7184373c4f3cb85cf5608662fcee", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Bald man in yellow rubber suit with red gloves and red boots with white long cape makes right hook", " A superhero in a yellow suit, with red gloves and boots, with a white cloak, is punching aggressively and standing in a defensive stance", "A cartoonish character in a yellow suit with a large flowing cape and red gloves and boots is in combat mode, throwing a fist.", "A bald man with a yellow jumpsuit red gloves and boots, and long white cape is throwing a right punch ", "A bold white male figure wearing a tight yellow costume, red boots, red gloves, a black belt with a golden buckle, and a white wrinkled cap is standing in a fighting stance throwing a punch with its right hand and then getting into an original position." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 400, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a13b8deea4b443489597b84626e8c90f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Adult male fighter with yellow hair in a green vest, white scarf, brown pants and black boots stands straight with his left foot forward and waits", "The character from the game is tall with yellow hair, wearing brown pants, high black boots, a gray vest, black gloves and a red ribbon on his arm, standing up straight and breathing", "A male soldier with yellowish hair wears a combat vest, pants, long boots, and gloves, and stands upright with a scarf tied around the neck.", "A muscular man with protective clothing, green hair and black boots is standing with his left foot forward, ready for anything. ", "An armoured male figure wearing high brown boots, gloves, a vest and a handkerchief is standing breathing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 401, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a19bd2a0bb0e4e27aa15fbd346be302f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A small metal grey knife with a brown handle and a red drop of blood dripping down from the tip", "Knife with a brown handle, gold insert, gray blade with blood dripping from the end of the blade", "A hand knife made up of a round handle and a short blade with a red drop dripping from the tip. ", "A small knife with a brown handle and a drop of blood falling from the tip of the knife.", "A drop of blood is falling down from a tip of a short knife with a brown handle." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 402, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a47b72a618e846ff81a39bcac9d2dbe5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Gray fabric chair with black cross legs rotates around its axis to the left", "The chair is gray in color, rounded in shape, with black legs, spinning around its axis", "A comfortable chair with multiple legs meeting at the center and having a curved backrest is rotating on its own.", "Grey accent chair with black cross legs turning around from the right going to the left", "A grey-colored lounge chair with a curved back and black metal base is gradually spinning around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 403, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a96c6ecef70945cb8a361f09b19b3a91", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Genderless gray human figure stands on a gray treadmill with dark gray elements and steps forward swinging with its hands", "A model in the shape of a gray human walks along a gray sports track at a calm pace", "A faceless while-colored human character is walking on a treadmill while moving their arms back and forth.", "A human figure walking and swinging its hands on a grey treadmill", "A grey faceless and genderless human figure is walking on a grey treadmill." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 404, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a734df8176ca47c68b0a90d1a17b5132", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Beige rabbit with soft pink ears sits on its hind legs, lifts its head up and looks around", "a model of a white rabbit sitting on a triangle and moving its body and head in different directions", "A rabbit with long, erect ears, short tails, and four short limbs lifts its head up and looks to the side.", "An off white rabbit with pink ears lifting its head. and looking both sides.", "A grey rabbit is lifting its head, turning to the left while slightly moving its ears to the right, then turning to the right and getting back in its original position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 405, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/a7005fad11824d4199b952f45384d226", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown woolly monkey doing a forward somersault", "A brownish-colored monkey with a long tail does a flip in a jump, and rotates around its axis", "A brown monkey with a long tail and arms, doing a flip.", "Brown monkey doing a front flip with one arm up.", "A light-brown hairy monkey with a white belly and a long tail is doing a front flip." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 406, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/aa837398e8c9460b99c1588b18af0b4d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Plastic scissors with grey blades and red handles, make the movement of turning levers on the fulcrum, allowing the wedges to cut", "metal scissors of gray color with red handles cut something in the air", "A pair of scissors with gray blades and large round red handles, and the blades opening up and closing in the motion of cutting.", "Scissors with red handles and silver blades moving inward in the motion of cutting something.", "The scissors are opening and closing in the air, mimicking the cutting motion." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 407, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/aaa1cf1ee68646fc9db200c0406c73e2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A round metal blue robot with a black belt and two long legs lifts his right leg twice, then his left leg twice", "The circle with turquoise-colored legs runs fast with wide waving legs", "A spherical robotic body with two long thin legs moving very fast.", "A cyan-colored round-bodied robot with two legs and black rims around its body and legs is the motion of running.", "A turquoise spherical body on two long, jointed legs and black accents at the joints is running." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 408, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/acba12e6486847588748248faa905f46", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with a face from the cartoon The Simpsons with yellow skin and big white eyes dressed in a bee yellow with black stripes suit with green wings stands breathing and looks straight ahead", "Cartoon character in a bee costume, striped sweatshirt, yellow shorts, with wings on his back and a mustache on his head standing upright with his arms spread wide and breathing", "A human character in a bee costume with two long antennae, two large green wings, a black and yellow striped top, and shorts and stands upright on two legs.", "A cartoon on the simpsons with the black striped bee costume and green wings standing breathing heavily looking forward.", "Bumblebee Man from the Simpsons dressed in a bumblebee costume is standing and breathing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 409, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/acf8006c1b794195a4e96e2a50d85be5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Young girl with lush gray hair in a white blouse, black short shorts and white high-heeled sandals dancing salsa, swaying her hips left and right and moving her arms", "a woman with long white hair, wearing black glasses, white shorts, white T-shirt with a neckline, heels, dancing salsa, moving her hips, arms and legs", "A female character with long gray hair, a short short-sleeved top, and blue denim shorts with heels, dancing salsa by moving the hips, arms, and legs.", "A woman with grey long hair, light grey blouse,denim shorts with heels wearing dark sunglasses, is dancing some salsa and then stands.", "A woman with grey voluminous hair wearing a grey V-neck short-sleeved top, light blue denim shorts, sunglasses and heels is performing the salsa side basic step with her arms bent at the elbows, and hands held in front of her chest" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 410, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ad8c845bfd6046659ef93fcae11dcd81", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cartoon dark brown orangutan with a white muzzle, white palms and feet, wearing a red tie, sits on its hind legs and then stands up straight", "cartoon chimpanzee big brown with a beige spot on the front and a red tie on the neck with the inscription sits and and stands up quietly with the help of hands", "A muscular monkey with brown fur wearing a red tie sits and then stands up with its mouth and eyes wide open.", "Brown chimpanzee with a red tie on its neck, is standing up on its feet with the help of its hands", "A sitting open-mouthed brown chimpanzee with a beige muzzle, feet and palms wearing a red tie that reads \"DK\" in yellow, is standing up leaning on its right arm." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 411, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ad28dc69173140a7b39e6c64cf403f57", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Black and white penguin from the cartoon Madagascar with a yellow beak and yellow feet raises his left wing and flaps", "A cartoon penguin from the cartoon Madagascar black color with a white spot, yellow paws, waving his paw lifting it up", "A black and white penguin with a yellowish beak and feet raises its arm and waves.", "A black and white penguin with raising its left hand and waving twice and takes it back down", "A penguin from the animated film Madagascar is standing, lifting its left wing and waving it and then putting it down." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 412, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b0dcaba5df3e4fd2b9d70eecea20897f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark green locust with long antennae and long spread wings stands on its legs and moves its body", "Green-colored insect with long legs, whiskers, wings stands straight and moves smoothly", "A darkish green insect with long antennae, large wings, and multiple legs, moving its body. ", "A green grasshopper having long and alert antenna moving its abdominal up and down", "A dark insect with six legs, whiskers and its wings slightly lifted is moving its body." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 413, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b2f1233f15e54072bda2dc0f532c767e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Young girl from video game with long lush red hair in red dress with metal knife on hip frozen in flight and slowly moves upper body", "A female character from the game with long thick orange colored hair with a yellow gradient at the end in an orange dress with a sword on her belt levitates in the air and moves her arms", "A female cartoon character with long orange hair who wears an orange dress has a long sword strapped to her waist and poses mid-air.", "A girl with very long orange hair and an orange outfit, floating with left leg being elevated. 
", "An anime game young barefoot female character with red, long, voluminous hair, wearing a red dress, having a curved light-green sword attached to her body, is floating in the air up and down moving her hands." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 414, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b3f0e46aec6441168313baf045de4b4d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "White metal fox robot standing on legs tip in orange bow tie with pink muzzle, pink ears, pink belly and pink tail, takes step forward spreading arms to the side", "Model of a white fox in a human body with a fox face with a red bow on the neck, red elements on the body walks making a circular motion and moves his hands ", "A white foxy character with a long snout and tail wears a red bow tie and moves steadily while spreading the arms to the sides.", "White fox robot with patches of pink in the ears, nose, tail and front part of its body, is walking forward with its hands stretched outwards.", "An anthro white fox robot with pink muzzle, chest and a tail tip, a red bow and red stripes placed at key articulation points such as the shoulders, elbows, wrists, hips, knees, and ankles is making three steps forward with its arms stretched outside, then stopping and gesturing with its left arm in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 415, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b3fb06508c5649fd8ecb30d79e2b9050", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green wet toad with big eyes and faded brown limbs is jumping forward", "A dark green colored toad with large bulging eyes jumps up forward", "A greenish toad with round, protrusive eyes and long limbs, hopping on its legs.", "A green toad with patches of brown on its leg hopping forward", "A dark green toad with big black eyes is jumping forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 416, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b6c593f38b024aefa5459d998f02da84", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Metal robot fighter from video game stands in a stance and throws punches then steps back", "the character in the game is tall in an alien suit with a head-to-toe gray color boxing, practicing moves, dodging", "A robot fighter wears a helmet with a visor and a utility belt around the waist is in combat mode and throws fists while dodging them by bending.", "A muscular metallic robot is throwing several punch the stands back.", "A humanoid video game character figure with a muscular build, wearing a grey futuristic suit and a helmet with a darkened visor is in a fighting stance, with fists clenched and legs slightly bent, is shadow boxing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 417, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b7d56a9f12ca4b41ada6731cf14cd0b5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A metallic yellow robot stands on four yellow-gray legs with a long gray metal neck with yellow tentacles pushes up with its legs and jumps", "White robot with yellow elements, a tail in the form of a dog with a sticking out rod from the head, a glowing screen on the face makes a high slow jump, and lands on the floor", "A robot creature with a yellowish body and four limbs, a long thin tail, and a long metallic hand with a claw at the back is jumping very high by pushing its hind limbs. ", "A four legged metallic robot yellow and grey in colour with a long vertical neck is pushing itself up using its legs.", "A white-and-yellow metallic robot with a thin tail, floppy grey ears, and a grey metalling grabber on the back is standing on its four legs and then jumping in the air landing on its rear legs." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 418, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b37e5d4833044ba597b8a5af41b540ae", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Brown cartoon gorilla in red bandana in white dot, purple glasses, white top and denim shorts raises big hands up and greets crowd", "A large monkey in a white T-shirt, jean shorts, bandana, glasses stands on two legs, in a defensive position and actively waving his hands up and calling to someone", "A stylish muscular gorilla wears a white tank top, red bandana, dark glasses, and blue shorts with a gray belt and raises its large arms. 
", "A brown gorilla wearing a white vest, blue denim shorts, red bandana on its head is punching upwards twice then rises its hands.", "A cartoon chimpanzee with a beige muzzle, feet and palms, and black lips, dressed in light blue denim shorts with a grey belt and a buckle, a white tank top, black sunglasses and a red polka-dot bandana is throwing a fist in the air twice and then lifting its arms up and down encouraging cheering." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 419, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/b074cb583c5e4011bbe1d6c4019d0efe", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark-grey stormtrooper from Star Wars in metal gear with black patches is swaying a spear in his right hand before he slightly bends backwards and raises his hands up in a victorious gesture.", "The character from the movie Star Wars gray with black elements with tubes, and glowing elements, holding a large long spear, turning to the left and right, moving his hand with the spear", "A character from the Star Wars franchise wears a large body armor with a helmet and holds a long spear in one hand while raising both arms.", "A robot from Star Wars with grey gear, holding a spear in his right hand, lets loose, swinging his spear, then looks upwards and throws his hands up as a sign of victory.", "A grey storm trooper from Star Wars wearing grey and black gear is holding a long spear, swaying it and then lifting it in the air outstretching arms upwards, bending backwards." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 420, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3b452894962a4d92ab171ab0fe1dccf5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An adult man with pale-white skin, gray hair, wearing a gray shirt and dark green pants, bends down, spreads his arms and shake his head and body as if teasing someone.", "A man of gray color, with gray hair, wearing gray pants, shirt, shoes, standing bent, holding his hands near his ribs, bending over and shouting something", "An adult male human with a long-sleeved shirt, gray pants, and shoes, bends down around the waist area and has his arms slightly outstretched.", "A fairly masculine man wearing a long-sleeved white shirt, grey pants and shoes. with short grey hair, is bending with his hands spread outwards then stands up.", "An adult man wearing a long-sleeved grey shirt and grey pants is standing bent forward with his arms bent at the elbows, then inclining more forward, shaking his hand while stretching his arms slightly backwards." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 421, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3bc6b5dd5c874003a2005f366cb9973d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A cartoon characters Mickey Mouse with black and white body color, wearing red shorts, white gloves and orange shoes, sways his hips and dances a salsa.", "Mickey Mouse cartoon character with big ears, red shorts, yellow shoes, with a tail, dancing salsa, moving his arms and legs to the beat.", "This is Mickey Mouse from Disney studios with large, round ears, wears red shorts, white gloves, and yellow shoes, and he is dancing salsa.", "A Mickey mouse cartoon character, famously known from disney studios, with a black top, red shorts and white gloves is dancing salsa moves ", "A Mickey Mouse figure wearing yellow big boots and red shorts with two white buttons in front is performing the salsa side basic step with his arms bent at the elbows, and hands held in front of the chest." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 422, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3bc90f60c71249799653347bbf9742ab", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A young thin white man with blond hair in black polo and gray pants shows his right bicep which is not pumped up", "Tall thin man with white hair, wearing a black T-shirt, gray pants, red sneakers, stands calmly and raises one hand up, making a gesture of “success” and lowers his hand", "A human character wears a black top, gray pants, and reddish shoes with white accents, and he stands upright while raising one arm at the elbow level.", "A slim man with blonde hair, black t-shirt, grey trousers and red shoes is standing upright and rising his right hand mid-level as a sign of victory.", "A young blonde man wearing a black polo shirt, grey pants and red shoes is bending the arm at the elbow, clenching the fist, and pulling it downward in a quick, triumphant motion, then turning back to standing position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 423, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3cc219ecd5654c23a84b31b566fdff76", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "metal robot doctor octopus blue color body with gray inserts, stands still and there are four long metal black tentacles from his back come out", "Model of a turquoise-colored man with large black cords that imitate weapons, stands straight and does not move", "A stylized robot character with a segmented body and several appendages at the back.", "A robot human structure with turquoise colour is standing upright and with long black attachments on the back of its body.", "A blue faceless human robot with four long black segmented metallic tentacles attached to its back is standing and breathing." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 424, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3cd8902168cf45938b221870c74db743", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "The picture shows two toy plastic trucks of raspberry color, the truck with the orange cabin is a tow truck and its structure is lowered down so that the blue car can move", "Two toys cars, yellow color, with a pink trailer, and orange color with a pink lift for cars, loaded with two cars, stands and lowers the top car lift down", "A collection of vehicles with different sizes and shapes where the smaller cars are placed on a larger truck and one of the smaller cars is being placed on the ground.", "Two long toy trucks with one carrying two cars on the truck and is lowering one of the small to cars to ground level.", "Two truck toys, one on the left is a cargo truck, with a large, solid rectangular body in magenta, and a yellow and gray cab; on the right is a car transporter with two small cars, one blue and one pink, loaded on its angled trailer, its cab is orange and gray, and is lowering a blue car by tilting the ramps on its trailer." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 425, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/004d02243a5b4117afc4baa45eb1eba0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A black African elephant with large ears and long white tusks walks forward, slowly moving its head.", "A large black elephant with 2 tusks walks straight with a calm gait", "A dark elephant with very large ears, a long trunk, tusks, and a tail, and it walks steadily on four legs.", "A dark grey elephant with large white tusks is walking forwards while flapping its ears", "A black elephant with white tusks and angular texture with a blocky appearance is walking forward slightly moving its head up and down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 426, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/04fc4b96b1f242bba7e1baa067e0df10", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "an elderly thin man with short gray hair in glasses, a white shirt, a red tie, a black pullover and black trousers, stands with his left foot forward and waves his right hand", "A tall elderly man in a black jacket, black pants and boots, with gray hair, raised his hand and waved it vigorously", "An old human male wears a suit with a white shirt, red tie, and black shoes, and he throws his arm.", "An old man with white hair, glasses, white shirt and long-sleeve sweater with a red tie and black pants, is standing and rises his right hand and throwing a slap forward.", "An old thin man with white hair wearing glasses, a black suit jacket, a white shirt, black shoes, and a red tie, along with dark-colored pants is throwing a high-five in the air." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 427, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4a5b805b7cab409b9a0534faa9f4b07b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a pair of black fabric sneakers with a white rubber sole and white laces spin around their axis", "Black sneakers,with white insert on the toe,white sole,laces,round side logo on the side,twist around the axis", "A stylized ankle-high sneaker with white laces rotating. ", "Black and white high cut converse with white laces is rotating ", "Two black and white high ankle sneakers with a white toe cap and a sole, a thin black line running along the edge of the white midsole, white laces, a logo on the side that reads \"CHARLY ROCKS\" are being rotated." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 428, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4a6ced2eec1b414fb549d6d23ee7bed4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "character from the game a man with a square head, black hair, glasses, a gray jacket and blue jeans reloads his weapon", " A man in lego style with a square head, dark hair, gray sweater, black pants, boots holding a gun, aiming, standing in a wide stance, reloading the gun", "A cartoonish male character wears a long-sleeved shirt with dark pants and glasses, and he holds a rifle with one arm and loads it with the other arm.", "A boxed like cartoon man in grey top and navy blue trousers is holding a gun and loading it using the left arm standing ready to shoot", "A grey-skinned male figure with short black hair, glasses, wearing a grey long-sleeved shirt and black pants, boots, is reloading a rifle." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 429, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4a4262ce778f4cb8b56d86b52fdff7fd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "tall strong man with dark skin, dressed in blue working uniform with blue helmet and dark gray overalls and rubber black boots with blue glove on his right hand steps forward", "A tall man wearing a blue helmet, blue shirt, gray overalls, boots, blue mittens, a cable attached to his belt, he walks straight with a calm step", "A human character in a working overall with a blue helmet, dark glasses, a large glove on one hand, and a rope strapped around his waist, and he walks in a steady manner.", "A brown man with a blue helmet,shirt, gloves and a black shorts with protective gear. 
Walking forward majestically.", "A big muscular man wearing a purple hardhat, sunglasses, a purple shirt, dark grey bib overalls with a circular green logo on shoulders with a spanner inside and brown knee pads, black boots, a purple glove on the right hand, is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 430, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4b978461d8014a50899505f5d0ca1b24", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a dark grey rhinoceros with a long white horn strides forward and then disintegrates into pixels", "rhinoceros of gray color, with a large horn walks calmly straight, at the moment breaks up into many small squares", "A gray rhino with a large, pointed horn on its snout having four thick legs, moves in one direction and disintegrates into smaller tiny cubes.", "A large grey rhino walking forward the blocks forming the rhino falls apart", "A big dark grey rhino with thick, rough, and heavily wrinkled, armor-like skin with deep folds around the neck, legs, and joints, is walking forwards, suddenly disintegrating into ligh-coloured small cubes that are falling down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 431, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4bf0d791312047be99d986bd35065a58", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "metal gray gate made of thin rods with a powerful pipe edging moves in the wind", "The gray metal door with mesh inside is swinging", "A metallic door with a mesh surface opens and closes.", "A metallic single mesh door, swinging open in a slow motion. 
", "A grey metallic mesh door is slightly swaying." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 432, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/15b3dbad1e7f4712baa2d28df6abc094", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "thin young school boy with black hair in green t-shirt and grey tight jeans and black sneakers sits on brown pillow his left elbow resting on left knee", "A guy with dark hair, wearing a green T-shirt, gray pants, and sneakers is sitting on a pillow, looking left and right, turning his head", "A cartoonish boy in a green top, gray pants, and black shoes sits on a brown cushion with one arm resting on a slightly raised leg and the head being moved from side to side.", "A boy with short black hair, wearing a green t-shirt, light blue pants and black shoes, is sitting down on a brown pillow, with the left arm on the knee,knoding side to side.", "A young brunette boy wearing a green T-shit, grey pants and black sneakers with white stripes is sitting on a brown pillow, with a hand resting on a bent leg in a knee, while the other bent leg is stretched to the side, and is moving his head to the right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 433, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/18c8d71fcac54c5d9dc0dbb5e05db5c8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "hairy black-red-white puppy with big black eyes, blue collar with gold medal, sits on hind legs, wags tail", "A brown cartoon dog with white spots on its nose and paws, a brown spot on its chest, wearing a blue collar, sitting upright, sticking out its tongue, wagging its tail and turning its head in different directions", "A playful brown dog with a blue collar , sits on its hind limbs and has its mouth slightly open while shaking its head and wagging its tail.", "A brown and black puppy with a blue collar is sitting wagging its tail facing upwards ", "A small calico dog with a turquoise collar and a yellow medallion attached to it, is lifting its head in surprise, moving its head to the left, returning to the original position while wiggling its tail at the same time." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 434, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/18e68c55a09847178795682142af3d06", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A bald, tall man with pale skin in a light brown tight-fitting sweater and tight black-brown pants stands straight and holds his left hand out to the side", "Model of a white man in a brown sweater, dark pants, boots, takes his right hand aside, looks at it, and lowers his hand", "A human character wears a light brown long-sleeved top and dark pants with black shoes raises his arm and drops it back.", "A man structure wearing a light brown bodysuit, and brown pants stretching his left arm outwards.", "A male faceless figure dressed in a blown long-sleeved shirt and black pants is standing facing forward, then lifting its left arm while looking at it and turning back to the original position." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 435, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/20e59f9379d846e6bcb2fbba8d609aea", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "The upper part of the man's skeleton white color turns its left hand palm up", "One part of the upper part of the skeleton, white in color, turns the arm up 180 degrees", "A view of the human skeleton which shows half of its body and has its arm slightly raised and the arm moving.", "Half left upper body skeleton with the arm stretched forward and rotating.", "A half of the upper part of the skeleton is twisting its arm." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 436, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/22d13b14e0a4455db9e4fb37cda960cc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A red plastic toy Ferris wheel with multi-colored cabins stands on four red supports and a gray base, while slowly spinning.", "Round Ferris wheel in red with multicolored pins in yellow and green, with seats in a circle, spins in one direction", "A large Ferris wheel with seats on its circumference rotating.", "A multi colour ferris wheel rotating", "A red and yellow Ferris wheel with colorful cabins is rotating." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 437, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/25c40880744b43c6be5dc5905c7de046", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a metallic dark blue solar panel with six panels stands on a piece of green grass and moves upwards", "A black solar panel stands on a round piece of grass, rotates the base in different directions, tilts", "A rectangular solar panel mounted on a pedestal and located on a green base moves from side to side. ", "A black solar panel elevated from the green grass with a long metallic silver rod, moving from one side to the other.", "A rectangle black sollar panel divided into six squares on a grey pole and green lawn is moving from left to the right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 438, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/26a155c39ea74f9b9f74c641a7a431d9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A man with white skin and black hair dressed in an orange and beige robot suit stands in a defensive pose with his fists then his body goes into a convulsion and he falls to his knees", "A man in a red suit with beige stakes stands in a protective pose and begins to shake and fall unconscious to the floor", "A stylized character wears armor that covers the torso and the groin, long gloves, and boots, stands upright with both arms lifted, trembles, and finally falls to the ground.", "A short haired man in robot like outfit standing trying to throw a punch and shakes rapidly the falls down on his knees.", "A young man in a red and beige superhero armour costume in a boxing stance is convulsing and falling forward in his knees." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 439, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/26d58a4848fb44f598950c6b637bb26f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "The Eopies animal from the movie Star Wars is pale beige color, with brown bags on its back walks straight", "Camel of gray color with brown bags attached to its body walking at a fast pace with a slight bend", "A creature with a long neck and four legs walks hurriedly with bags and luggage placed on its back.", "A camel structure walking forward with brown bags and loads on its back.", "A grey camel with brown bags on its back is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 440, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/26f7364a82f94073b6d13596e93beae7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a weeping willow tree with long green leaves stands on a brown pillar and moves around its axis", "A green tree with long branches with many leaves wraps 360 degrees around the axis", "A large green tree with a long truck and several leaves rotates on its own.", "An artificial tree with branches hanging facing downwards is rotating", "A green weeping willow tree is rotating around its axis." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 441, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/28ebec88a44443e29fcb8ac1bbe6a4dc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a man with pale skin and thick black hair in a blue Hawaiian shirt and black tight pants dancing, moving his hips and body and arms then jumping and repeating the same with his back", "A man with dark hair, wearing a blue print shirt, dark pants, spread his legs, sat down and began to actively dance and jump, changing positions.", "A human character wears a blue shirt with black pants and shoes, slightly bends, and moves his arms to the sides.", "A man with a sky blue shirt and black pair of trousers is dancing jumping while swinging his hands right to left then jumps to face the other direction and throwing his right hand in the air.", "A brunette man wearing a light blue short-sleeved shirt, black pants and black boots is dancing with the knees slightly bent and swinging his arms bent in elbows three times in front of the chest, thrusting pelvis forward, then turning around doing the same, and coming back to the original position raising his left arm in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 442, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/35b69cafad8a4dae8e5afc0a80925795", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "an elderly man with a bald head and a thick brown moustache in a brown suit, a white shirt and a blue tie, throws up his hands in disappointment", "A man with a bald spot, with short brown hair, with a big brown mustache, wearing a brown jacket, a blue tie, a vest, black pants, boots stands a little bent and actively spreads his hands something says indignant", "An old man with a bald head wears a suit with a blue tie and black shoes stands upright and raises both arms slightly.", "A fairly aged man with brown hair with a bald spot and moustache wearing a brown coat is standing upright raising his hands with an impression of explaining something", "An elderly bald man with hair around the sides and back and a moustache is dressed in a brown jacket, black pants and boots, a blue tie, a dark blue waistcoat, a white shirt underneath, is standing withhis knees slightly bent and throwing up his arms." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 443, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/35bd9d73e9384980ae7c748cb3909aff", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "The dingo dog with red fur and a white muzzle stands on all fours, wags its tail and turns its head to the right", "The dog is light russet color with white spots on the legs, belly stands upright and turns his head to the left and right", "A brown-furred dog with white accents wags its tail and turns its head to one side.", "Light brown dog with patches of white on its mouth area, and its chest, is standing wagging is tail and turns to face one side.", "A dingo dos is standing on his four breathing, wiggling its tail and then turning its head to the right." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 444, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/35bf9cca0d664a4193f26a13bb7ba0b5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a steel tank in camouflage paint twists its barrel to the right", "Green colored military tank with a large long cannon, turns the base of the cannon", "A large tank with a smaller turret at the top and a long barrel that spins.", "A green armoured millitary tank rotating its barrel to the side.", "A turret of a tank covered in military pattern with a caterpillar tread is turning from left to the right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 445, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/35ff454c7a224e43be2f80510a520b25", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a young skinny guy with a black short haircut in a gray sweater, dark gray tight jeans and gray boots does a backflip and fights with his hands", "A young guy with a short haircut and dark hair, wearing camouflage sweatshirt, dark pants and boots doing somersaults in a jump, and practicing techniques", "A stylized character wears a long-sleeved gray top with blue pants and long boots, has straps on his thighs does a backflip, and throws both fists.", "A young boy in a grey sweat shirt and blue pants , doing a back flip and then throws punches and stands still ready to throw more.", "A brunette young boy in a grey wrinkled long-sleeved shirt, dark grey pants, grey high combat boots, with black kneepads and a black handgun holster on his right thigh is standing with his left foot at the front, and right foot and arm at the back, then doing a backflip, landing and throwing three punches in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 446, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/36e6bb5292dc4f4aa95bab2e2e1a707b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a boy with yellow hair in a tight-fitting sea-green superhero costume, with a black mask and a gray shield on his right shoulder walks straight, stops and waves his right hand", " Tall guy with white hair in a blue suit with a metal structure on his shoulder, walks straight ahead at a relaxed pace and stops.", "A cartoonish character wears a green armored suit with a metallic shoulder pad, long boots, and a mask that covers the upper part of the face, walks straight, stops, and waves one hand.", "Boy with blode hair in a green armoured heroic costume and black boots, walking majestically and stands and waves using the left hand.", "A young boy spiky blond hair in a teal-colored jumpsuit with metallic elements, notably around the shoulders and wrists, black, thick-rimmed sunglasses or a visor is walking relaxed, stoping and looking from left to the right and then waiving with his left hand." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 447, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/44d248dfa0644b4fb861f2b536ce2f9b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Frozen image of brown fur bear multiplying In projection", "The big fluffy bear doesn't move and is duplicated multiple times", "A brown-furred bear bends down while facing the ground and multiplies.", "A brown fur bear frozen and multiplies while still frozen", "A brown bear on its four is still while being duplicated over and over again." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 448, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/44f4d21cedb74ab0b74eb94c174e524d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a beige-brown snail with a large brown shell turns its head to the left and returns it back", "A black colored snail with a large round shell turns its head left and right", "A snail with two antennas and a large spiral-shaped shell turns its head to the side and back to the original position.", "A beige snail with a spiral shell on its back, turns its head to the side then returns it back.", "A snail with a brown shell, two antennae, half beige and black body is turning its head left and right." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 449, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/45db60a0a3af4f24b141d503512356a0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "red-haired girl in a green hoodie, green tight blouse and brown pants looks at her right palm", "A girl in a green head cape, green sweater, dark pants and shoes looks at her hand, raising it", "A green hooded female character in a green top and black pants and shoes has one hand resting on her waist while she raises the other arm to look into her palm.", "A woman wearing a green scarf on the head is turning gently downwards looking at the rigt and the looks straight forward.", "A red-haired woman dressed in black pants and boots, a short-sleeved dark blue shirt with a V cut, with a dark green cape coving her head and shoulders is standing with her left arm placed on her left hip while lifting her right hand and looking at it, then getting back in the original position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 450, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/45e57349f062416aaf11f2c31587da16", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "black young man - video game character in military gear and green body armor holding gray metal machine gun", "A large black male character in a military uniform holding a large weapon with a pipe, a large container on his back, he is breathing heavily and staggering", "A black soldier wears camouflage pants and wears a large black backpack while holding a long-barreled gun and smoking a cigarette.", "A dark skinned masculine man wearing a bullet proof jacket, camouflage pants and holding a minigun ready for any attack.", "A black muscular man wearing black tactical gear, chest armor, black and white camouflage pants, sturdy black boots, and a large backpack holding a heavy futuristic minigun-style weapon, with cables connected to his gear is standing in a combat-ready pose. " ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 451, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/47ebe15d4c0545b88d2846bff9918208", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a middle-aged man in a cap, a checked green and white shirt, gray trousers and brown shoes walks straight", "A man in a gray visor, plaid shirt, gray pants walks straight ahead with a calm stride", "A human male character wears a folded checkered long-sleeve shirt, gray pants, and a hat, and he walks in one direction.", "A man with a long sleeved white and grey checked shirt and gray pants with a grey hat is walking straight forward.", "A middle-aged man with a beer belly and stubble wearing a grey flat cap, grey pants, brown shoes and a white and grey checkered shirt with sleeves rolled up is walking forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 452, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/49c6c4c529cf4de2a8d86406eb424097", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "cartoon character Aladdin with brown hair in a purple vest and white pants holding a metal sword in his right hand, his left leg is extended forward", "Aladdin cartoon character with brown hair, wearing a purple vest, white pants with an orange belt, holding a sword and staggers", "This is Aladdin from Disney Studios with a small round hat, purple vest, and gray pants, and he holds a large sword with both arms slightly apart from the body and one leg in front of the other.", "A young brown-haired main character from Disney's Aladdin, wearing white pants, pink sleeveless vest and a pink hat, is holding a sword that has a brown handle on his right hand with his left leg slightly forward, both legs slightly bent.", "A young boy Aladdin in a crouched, ready-to-fight position, holding a large curved sword in both hands. He is dressed in traditional Arabian-style clothing, featuring a purple vest, white baggy pants, and a red fez hat. His stance and expression suggest he is focused and prepared for action. The sword has a golden hilt, and the character is barefoot." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 453, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/50e17ba5c2dc4e3d9ed0effc7d2f9fd5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark grey Indian elephant walks upright and waves its tail and head", "A large black elephant with big ears, tusks, tail, walking slowly", "A large dark elephant with prominent tusks and a long trunk walks slowly on four thick legs.", "An dark elephant walking forward, wagging its tail very gently", "A dark grey elephant with small ears is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 454, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/50f988fb766b400c85ddfb48cfbf0caf", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "puppet with white face and red cheeks, with black body and striped arms and legs, smiling and dancing Opa Gangam Style", "A tall black creature that looks like a man with a clown mask on his face is doing the gangnam style dance", "A cartoonish character with a big, round, black body with stripes on the arms and legs, and he dances by crossing his hands while moving the legs and raising the arms above the shoulders.", "A puppet with a round head and a black suit with white stripes on the arm and legs, dancing while crossing his hands and moving its legs ", "A puppet-like character with a thin and elongated body with black and white striped limbs, a white, mask-like face with red circular cheeks, a wide black smile, and black eyes with tear-like streaks is performing a gangnam style dance, mimicking a horse-riding motion, with bent legs, bouncing up and down, and crossing its arms in front while moving rhythmically." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 455, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/52d8cdb4279744ce8be4ef34ff79524b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a ping pong paddle with a wooden yellow handle hits a white ball first with the red side and then with the green side", "Ping Pong game one handle round bounces a white ball, flips over and changes color from red to turquoise", "A table tennis racket with one face being green and the other pink, and a small white ball bouncing on both sides.", "Table tennis paddle pink and green in colour hitting the ball on one side and switches to hit it on the other side.", "A red ping pong paddle with a yellow handle is turning around revealing that the other side is green while a white ping pong ball is bouncing on each side." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 456, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/52f142c052564d85b6ef21413af34e25", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a black plastic TV remote with one red button and other light gray buttons lies motionless and presses the light gray buttons", "rectangular black TV remote control with blue buttons and one red button, with a glued logo, lies flat on the surface and the buttons are pressed", "A rectangular black television remote with several buttons and varied colors aligned on its surface and the buttons are being pressed.", "A long black remote control with gray buttons is still and the gray buttons are being pressed.", "A black remote control with grey buttons and a red button at the left top corner is in a horizontal position while buttons 1, 2, 3, 4, 5, 6, 7 are being pressed." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 457, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/60e0e94ca09240b0bfc5736fc9fc56dc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A zombie girl with red hair and a bloody face in a black sweater, blue pants and red ballet shoes stretches her arms forward and slowly moves her legs", "Tall zombie woman with brown hair, black jacket, gray jeans, red shoes, walking straight and dragging her feet, sticking her arms out in front, walking like a zombie", "A zombie character with long hair, a black long-sleeved top, and gray pants with red shoes moves by outstretching its arms to the front and dragging both legs while walking.", "A woman zombie with red laid back hair, black sweater, blue pants, and red dolly shoes, walking unsteadily with the hands streched forward.", "A red-haired female zombie in a black leather jacket and a brown belt, grey pants and red shoes is walking foward with her arms stretched forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 458, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/62f95f8c60f84e6babf9aabe22808307", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "flip flops with brown soles and white leather uppers decorated with silver stones are laid out upwards into different elements", "Sabo slipper with wood sole, white leather, stones, split into four parts", "Stylish slippers with brown soles and white straps having decorative elements on the surface, move up, exposing its various layers and elements.", "A fancy white slipper with brown soles, and decorative silver elements moving upwards to separated into different parts of the slipper.", "A sandal with a beige sole and white upper straps, forming a thong-style fit between the toes, decorated by an anklet with a round pendant and chain details is being desintegrated in an anklet, a strap and two soles." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 459, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/66d536cb4799441bab6270d569d32c42", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "toy metal retro car blue with brown side doors opens hood and side doors", "Old-school turquoise-colored car with beige inserts on the sides, white headlights, white wheels, raises its hood and lowers it, then opens the doors on both sides", "An old vintage car with round wheels and doors on the sides, a curved booth, and a bonnet that opens and closes.", "A vintage Turquoise blue car with beige sides , its bonnet open and closes back. 
and the side doors opens up.", "A front trunk of dark teal blue Volkswagen Beetle with beige sides, chrome bumpers, is closing and then both doors are opening." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 460, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/66db7b22d5084475ab9a969de1dd3580", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a bald young man with brown skin, pumped up arms, a light gray T-shirt and dark gray pants, stands straight and sighs with his shoulders", "Tall black man in a gray T-shirt, black pants, boots, with a beard sighs heavily while lifting his shoulders.", "A black muscular man wears a gray tank top with black pants and shoes, and he stands upright while sighing.", "A black man who is bald wearinf a gray vest and blue pants is standing upright and and breathing heavily with a sigh of relief.", "A black bold muscular man wearing a grey tank top, black pants and boots is standing with his left positioned a bit forward and sighing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 461, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/68d62957664d4c518b11e40b2845e91b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a vertical metal gray power rack onto which a black weight is slowly lowered and pressed down with force", "Gray-colored sports equipment with a blue cushion and a dark-colored kettlebell that rises and falls in the air", "A large machinery with several components and levels and a small black object dropping from the top.", "A gray mettalic gym machine for back exercises, a weight is lowered and the weights on the machine are vertically elevated.", "A black kettlebell is falling on a blue knee pad of a grey pull up and dip assist machine, loweing the paddle and lifting a couple of weight plates." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 462, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/69bc5825cda748cf8d19f20c3f48b0a9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a man in a camouflage closed suit, a camouflage helmet, green glasses and a balaclava raises his left hand up first, then his right hand, then lowers his hands down", "A man in a military suit with a balaclava, helmet, boots, raises his hands up in turn left and right in different directions, and puts them down", "A military soldier in camouflage attire which consists of a helmet, goggles, uniform, gloves, and boots, raises one arm after the other and finally lowers them.", "Millitary soldier wearing a camouflage gear and helmet, with black boots is dancing gently rising his hands one after the other and afterwards lowers them.", "A soldier in a green coloured camouflage uniform, balaklava, black boots and gloves, yellow glasses and a camouflage hat is stretching his left arm in the air and then stretching up his right arm, then lowering them down." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 463, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/69e33e6cebaf4149a27be5419f908558", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a metal gray oven with a red button panel opens the oven and a tray with six brown cookies comes out", "Square gray metal oven with a red control panel stands with an open door, from which a plate with cookies sticking out and shoved inside, and the doors are closed.", "A rectangular gray oven with a control panel at the top and four short, round legs at the base, opens up to reveal a tray with neatly arranged cookies and then closes in the end.", "A metal gray oven with a red control panel opened with some brown choco chip cookies on the tray slide in and closes the oven door.", "A tray covered with beige parchment paper sheet and six brown cookies symmetrically placed on a tray is sliding inside a grey micro oven with a red button panel while the door of a micro oven is closing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 464, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/69e85f0a164f47e0bfe9f8c1c6229fd3", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "old grey african elephant with long tusks stands and flaps his big ears", "Large gray puff with large tusks, long ears, tail, standing straight, breathing and moving ears", "An elephant with very large ears, a long trunk, long curved tusks, and a tail, slightly raises its head and flaps its large ears. ", "An light gray elephant with long curved trunks, standing and wagging the tail rising its head gently then flaps its ears. 
", "A grey elephant is standing and flapping its ears." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 465, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/74e7cee7ed044439aca9241b0739156e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "brown woolly chimpanzee stands on hind legs and moves hips and arms left and right then turns", "The brown colored monkey stands on two legs and starts actively dancing, jumping, turning around waving his hands", "A brown monkey with two arms and legs moves its arms to the sides and turns around while still moving the arms to the sides.", "a monkey brown in colour dancing moving its legs and arms side to side, jumps facing the other side and continues to dance jumping on the other side raising the right hand.", "A brown monkey is dancing with the knees slightly bent and swinging its arms bent in elbows three times in front of the chest, thrusting pelvis forward, then turning around doing the same, and coming back to the original position raising its right arm in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 466, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/075b922d9aae47d6b79d184a9e246946", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "character from the game Sonic purple color with white gloves and red boots runs Fast Straight", "Old retro blue car with black wheels, yellow license plate, front protection, white headlights, opens one door", "This is Sonic the Hedgehog with its prominent blue fur, and spiky hair, wears white gloves and red shoes, and can be seen running.", "a cartoon from the sonic game , with long purple spiky hair with white gloves and red boots is running straight ahead in a fast motion. ", "A purple Sonic the Hedgehog with red boots is running fast." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 467, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/75a4bb26da7f48819d60c3e79d5c6365", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "toy retro car blue lilac color with yellow numbers yellow headlights and black wheels opens the left front door", "Old retro car, dark blue with black wheels, opens one door", "An old blue vintage car with round wheels has doors on the sides with one door opening and a curved booth.", "An old classic vintage royal blue car,with a yellow lisence plate, on black round wheels is opening the left side door gently. ", "The left door of a dark purple classic Volkswagen Beetle is opening." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 468, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/76bf77600330413dbaf65b99a2564979", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "wooden toy dummy Sprinter mounted on stand runs straight", "a wooden mannequin of a man attached to a round stand shows the motion of running ", "A human-like mannequin is mounted on a round panel by hanging on a thin pole, and runs by moving the arms and legs.", "A human-like dummy is attached to a round panel via a thin pole and moves its arms and legs as if running.", "A wooden human mannequin on a pole and a round stand is running fast moving its arms and legs." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 469, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/76e655440c5345bd813ff38681576e5f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a section of a metallic grey hatch opens its dark green lid to allow entry", "A square gray hatch with a dark gray door in the middle that opens and closes", "A rusty metallic lid with a locket that opens by lifting up and closes by falling down.", "A metallic grey hatch lifts its black lid to permit entry.", "A grey metallic lid with a small square door in the middle to grey sqare panel is opening and then closing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 470, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/78f77e56e4c64194a069017b8a50e093", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "female warrior character from the game in a black bikini and black and yellow armor with a short white haircut holds a large long gray metal sword and chops with it", "female game character with white hair, wearing a gray suit, high heels, holding a huge metal sword in her hand and swinging it while squatting", "A female warrior wears yellow and black armored clothing, with protective patches on her shoulders and long boots, and she yields a long sword which she swings from below.", "A female warrior character from the game, dressed in a black bikini and black-and-yellow armor with a short white haircut, wields a large, long gray metal sword and performs a chopping motion with it.", "A female blonde anime warrior in a yellow and black armoured high boots and top part, black underwear, is swinging a massive grey sword." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 471, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/80f10af6cb604d7cbbd141050d093b2b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "an orange leather basketball with black stripes spins first to the left and then back around its axis", "Round baseball in brown color, with black logo and inscriptions, wrapped around its axis and then to the sides", "A round basketball with black stripes and inscriptions as well as a logo spins from the left to the right, and then from up to down.", "An orange with black lines basket ball spinning towards one direction then stops to spin to the other direction.", "A basketball is rotating around its x-axis and y-axis." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 472, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/81fb335478a746c5bc987efebba2bb7b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An Australian White Ibis with brown and black wings, a black bill and a black tail on blue feet stands with its head down and flaps its large wings", "A white bird with gray wings, long beak, turquoise legs, stands on two legs, pecking something and takes off waving its wings", "A large bird with a long neck and wide, expansive wings, a long tail, and colored feet, stands upright while flapping its wings.", "A large bird with a long neck, broad wings, a lengthy tail, and colorful feet stands upright while flapping the wings.", "A big grey bird with a long neck, brown wings, a tail, a beak, turquoise legs is lowering its head and then flapping its wings." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 473, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/87f21ad7bbc34d9f81702ab3283fdec2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A red woolly camel with a dark brown hump stands on its hind legs and kicks its front hooves", "A brown camel with a black colored hump stands bent over and pushes off with two legs, as if hitting someone, and becomes in place", "A brownish camel with a long neck and a large protrusion at its back has two long legs and raises its forelegs by kicking them in front.", "A red in colour camel with a dark brown hump stands on its back legs and kicks its front hooves foward.", "A brown one-humped camel with a dark brown hump, tail and mane is rising up on its hind legs, lifting its front legs off the ground, strinking the air and then getting into the original position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 474, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2e77b6e0afe04ba29579d8d847525f97", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Game character Catra, from \"She-Ra and the Princesses of Power\", with brown hair, red ears, black and red tight suit and dark brown arm warmers pushed her left hip to the left and put her left hand on it", "Game character with long bouffant brown hair, with ears, wearing a red suit with black tight fitting high shoes, standing upright with his arm at his side and looking around", "A cartoonish character with long brown hair pointed ears, and wears a tight outfit with long shoes, as she places one hand on the waist and looks to her sides.", "A cartoonish character with long brown hair and pointed ears, dressed in a snug outfit and long shoes, places one hand on her waist while gazing to either sides.", "A female cartoon character with two pointed ears, wearing a red tight suit, black high boots and gloves, is turning her head left and then moving is back to the original poisiton with her left arm placed on the left hip." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 475, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2efd40dae18b401da769870acb545a0f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "strong adult woman with big belly in black top and grey skirt on hips and cowboy boots in grey hat jumps up with legs tucked", "A large woman wearing a top, a gray hat that covers her face, dark hair, a gray skirt, boots, jumps high and lands. 
", "A plump female character with no facial features wears a hat, a sleeveless top, a big skirt, and big boots, and she jumps very high with her legs apart from each other.", "A strong adult woman with a large belly, wearing a black top, a grey skirt, and cowboy boots, leaps into the air with her legs tucked and a grey hat on her head.", "A big female figure with shoulder-length black hair in a grey skirt, high brown boots, grey gloves, black sports bra, grey fedora is jumping up landing on her two feet." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 476, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2fd7a41706254e6cb2c24467a5ca0370", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "wooden toy in the form of a nest with two chicks and a mother bird in brown color with a rotating mechanism that imitates feeding chicks", "Wooden toy with two levels, on the second level there is a nest with two chicks, which the mother feeds in turn thanks to the trigger mechanism", "A wooden bird nest with a round base and a wooden handle for rotating the device with a bird and its chick up in the nest.", "A wooden bird nest with a round base and a handle for turning the device, which features a bird and its chick perched inside the nest.", "A handle to a brown wooden mechanism on a round stand, a wooden nest over it, in which a small wooden bird is sitting while a mother bird is deeing the small bird, is rotating." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 477, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/2ff954a6b5724049be380710b1ece917", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a mechanical black metal gun with a silver barrel fires a shot", "The massive black-colored automaton levitates in the air and makes a few sudden movements", "A black rifle that features a long barrel, curved magazine, and a trigger mechanism fires up.", "a black gun with a long barrel, lets loose ready to shoot.", "A black rifle with a grey barrel is firing once." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 478, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3a13eb385d534ab48301f77d88581442", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young bald man sportsman athlete in black and blue gym suit and black and blue sneakers walks straight then suddenly falls", "Tall athletic man in black tights with blue inserts, wearing sneakers, walks confidently straight and falls to the floor", "A male athlete wears a blue and black attire with matching sneakers, walks straight, and falls on his back.", "A bald athlete man wearing an athletic black suit with blue and yellow stripes on the side, is walking majestically, then suddenly falls flat on his back", "A bald man in a black one piece full body swimsuit with blue side parts, blue sneakers, is walking forward, then falls down on his back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 479, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/3a67e76decc849c694c228eb590a9902", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a piece of dark green grass moves in the wind", "A circular patch of tall green grass staggers in the breeze", "Long strands of green grasses are being moved by the wind.", "A portion of medium height grass moving within the motion of the wind. ", "The green grass of the lawn patch is moving as the wind is blowing over it." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 480, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5959be37577a4ebbbf860955da0151b2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a small skinny purple shark from the cartoon opened its red mouth with white teeth and swims straight", "A purple-colored cartoon shark with a red mouth is swimming, wagging its tail", "A blue shark with a tapering body, fins, and mouth opened showing visible sharp teeth and a tail that moves from the left to the right.", "A dark blue shark with sharp teeth showcased with its tail undulating from side to side.", "A purple shark with an open mouth showing white sharp teeth is whipping its tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 481, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6220c8120c494d849ca0997af4473664", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An Asian man with a black short haircut in a red T-shirt and black jeans lies on his stomach leaning on two hands and crawls forward while his legs do not move", "A man of Chinese appearance with black hair, wearing a red T-shirt, black pants, black shoes, crawling on the floor with his hands, his legs do not move", "A male character with dark hair, a red top, black pants, and shoes, crawling on his belly and moving his hands in the process.", "A dusky adult male, dressed in a red short-sleeved T-shirt with black trousers and boots, creeps forward on his stomach, using only his hands for propulsion.", "A brunet male adult in blue pants, pink T-shirt and black boots is crawling on his stomach without using his legs, only pushing himself with hands, and then laying on the floor facing ground." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 482, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/6242ee69a361478386050e64a20867df", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A house plant in brown ceramic jug with long green leaves, gradually wilting, its long leaves hang down in turns", "A green flower with long leaves in a black colored pot stands, leaves fall down, a red colored flower grows out of it", "A brown round vase containing a green multi-leafed plant which moves up and down. ", "Wilting occurs in a brown urn containing Green-leafed plant, and in their midst, a vibrant red flower blooms.", "Green leaves of a plant in a brown pot are lowering down, then a red poppy appears from the middle of the plant." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 483, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/7436bc8d938d4d8faa483080b29aaf55", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Final form of Doremy, character from the Japanese game, with a large body, red wings, blue tail and purple paws, stands on its hind legs and flaps its tail", "Children's toy in the form of a mystical creature of white color, with a tail of blue color, red wings, purple legs, the face of a jester. Stands on two legs and waving arms and legs", "A large four-legged creature wears a round hat, has two wings, and a long segmented tail and raises its forelimbs, drops them, and shakes its body.", "A quadrupedal white creature with red wings and a blue long tail, extends its front legs, then brings them down and wiggle its head.", "A mechanical creature with four purple legs, a segmented tail, white body, brown wings on its back, and a humanoid figure with a red and white jester hat on top of its round purple head is roaring, getting back on its four legs and shaking its head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 484, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8011c2706db54e53af9e6e131fdecb73", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a military infantryman in green and brown camouflage gear holds a black pistol in his left hand and dances with his arms and legs moving first to the left then to the right", "Man in camouflage military uniform, wearing a helmet, body armor, with a backpack, dancing actively, holding a weapon in the hands of black color ", "A military soldier wears a light-toned camouflaged uniform, long boots, a helmet with a microphone attached, holds a gun in one hand, and dances by moving his arms to both sides.", "A soldier in full military gear holds a silenced pistol showcasing his dance classic moves.", "A soldier wearing beige tactical gear, including a helmet and a large brown backpack, with pouches around the waist, is dancing holding a gun in left hand." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 485, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/8723c780d58b4e2ebb808c3781860f3f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "astronaut in white spacesuit with green stripes, black gloves and helmet with black glass dances moonwalk moving backwards", "Astronaut in a white suit with green inserts, a large container with a screen and logo on the back, walks in a lunar gait", "An astronaut wears a spacesuit with a large helmet and a visor, a rectangular backpack, and walks by slightly dragging his feet and moving his arms,", "An astronaut in a bulky spacesuit and a backpack traipses with a deliberate gait swinging his arms in a rhythmic motion.", "An astronaut wearing a white spacesuit with green stripes in elbows and knees, a backpack, black gloves, equipped with tubes, helmet, is doing a moonwalk." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 486, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/15191b158b694cec8a112f1f5c61887d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A robot monkey with a big blue face and a black body moves around its axis in a folded position", "A monkey robot with a huge blue face, black body, black hat, spinning on an axis ", "A cute robotic character with a large head wears a small round hat, and rotates on its legs.", "A monkey faced robotic character with a big head, wearing a small round hat, spins around on its legs.", "A monkey-shaped metallic robotic figure with a big blue monkey face and a small black hat on top is rotating." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 487, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/45013a6fe06549618716a5f8a636ca2e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey-and-gold metal robot with a black spacesuit stands in a boxing stance and makes a left hook", "A gray-brown metal robot with sharp points, wearing a helmet, practicing a punch with his hand", "A humanoid mechanical creature with a segmented body slightly lowers its body and throws a jab.", "A humanlike robot with a subdivide body slightly bends down and throws a strike.", "A grey and black humanoid robot, with its parts disintegrated, is standing in a fight stance and throwing a punch." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 488, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/56708b2e0279442bb0e1f58b8a70bffd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "plastic duck toy yellow with orange beak and white eyes moves head right and left", "A yellow rubber bobble head with an orange beak bobbing its head in different directions", "A yellowish duck with a long beak and bright white eyes bounces while slightly moving its head to both sides.", "A jaunty duck, adorned in a sunny yellow hue, with a lengthy beak and luminous white eyes, hops and skips playfully while nodding its head from left to right.", "A yellow rubber duck with an orange beak is slightly moving its head to the sides, while its eyes are widening in turns." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 489, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/079147acb8a2462c9958e559f5fc3600", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "quadcopter with red body and black four propellers spins and maneuvers right and left", "The red drone with black legs and black blades is flying, making movements from left to right", "A red rectangular drone with four arms has moving propellers on each arm and four short legs.", "A red rectangular drone with four arms, each equipped with spinning propellers, and supported by four short legs.", "Drone propellers of a black drone with a red top are turning around making the drone move from side to side." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 490, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/82150bc4ae9d4b35a1424a334f5d6c2a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a cartoon dark grey crow with a big yellow beak crashes into a big wooden brown gate and tries to get through it", "A purple bird-like creature with a long yellow beak, two legs and human-like arms bursts through a large red wooden gate and makes a big hole in it", "A long-beaked creature with two legs and arms forces its way through a large wooden wall by running into it, its hand reaches to the other side, and it falls after that.", "A skinny black creature with a large brown beak and a black bag on its back is attempting to push through wooden doors made of brown wood strips, which are locked by a wooden bar placed across them.", "A black, slender creature with a big brown beak and a black bag on the back is trying to get through wooden doors made of brown wood strips that are locked because a wooden strip has been placed across them." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 491, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/343763e21e7d4185a739555441315d05", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a guy in a yellow helmet, a gray jacket, a multi-colored shirt, blue pants, yellow knees and black boots lifts the switch of the electrical panel up and closes its door", "A tall man with dark hair, glasses, orange helmet, blue jacket, blue pants, black shoes, walks jerkily to a blue-colored panel, raises the lever, and closes the door", "A male character, dressed in a blue jacket and pants with brown boots, a hat, and glasses, pulls up a switch and shuts the door after that.", "A bearded, middle-aged man in an all-blue attire, adorned with glasses, a bright-orange helmet and dark boots, flicks a switch before promptly closing the transformer door.", "A man with a mustache, glasses, an orange hard hat in a dark blue jacket, green patterned shirt, blue pants, high black boots with white edges, yellow knee pads is standing in front of an open dark turquoise transformer box, then pulling up a lever and closing the transformer box." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 492, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/438459aed1624e6ca2aa8c4281415ef4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A plastic purple and white shark with dog-like paws walks on a gray metal treadmill", "A cartoon creature resembling a purple-colored shark on four legs walking on a black-colored treadmill and actively wagging its tail", "A four-legged blue shark with fins and a wagging tail walks on a treadmill.", "A quadrupedal blue shark with its lower section white, swaying tail walks on a treadmill.", "A four-legged, purple and white shark, is walking on a dark grey treadmill while moving its tail." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 493, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/673672cf955443958dbc392d8547ec7d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A male firefighter in a bright yellow uniform with white reflective stripes covers his ears with his hands from a loud noise", "A man in a yellow fireman's uniform, wearing a helmet, grabbing his head sharply with both hands and swaying from side to side", "A firefighter dressed in a yellow overall suit and boot with a helmet bends down to the waist level, placing his hands on each ear and moving up.", "A fully, yellowed-dressed firefighter, cups his hands around his ears, then moves in an irregular manner.", "A male figure wearing a yellow firefighter suit and a yellow hard hat is covering his ears with both hand and bending down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 494, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/778010c051564786ab89f736720c062f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a red human heart with soft pink vessels contracts its muscle and pumps blood", "A red-colored heart with pink-colored valves is pounding", "This is a beating human heart with visible vessels and tubes.", "This is a pulsating human heart, with its network of vessels and tubes visible.", "A red human heart with beige tubes around it is pulsating." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 495, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/bb93b2f4aff94c588fab83acc7166225", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "red haired monkey from the cartoon in a red vest, red cap with a long tail turns to the right", "a cartoon monkey in a red cap, red vest with stars, looks back sharply and looks forward", "A stylish brown monkey with a long tail wears a red vest and a cap, and it has its arms slightly outstretched and looks back.", "A short monkey with a long tail, wearing a red top and a red cap adorned with yellow stars on the top, turns around and then returns to face the first position.", "A short monkey with a long tail wearing a red cap and a red tank top with yellow stars on it, is turning around and then getting into original position." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 496, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/bc4d213f54af47c78161ba676015ef10", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Metal Lunokhod shoots blue beam of light forward", "A gray-colored metal setup similar to a projector, with a circle at the base, raises the projector and directs the blue light upward", "A projector is mounted on a rectangular stand with a thin device moving around it, and the projector emits a large blue beam of light.", "A device sits on a rectangular base, casting a wide blue beam of light.", "A grey metallic Mars rover on a round stand is projecting blue light." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 497, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/88bf1628fc5e449aa9456280f3a73182", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A square Minecraft-style character with brown hair, red red t-shirt and gray pants is walking straight", "Lego character with brown hair, red sweatshirt with white inserts, gray jeans, walks straight with a confident stride", "A blocky character with brown hair and blue eyes wears a red top with blue pants and walks hurriedly while moving his arms.", "A block bodied minecraft character with brown hair and blue eyes, wearing a red top and blue pants, walks quickly while swinging his arms.", "A Minecraft-like character with short brown hair, blue eyes, wearing grey pants and a short-sleeved pink shirt and white edges, is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 498, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/90ad05c2929d40bd8998ae74b0d5813a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a large grey African elephant with grey tusks is breathing, slowly moving its ears", "A gray-brown elephant with large tusks, standing upright and breathing, then flapping its ears", "A dark elephant with very large ears, a long trunk, tusks, and a tail, flaps its ears, and slightly moves its tail.", "A dark grey elephant is standing still, breathing heavily, and then flapping its ears.", "A dark gray elephant is standing, breathing and then flapping its ears." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 499, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/92de6dd75f7648c1bfaae8a8b91e2624", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "female young tennis player with white headband in white dress and white sneakers holds racket to right side of face and wants to hit ball flying at her", "A girl in the form of a tennis player of white color with a red sash, holding a racket in her hands, the ball is in the air and the girl is swinging from side to side", "A female tennis player dressed in a white outfit with a headband and shoes holds a tennis racket and faces a green ball placed midair.", "A light complexion tennis woman player, attired in a white tank top, white skirt with a red belt, and white shoes, stands with her knees bent, holding a racket and ready to hit a green tennis ball in the air.", "A blonde female tennis player with a ponytail, wearing a white tank top, a white short skirt with a wide red belt, white shoes is standing with her knees bent and a racket in both hands while a green tennis ball is placed in front of her." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 500, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/94bd2622065f4d04a8bcdd1f166793a0", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "camouflage pistol changes magazine and reloads and shoots", "a green colored pistol in camouflage in which a black colored magazine with cartridges is inserted and the pistol is reloadable", "A camouflaged handgun with a long barrel and a slanted grip, has the magazine being inserted and the trigger pulled to shoot, with the bullet coming out.", "A pistol is loaded with a magazine and then fired, ejecting a bullet.", "A black gun covered in green military camouflage is being loaded while a black detachable box magazine sliding inside the grip, a slide is moving black, firing a bullet and a bullet casing is falling out of a gun." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 501, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/94e5f0d3cf9e4afabb08195b9f738f1f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A blue Tyrannosaurus Rex with red stripes runs forward with its big legs", "Dinosaur turquoise color with red stripes on the spine, runs on two legs, wags his tail, opens his mouth", "A colorful dinosaur with large hind legs, a long tail, and short forearms running with its head bowed.", "A blue dinosaur with red stripes down its back and a dark cream-colored underbelly is charging forward, roaring fiercely.", "A dark green dinosaur with red stripes on its back is running forward and roaring." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 502, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/97aff5b85f2f40ec93b9da19d35ff182", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "toy small plastic airplanes white with a blue cockpit and a red tail fly next to each other maneuvering", "Three toy planes of white color, with a blue nose, red tail, flying chaotically moving left, right, straight, overtaking another plane.", "An assortment of colored airplanes arranged in groups whiles dropping sequentially.", "A group of matching colored airplanes are flying in formation and then dropping in intervals.", "Three panes with red empennage, a purple nose and a cockpit, black wheels are flying while suddenly changing their positions by dropping." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 503, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/97ca1fc2a00d474b8423a9774d8b5d2e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "military armored vehicle dark green with gray wheels drives straight across rough terrain", "green military vehicle with black wheels without a roof, with a cannon inside on the installation, wobbles, sharply goes to the left and right, and bounces", "A dark green open truck with four dark wheels and a cannon attached to the top, turns and bounces.", "A no roof dark green truck with, machine gun on top, jolts after deflecting.", "A dark green open Range Rover car is moving around and jumping into the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 504, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/97dc38f3b7374a7cb0ce29b55c8781da", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a soldier in a green camouflage suit with a green balaclava on his face spreads his arms to the side and then steps back", "Man in military camouflage uniform, wearing a helmet, goggles, shoes, gloves, raises both arms to the sides, lowers them, and steps back", "A stylized military soldier dressed in camouflage wears a matching helmet with goggles, gloves, and boots, while he outstretches his arms to the sides, drops them to the sides, and walks backward while raising one hand up.", "A soldier in full military gear extends his arms outward, then brings them down and steps backward, with one hand raised.", "A soldier in dark green and brown military camouflage, black boots and gloves, gray glasses, a green balaclava and a gray helmet is outstretching his arms to the sides, then walking back while raising its hand." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 505, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/178c59f4468e41c3a15e4263089f9963", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a multi-headed siren head, skinny, copper-colored monster stands in a boxing stance and throws a left hook", "A skeleton of a man in skin of beautiful color with a gramophone on his head and two pipes, someone beats with one hand", "A Siren head with its prominent conically shaped head connected to a skeleton body by a long neck, while he positions himself in a combat manner and throws a jab.", "A mild brown creature resembling a human, but with two sirens opposite each other with slightly different heights instead of a head, is delivering a left jab.", "A human-like bronze metal creature with two sirens in place of a head is throwing a punch with its left arm." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 506, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/193a97f680644ce3922f3c2fc7fd4f2c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a man in a gray cap, a pink sweater, and black pants stands in a boxing pose and makes a hook with his right hand", "A big black rat with a big pink tail, lowers its head and turns to the left", "A male character wears a pink top with a black pant, shoes, and a gray hat, and he throws a lower jab.", "A man with a scruffy face, sporting a pink short-sleeved shirt, gray flat, pants, and boots dark in color, is throwing a right-handed lower hooks then retreats.", "An adult male with stubble in a pink shirt with rolled up sleeves, black pants and boots, a grey flat hat is throwing an air punch with its right arm and steps back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 507, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/208e5cf748404521894bf30c95f25c12", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a woman with black hair, black round glasses, brown trench dress and brown high boots stands on tiptoes and looks around", "a woman with dark hair, wearing a beige dress, a belt, high boots, gloves, stands still and moves her body", "A stylish woman with black long hair in a light brown outfit and long brown boots with dark glasses stands upright with her arms close to her sides.", "A short-haired dusky female putting on black sunglasses sporting a brown trench coat with a belt attached, and high brown boots, is standing, her head moving left to right gently.", "A brunette woman with short hair in a short khaki trench coat, high brown boots, wearing a brown belt, black sunglasses and gloves, is standing and slightly moving her head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 508, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/222b416357a04a1780815ce4fd7beaa5", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "black adult rat with long pink tail standing on paws and looking to the right", "A big black rat with a big pink tail, lowers its head and turns to the left", "A black mouse with a long tail and four short legs raises its head and looks to the side. ", "A black rat with a long, pink tail, is calmly moving its head as if looking checking the surroundings.", "A black rat with a long tail is standing, moving its head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 509, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/223d32f8b1754fe0b1f928dec7caddec", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "gray long fish with a solid body and one fin swims straight", "Blue eel fish swims straight and waggles its body.", "A silver-colored fish with a body that tapers toward the tail, and it swings its tail from side to side.", "A fish with a silver hue that undulates its tail from left to right rhythmically.", "A grey fish with one back fin is flexing its tail from side to side." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 510, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/238eaf4c3a2b425ab9e848d7554d8b06", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young woman with a short haircut in a leather jacket and a brown leather skirt and long boots dances moving her arms like waves", "Tall woman with short black hair wearing a brown sweatshirt, latex skirt, high black boots, dancing with her hands making a wave", "A stylish woman with black hair wears a dark green dress with long boots, sways both arms, and whines her waist slowly.", "A female wearing a dark green dress improvising her dance moves with both of her hands outstretched.", "A tall young brunette with short hair in a black short leather skirt, black shirt, knee-high shoes, latex brownish cardigan is dancing with her arms outstretched to the sides, creating a flowing wave-like motion with her hands and arms while swaying her hips gently." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 511, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/302fa991d870416da008dddbfe3c11dd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "fighter from the SWAT group in a red-gray-brown uniform stands in a stance and kicks forward with his right foot", "A man in a brown military uniform with red elements, wearing a helmet, body armor, goggles, practicing kicking", "A soldier dressed in a camouflaged uniform wears a helmet with goggles, long boots, and gloves, and kicks his foot in the air.", "A military soldier dressed in brown gear and a grey military vest with red stripes throws a kick in the air.", "A soldier fully covered in black and red protection gear, wearing a helmet and glasses, is throwing a kick with his right leg." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 512, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/370b0a7394ca4a6a88a350a2a23d5c89", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "leather shiny brown soft sofa folds out for sleeping", "Old-school brown leather sofa, folds out into a couch", "A shiny brown sofa with both arm and backrests has a seat that shoots out with a stand beneath it to form a bed.", "A brown doeskin couch with a cascading back is unfolding its seat cushion, revealing a hidden comfortable sleeping space.", "A dark brown leather sofa with a waterfall back is sliding out its sitting panel thanks to a sleeper mechanism." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 513, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/439c950dd1f242868dd9d8fc0a2dcd57", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a black and white striped zebra stands up straight with its head down and its tail wagging", "A white zebra with black stripes stands frozen and wiggles its tail", "A zebra with four steady legs looks down while moving is tail.", "The zebra is exhibiting signs of agitation, its tail oscillating in a tardy and repetitive motion.", "A zebra is standing facing down and slighlt moving its tail to the sides." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 514, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/470efc1ca7934e7bb77b97559ab9124b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Asian girl fighter in red and white short kimono and gray shin guards kicks forward with her left leg tilting her body to the side", "A girl in a red Chinese outfit with leg guards, does high kicks upwards", "A female fighter in a red outfit and long black boots throws her leg into the air while moving her arms and slightly turning her body.", "A female with her hair in a bun, wearing dark boots, a red cloth, wrapped around her body, featuring a bit of white on her right shoulder and a blue scarf, is kicking the air with her left leg.", "An asian female fighter in a traditional red clothes for warriors is repeatedly throwing kicks in the air with her left leg." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 515, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/481d6c5a980d4a71bf7203fa2d64da6f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "blue-red-gray metal intermittent reciprocating mechanism with moving elements", "Blue metal construction with a mechanism inside that rotates and raises the construction", "A metallic instrument with a curved end is connected to a rotational device that moves along the arc.", "A metallic instrument with a curved tip is attached to a rotating mechanism that traces a circular path.", "A mechanical instrument with a dark blue curved part like a sickle is lowering while another attached straight red metallic part is moving along the curved instrument." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 516, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/497b534118074bb89a47bcbae30cad3d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Mickey Mouse from the game with a square body in an orange sweater and brown pants raises his arms up and waves them", "A dog with a human body in a red sweater, brown pants, yellow shoes waves two hands up actively, and puts his hands down.", "A box character wear with a cuboid head and rectangular body wears an orange top with brown pants and shoes and raises his arms and waves them.", "The humanoid automaton, clad in a crimson tunic, brown trousers, and light brown footwear, is raising its arms aloft in a gesture of greeting.", "A human-like Minecraft-style Mickey Mouse in a red long-sleeved shirt, brown pants and light brown shoes is lifting its arms in the air and waving." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 517, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/497d37dc4b97494b8a32966e49cfaf96", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "metal smooth body robot spider black color with red eyes goes forward on its six legs", "A black spider with red eyes, tall legs, walks fast", "A robotic creature with multiple legs and a round body with red eyes moves its legs hurriedly.", "The arthropod automaton, rendered in a pitch-black color adorned with twin ruby eyes, is making its way forward.", "A black robotic tarantula with two red eyes is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 518, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/514bb5b1bbdb41e7b2b537962bac61be", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "beige and brown scaled dragon from the game stands on its feet and moves its head down and then to the right", "A golden-colored mystical creature with tusks, wings, and a hoof-shaped tail at the end, eats something from the floor, raises its head and turns around", "A large creature with two wings, horns, a long tail, and spikes at it back, moves its head downward and to the side.", "A brown creature resembling a dragon is wagging its tail and investigating the ground with its nose, before looking to the left.", "A dragon-like brown creature is moving its tail and sniffing something on the ground then turning its head left." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 519, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/521ceb2fcc1e4cc1b8e424427520af19", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "red and black convertible car toy moves black steering wheel left and right and front wheels move", "Toy car red color with black wheels, white interior turns front wheels left and right", "An open-top reddish truck with black wheels has its interior visible and a steering wheel that moves the front wheels to the sides.", "The front wheels of a red convertible are steering right and left.", "Front black wheels of a red open car are turning right and left." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 520, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/534c485bf6e84791b30cbe4d3b7a0cd9", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young athletic woman with long black hair in a black T-shirt", "A girl with brown hair, black pants, T-shirt, black shoes and glasses, shakes her two hands down sharply and raises her head up", "A stylish woman in a black outfit comprising a tank top, black pants, and shoes, wears dark glasses raises both arms, and drops them.", "A dusky female in a black tank top and pants with silver patches is scoffing, her shoulders and arms moving up and down gesturing her frustration.", "A brunette woman in a black tank top, black pants with silver rectangular patches sewn onto the pants is standing shrugging her shoulders and arms in short, jerky motions, clearly annoyed." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 521, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/534f28f191f442b188b23f1dd46d8b60", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young athletic woman in glasses black tight tank top and cargo pants looks up disappointed and then throws her hands down", "Dark brown door with a handle, opens and closes", "A dark door opens and shuts quickly.", "A mahogany brown door is swinging open and shut with force.", "A dark brown metallic door is opening and then closing quickly." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 522, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/582cb4dcceb94d8ab71096f1ac8f5329", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A green and brown billiard table with three multi-colored balls that hit each other and knock down the white and blue cubes", "Wooden game table with a green cover on top with figures of different colors, on which the ball breaks other objects", "A pool table with four sturdy legs has several colored balls placed atop with the balls hitting each other and then finally hitting the cubes.", "A blue ball on the pool table strikes a red ball, which in turn hits a yellow ball, causing the three white, yellow and purple cubes to fall.", "On the pool table there is a blue ball that is hitting a red ball, which in consequence is hitting a yellow ball which hits three white cubes mounted on each other that fall down." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 523, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/733afd855f5d4c378f041cc89e231770", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a red haired tiger with black stripes walks forward at a fast pace", "a massive orange-colored tiger with black stripes walking at a brisk pace", "A striped tiger with a long tail walks very fast on its legs.", "A sleek tiger is striding forward.", "A big orange tiger with black stripes, is swiftly walking forward on his four." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 524, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/0737c0d50b6f4c3ea82d70980543b420", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A flying saucer made of black metal with blue round elements spins around its axis", "Flying saucer of black color with turquoise color illumination, levitates in the air and spins around the axis swaying", "A large black saucer with a dome-shaped structure and an antenna at the top, is quickly spinning around.", "A saucer-shaped black UFO with three blue spots, divided equally in distance, is revolving.", "A black UFO plate with a short black antenna, three turquoise dots on it and a turquoise edge is spinning." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 525, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/751b32784e1a4ddd9c95aa88d7640ab7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A gray, long-necked ultrasaurus wags its long tail, while eating grass", "A big dinosaur with a long neck and a long tail, tilted his head down and wagged his tail, then lifted his head up", "A large dinosaur with a long neck, a small head, and a long tail wags its tail and bends its head.", "A grey dinosaur-like creature is standing with its head facing downwards, wagging its tail, and then elevating its head.", "A grey brontosaurus dinosaur with its head facing down, is slightly moving its tail and then lifting its head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 526, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/796eb62d20bc41009a3f9990bf1ab5a4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a dark green metal nut is screwed onto a dark green metal bolt", "Two large dark green nuts, one nut spirals onto the other", "A dark nut being screwed onto a long cylindrical bolt.", "A grey nut is being screwed onto a grey bolt.", "A dark-grey metal nut is threading onto a dark grey screw." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 527, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/818c88a2ba244ccc9d6ee9b8ec746ca7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "old white magician from the game in a white long robe with a hood holds in his right hand a wooden brown scepter with a white diamond then lifts it up and puts it down", "A man in a white suit with a long white beard holds a black staff with a stone inside and knocks it on the floor", "An old man with a white beard is dressed in a long, cloaked attire with a hood, holds a staff that has a crystal at the top lifts it up, and drops it.", "A man of old age with a white beard, attired in a long hooded grey robe, grasps a curved brown staff featuring a central white stone, raises the staff with both hands and smashes it into the ground with great strength.", "An old wizard with a long gray beard, dressed in a long light grey robe with a hood, is holding a brown magic stick curved at the top, with a white rhomboid stone in the middle, holds it in both hands, raising the stick up and then striking it against the ground as if casting a spell." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 528, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/883f99d3497b4ebda58b5e4a358caafb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A yellow goldfish swims straight", "A big-eyed goldfish swimming peacefully", "A yellowish fish with visible fins and a tail moves its tail to the sides.", "A goldfish swimming straight forward swinging its tail.", "A yellow fish is swishing its tail." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 529, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/896aa8e996074ae58d7bc4a40587aa9a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "toy plastic crane orange and gray color folds square blue elements into one line", "Metal construction with orange colored control knob on a gray unit, moves square objects in different directions", "A large robotic arm lifts an item from one side and places it to the other side.", "A small, plastic crane with orange and gray parts picks square blue pieces and puts them into a straight line.", "A grey and brown robotic arms is moving small blue plates." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 530, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/896d06fa1ad04419bec492c5f3fb139e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a young cameraman in a blue cap, a green sweater, and blue pants holds a large gray metal video camera on his left shoulder and films a young woman with blond hair in a purple dress who is saying something into a microphone", "Two people: a cameraman in a green T-shirt with a blue cap and blue pants, holding a gray camera and filming a woman with white hair, in a purple dress, while she is holding a mic and saying something to the camera, moving her hand and gesturing as if she doing a news report", "A male cameraman wears a blue hat, green top, and blue pants, and he holds a camera that captures a female presenter with long brown hair who holds a microphone.", " A cameraman, dressed in a blue hat, green shirt, and blue pants, operates a camera that focuses on a female presenter with long brown hair who is speaking into a microphone.", "A camera man wearing a green shirt, blue pants, black shoes, a blue cap is holding a black camera, while a female TV presenter with red hair, in a purple knee length dress is holding a black microphone in front of a camera 
and slightly moving her head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 531, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/897ce33a65d04bb69eb3d87d0742464f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "wooden rocking chair with checkered fabric upholstered is swinging", "Wooden rocking chair with red plaid upholstery, swings back and forth", "A wooden brown chair with a curved backrest and legs swings from the back to the front.", "A wooden rocking chair, adorned with checkered red and brown fabric upholstery, is engaged in a rocking motion.", "A brown wooden rocking chair is rocking back and forth." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 532, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/986e4284d2f64a25a595d30de8d6267e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "that athletic woman in a red bikini with long black hair and white sneakers stands in a boxing stance and makes a left hand hook", "A woman with black hair in a red bodysuit with white socks, boxing", "A woman character with long black hair wears a red swimsuit with white sneakers has both arms slightly raised and throws an uppercut.", " A woman in a red bikini with long black hair and white sneakers stands in a boxing pose and punches with her left hand", "A tall young brunette woman with a fringe in a red swimsuit and white sneakers is standing in a boxing stance and throwing a punch." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 533, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/1069ef323b2640a7a72876025cbe3311", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "female robot fighter from the game with black and beige body, long black skirt and high heel boots holds two swords in both hands, hits with left hand then runs straight", "game character of a woman in a small suit with a skirt, high heels, no hair, holding a large sword in her hand, strikes with it, runs and falls", "A female fighter wears a long blue skirt, a mask, and long heels and wields two swords, swings them, runs, and then falls.", " A female robot warrior, clad in black and beige armor, a flowing black skirt, and towering heels, strikes with her left-hand sword and charges ahead.", "A bold female warrior wearing an asymmetrical black skirt, black knee high heel boots, with her face partially covered with a black mask, her upper body is bare except for straps across the chest, while the lower half is covered in a loose dark garment is holding swords in each hand and swinging them, then standing in a fight-ready position, then running and falling on her back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 534, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4056db77108e460faae2f82e22c821db", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "multicolored rubik's cube moves its elements", "rubik's cube of different colors folds in different directions and interferes with the colors", "A multicolored Rubik's cube moves its parts.", "A Rubik's cube with colorful squares has its upper part rotating non-stop.", "Plates of the Rubik's cube are moving." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 535, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4587fadbd1194844914797cf86aed394", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "an old vintage gramophone with a large metal yellow horn plays a black record", "a gold-colored gramophone on a wooden stand with a black plate turns the starter mechanism", "An old gramophone with a large curved horn and a rectangular base has a spinning mechanism on its side.", " A classic gramophone with a large, brass horn playing music from a worn-out black record.", "An brown gramophone has its handle moving and turning in circles." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 536, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/4798d8c87a0e4ad8835217fe93ddf67b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A gray hairy elk screams and then bows its head with long yellow antlers", "A large gray-colored moose with long antlers screams, tilts his head down and wobbles in different directions", "A large elk with multibranched antlers, a short tail, and four sturdy legs, opens its mouth, raises its head, and then bows.", "A gray elk with thick fur lets out a cry, then lowers its head with long, yellow antlers", "A dark grey elk with light grey mane and light brown antlers is bellowing, then shaking its head facing down." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 537, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5027ca4c4ebc439e9cd110e68367ce04", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a brown dragon with a long tail stands on its paws and waits", "Brown dragon with large wings and a long tail, sits upright and wags its tail", "A brown dragon with two large wings, horns, and a long tail stands on its large limbs.", "A brown dragon, with a lengthy tail, is standing on its hind limbs, in a state of anticipation.", "A brown dragon is standing on its claws and breathing, slightly moving its tail and head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 538, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5478ddd14bf044e59e02bda57ec46edb", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Mai from My Neighbor Totoro with brown hair and ponytails on the sides in a white shirt and a red dress, spread her arms to the side and runs fast straight", "Cartoon girl with brown hair with ponytails, wearing a pink dress, running fast leaning forward", "A cartoonish female character has brown hair and wears a pink dress and brown shoes, is running very fast, while having her arms stretched out to the sides.", "With brown hair in side ponytails, Mai from \"My Neighbor Totoro\" darted forward in a white top and red skirt, her arms flung wide.", "Mei Kusakabe, a little girl with brown hair, two ponytails, wearing a white blouse with a pink dress, yellow shoes is running ahead with her arms extended behind her." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 539, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/5803c2f561f5446ca8ec723de2c0e069", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "wooden round table folds and hides its legs", "round wooden table on four legs, legs fold under the table", "A wooden round table has multiple legs that fold and hide beneath it.", "A circular wooden table that collapses and conceals its legs for easy storage.", "Three legs of a light brown round table are folding under the table." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 540, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f8c5887be52843d1af634a219f4c5e5f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "metal gray shotgun with red elements inflated pulls back the hammer pulls the trigger and shoots", "Red metal monogrammed pistol, with grip insert, loaded and firing", "A metallic handgun with a long barrel and designs on its body, has its trigger being pulled and raises up a bit.", "A gray shotgun with red accents is being loaded. The shooter pulls back the hammer, then the trigger, and the gun fires.", "A trigger of a grey metallic arm gun with red ornaments is being pulled." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 541, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f9f1c51f5bd74a01a721a88c278c3baa", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a tall human skeleton in armor of the Lich King from the World of Warcraft game walks straight with his right hand extended forward", "Game character in the form of a tall death in a long black cloak with large wings on his back, smoothly walks forward", "A skeletal character dressed in a long hooded dress with protrusions on the shoulders, walks back and forth.", "A spooky skeleton in a long, hooded robe with weird pointy things on the shoulders is walking back and forth.", "A tall skeleton figure in long black hooded robe is levitating in the air with its arm stretched in front." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 542, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f24a856428f94f2ebbcaa0626f62eb0e", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Wooden folding table in soft white-beige color with white legs opens both the top and the side parts and closes them back", "Table made of light wood, folding, side doors and top pull-out table open", "A rectangular folding table has four short legs and its compartment opens from the top and the side.", "A white wooden table that folds up like a tent is opening and closing. The top and sides can be unfolded and then folded back together.", "Doors to a light brown dresser are opening and closing, while the top half panel is opening," ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 543, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f58c5e7f0ada40e491284247e6917c9a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a red leather cinema seat with black armrests that has a raised bottom and then lowers for comfortable sitting", "Black sofa with red upholstery, fold out the seat and lower the handles", "A folding chair with a reddish leather seat and backrest and black armrest has its seat being raised and lowered. ", "A red leather folding chair with black armrests is going up and down. adjusting the seat height.", "A base of a red cinema chair is lowering down and then the black arms rests are falling onto black parallel panels." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 544, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f59e7aa12a124c158d06263357165e72", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "An anime little Asian girl in blue wig, pink tank top, raspberry shorts and yellow high top sneakers, is doing Capoeira moves", "A girl with blue hair, wearing a pink T-shirt, pink shorts, orange shoes, walking from side to side with wide steps and waving her arms", "A cartoonish character with a blue wig, a tank top, pinkish shorts, and brown high sneakers moves steadily with her legs while her arms are slightly raised.", "A cute little Asian anime girl in a blue wig, pink tank top, red shorts, and yellow sneakers is doing cool Capoeira moves.", "A teenage girl with short turquoise hair, in pink tank top, magenta shorts, yellow sneakers with white tip is moving to the sides changing legs with her arms bent in elbows." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 545, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f472e10e4ed54bd5a0871da3ec753fcc", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "old metal old school Carousel with four wooden boards stands on a round platform and rotates around its axis", "Old round carousel metal multicolored with a wooden bottom on the half, slowly rotates around the axis", "A round carousel with metal railings and a wooden platform spins in a clockwise direction.", "An old-fashioned metal carousel with four wooden boards is spinning around on a circular platform.", "A metal red and green carousel with its base partly covered with wooden panels is spinning." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 546, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f2910cfad5be416b849b446ccfced8e1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young adult woman with long red hood red mini short dress and high black boots with heels walks straight", "Tall woman with red hair in a burgundy dress, high-heeled black boots, walks straight modeling gait", "A female character dressed in a brownish outfit and wears black long heels walks straight with her arms by her side.", "A young woman with long red hair, a short red dress, and tall black boots is walking confidently.", "A woman wearing a purple short hooded dress, black high boots is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 547, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f5371fbe5ba44ab2a9d9b6d2517a0c81", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a middle-aged man of short stature and broad build in a yellow hat with a cigar in his mouth, a red shirt, a brown vest, yellow trousers and black shoes stands and then walks straight", "A short man wearing a yellow hat, red shirt, red vest, yellow pants, black shoes, cigarette in his mouth, walking straight ahead", "A plus-sized man wears a round hat that matches with his pants, black shoes and has a cigarette in his mouth, walks straight with his head slightly lifted.", "A man in a matching hat and pants, black shoes, and a cigarette is walking confidently with his head held high.", "A middle-aged old chubby man in black boots, brown pants, red T-shirt and light-brown fedora with a cigar between his mouth is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 548, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f5437d046d144fcdb0fbfdd0f3a5bcc1", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Captain America from Marvel Comics in his blue, white and red superhero costume stands on two legs and then does a backflip", "A character from the Marvel universe “Captain America” in a blue suit with red elements, high boots, does a high somersault and lands on his feet", "This is Captain America from Marvel Studios who wears a blue body suit with a white star in the middle, and he performs a back flip.", "Captain America, the superhero from Marvel, is standing tall in his iconic blue, white, and red suit. Then, he does a cool backflip!", "Captain America is doing a backflip." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 549, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f19038fe45cc4475a40d3f80d277e23a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a young man with a short haircut in a white T-shirt, blue wide jeans and black boots stands in a boxing pose and sways left and right", "Dark-skinned man in a white T-shirt, blue pants, sneakers, staggers and stands in a wide stance and holds his hands in a defensive position", "This is a character from GTA San Andreas video game who wears a white tank top with blue pants and black shoes and stands in a combat pose with both arms slightly raised.", "A man in a white vest top and blue baggy jeans is standing in a boxing pose, like he's ready to fight. He's shifting his weight a little bit.", "A black man in white tank top and blue wide-leg jeans is standing in a boxing stance, slightly moving." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 550, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fa7a052ae3b34b7c820732ce754d2a44", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A dark gray Godzilla with horns on his ridge smoothly moves forward", "Dinosaur dark green color with a long tail, moves straight very fast", "A large reptile-like creature that stands upright has two sturdy legs, a long tail, two short arms, and several spikes on its back and moves quickly.", "A big, lizard-like creature is walking around on two legs. It has a long tail, little arms, and lots of spikes on its back. It's moving pretty fast.", "A big black reptilian that looks like Godzilla is walking forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 551, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fc6e597a81ce4dc291dcca6a9e6888ba", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a ginger striped cat with white paws and a white face smoothly sneaks up moving its paws", "The cat is orange in color with white spots on the legs in stripes, sneaks and walks forward", "A brown-striped cat with paws and a long curved tail moves forward and takes a step back.", "A brown and white paws cat with a long, curved tail is walking forward and then taking a step back.", "An orange cat with white paws is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 552, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fc093db562ca464aa0efc7e77483befa", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "tall bearded man with long brown hair in leather jacket biker t-shirt and black worn jeans stands smoothly moving his legs", "A man with a beard, long black hair, wearing a brown leather jacket, dark pants, boots, rolling from foot to foot and looking around", "A male character with long black hair and a beard wears a black jacket with pants and shoes, and he stands upright,", "A tall, bearded man with long brown hair is standing confidently in a leather jacket, biker shirt, and black jeans. He's shifting his weight from leg to leg.", "A bearded man with long black hair in a dark brown leather jacket, a T-shirt with a print, black jeans and black boots is standing." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 553, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fc8204782cd04da38cf812e2d76cab57", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "medieval knight in metal gray armor with massive arms and body in red-blue shirt strikes with all force with his right hand", "Game character in a red suit, with a white helmet on his head, with brown solid boots, aggressively swings several times leaning over", "A medieval warrior in armor which includes shoulder pads, a large helmet, belt, and boot, throws his arm in a combat manner.", "A medieval warrior wearing a beige brass crusader helmet, protective pads on his shoulders and knees, a red robe, and brown shoes is throwing a powerful punch.", "A warrior in medieval armor such as a beige brass crusader helmet, beige pads on shoulders and knees, red robe and brown shoes is making a bold punch." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 554, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/fd27df134ab04945ac3d8678accc112b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a cartoon drawn little dragon in a soft gray-blue color with orange horns on the ridge and black paws goes straight swinging tail", "The dragon is blue with black legs, long tail, walks forward, and wags his tail", "A cartoonish dragon with multiple horns on its head, a long tail, and several spikes on its back, moves steadily on its four legs.", "A cute, little cartoon dragon with soft gray-blue skin, spikes on its back, and black paws is walking along, swinging its tail.", "A grey young dragon with spikes on its head and black paws is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 555, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ff51e655d05d4a05b2c0d9b274c00ed7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "green helicopter with black glass flies rotating propeller", "Khaki-colored helicopter with large blades, black glass, the blades spin and the helicopter flies", "A greenish helicopter has two doors on its side, two firm legs, a long tail, and a spinning propeller.", "A green helicopter with two strong landing gear, a long tail, and a spinning propeller.", "Propellers of a khaki-colored helicopter are spinning." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 556, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/bc982b8da22a4a2ba8097b1e424ae38b", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a maroon dragon from the game flaps with big black wings and moves its tail", "Burgundy-colored dragon with big wings, white horns on his head, flying, flapping his wings and wagging his tail", "A large winged creature with horns on its head, a long tail, and several horns on its back flaps its wings.", "A dark brown dragon is flying flapping its wings.", "A big brown dragon is flapping its wings." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 557, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/bd5a99a682384d7b9e6f44f349fb5ab2", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a cartoon white man in a blue plastic cap and blue swimming trunks swims breaststroke, straightening his arms forward and then pulling them under himself", "Man in a blue cap, black swim trunks swimming in the water waving his arms", "A male swimmer wears a hat, goggles, and shorts, and he swims by moving his arms and legs while facing downward.", "A male swimmer wearing a hat, goggles, and shorts is swimming underwater. He's using his arms and legs to move through the water.", "A male swimmer with a blue swimming cap and grey swim shorts is laying in a streamline position, is swimming." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 558, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/be3c64aaf73447f481092a40679064e4", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "two soft white buns and one orange sausage between them are combined to form a hot dog", "Two long yellow buns, a sausage between them, folded around the axis to form a hot dog", "A sandwich that is made up of two loaves of bread and a long hot-dog, has the loaves moving to enclose the hot-dog.", "A long hotdog is being squeezed between two pieces of bread. The bread is closing in on the hotdog.", "Two parts of a sliced bun and a hot-dog sausage in the middle are turning upside down while in the air." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 559, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/bfcb770ca92549e586052e4919a4a1e7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a female archer from the game in a black hood and black robe with arrows behind her back turns over her right shoulder and returns back", "A female playable character in a black suit, black pants, black boots, with arrows on her back, turning warily around", "A hooded warrior wears armor and has arrows strapped to her back while she slightly turns to look back.", "A female archer, wearing a black hood and robe with arrows strapped to her back, looks over her right shoulder and then turns back around.", "A female warrior in black hooded cap, black long gloves and boots, with arrows behind her back is standing looking around." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 560, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c0b782d701064a90b94055e9db76de00", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "dark-skinned man with short hair in blue t-shirt and black pants sits on something and claps his hands", "A man with short black hair wearing a blue T-shirt, dark pants, black shoes, sitting on an imaginary chair, flapping his arms", "A male character wearing a blue shirt and gray pants with black shoes sits while clapping his hands.", "A male in a gray shirt, black pants, and black shoes is floating in the air and clapping.", "A young man in a grey T-shirt and black pants and shoes is in a sitting position in the air and clapping." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 561, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c4cff1709d1c4ae6a4883a67f3af2fee", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A middle-aged man in leather face mask, from the Texas Chainsaw Massacre movie, in plaid shirt, a dirty apron and black pants, is holding a metal chainsaw in his right hand and walks straight.", "A masked man with black hair, wearing a plaid shirt, dirty white apron, dark boots, holding a saw, walking straight ahead with his arm out", "A scary man wears a shirt and pants with shoes and an apron, and he holds a chainsaw with one hand while he walks straight.", "A middle-aged man, wearing a leather face mask similar to that depicted in The Texas Chainsaw Massacre, is walking purposefully. He is clad in a plaid shirt, dirty apron, and black pants, while holding a metal chainsaw in his right hand.", "Leather face character from the Texas Chainsaw Massacre is holding a chainsaw in front of him in his right hand and walking forward." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 562, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c8be2ae67ff9432a84a8c1c535711f8c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Mario character, a short man in red cap, with brown mustache, wearing blue overalls, is walking straight while moving his arms and legs", "Cartoon character Mario with brown hair, mustache, red sweatshirt, blue overalls, brown shoes, red cap with logo, white gloves, actively walking forward and freezes", "This is Mario with his prominent big hat and a blue overall with a red top and brown shoes, and he walks hurriedly on both feet.", "Mario, the famous Nintendo character, is walking around in his red shirt, blue overalls, brown shoes, white gloves, and red cap ", "Mario from Nintendo in a long-sleeved red shirt, a pair of blue overalls with yellow buttons, brown shoes, white gloves, and a red cap with a red \"M\" printed on a white circle is walking," ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 563, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c48b05ec22ac4b93afaac80c0be2732a", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "white wooden cabinet with white washbasin opens its doors", "White washbasin with vanity unit with doors, with white mixer tap, both doors opening", "A modern sink with a cabinet below it, which is enclosed by two doors that open.", "A fancy sink with a cabinet underneath. The cabinet has two doors that you can open to put stuff away.", "The doors to a white sink base cabinet are opening." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 564, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c5782e43dcf442718a25b49bed00f582", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A white cow with large black spots and horns stands on four legs and slowly moves its head", "A white-colored cow with asymmetrical black spots on its head, horns and tail, has its head tilted, shaking it to the left and right", "A moving white cow with black accents on its body, has horns on its head, four sturdy legs, and a tail.", "A black and white cow is walking around. It has horns, a tail, and four strong legs. standing still", "A cow with black patches and horns is standing slightly moving its head." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 565, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/c9637241985f403e9fdcf359d685f16f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "white figure of young punk boy with mohawk on head blue latex tight jumpsuit dancing hip hop movement moving arms and legs to the side", "A white man in the form of a mannequin with a Mohawk in a blue latex suit, dancing", "A stylish white character with a mohawk wears a shiny outfit with white shoes, and he slightly bends down while moving his arms and legs.", "A white guy with a spiky mohawk and a tight blue jumpsuit is dancing hip-hop. He's moving his arms and legs around really fast. ", "A female figure with a mohawk hairstyle wearing a latex tight black suit and white sneakers is dancing, throwing her arms to the sides and bending her legs." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 566, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cba6e283f536435ca99236b75103224c", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Aang, a character from Avatar the last airbender cartoon, an asian boy with bald head and blue arrow on forehead in yellow orange kimono stands in a stance with left leg forward, stretching forward with both palms.", "Avatar cartoon character in yellow suit, yellow cape, orange shoes, with tattoo on head, arms and legs stretched forward on the floor and swaying in a strong wind current", "This is Avatar with his prominent bald head marked with a blue arrow, who wears a caped top with yellow pants and brown shoes, and he bends air by slightly bending down and performing a gesture with his hands.", "Avatar, the bald man with the blue arrow on his head, is wearing a cool cape and yellow pants. He's bending air by leaning down and making a special hand gesture.", "Aang, the Avatar from The Last Airbender, is standing in a wide stand with one leg is often slightly bent at the knee, with the other leg straight, and arms outstretched in front of him with palms open." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 567, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/cc4dd8bc3c3f464a8d928cbf4518b0ef", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a black-brown tick beetle with a hairy body and two antennae moves its small legs", "brown colored beetle with body hair, long whiskers, goes straight", "A brown round insect with multiple legs, antennae on its head, and tiny strands of hair on its body moving slowly on its tiny legs.", "A brown bug with many legs and antennae is slowly crawling around. It has tiny hairs on its body.", "A brown insect with two antennae on its head, a round body covered in thin hairs, is walking on its six legs." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 568, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e0727241adde4fb0992df6afd9f7ebb8", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A red panda with a long fluffy striped tail plays with a yellow apple by attacking it and rolling over onto its back", "red panda orange color with a fluffy tail with black stripes, with dark legs, sniffs an orange that lies nearby, stands on two paws, attacks him and falls, does a somersault playing", "A brownish raccoon with a long furry tail stands upright, grabs a pumpkin, and rolls on the ground with it.", "A red panda is having fun with a yellow apple. It's biting it and rolling around on its back.", "A red panda is standing on its hind paws falling down to catch an orange that is in front of it." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 569, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/e3244268216244808d2603d23ef37310", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "A grey smooth hippopotamus with a pink belly stands on four legs and slightly rises up", "A gray-colored hippo with a pink belly, standing swaying with his legs bent and unbent", "A grayish hippopotamus with four sturdy legs and a short tail stands on its legs and slightly moves its body.", "A gray hippo with a pink belly is standing on four legs and lifting itself up a little bit.", "A grey hippopotamus with pink stomach is standing, breathing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 570, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ea76eb689ca14ff49190cdc42216183d", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "Agnaktor from monster hunter stories waves its single fin like a fish and makes biting movements with its face", "A mystical creature that looks like a dragon with a huge dark brown tail, shouts aggressively and wags its tail vigorously", "A large scaly creature with four legs and a large tail opens its mouth to grab something while wagging its tail.", "A big, scaly creature with four legs and a long tail is opening its mouth and wagging its tail.", "A black and green sea horse creature with a long tail is swishing its wide tail and roaring." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 571, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ebd4c5827d1944518b9b6dea7f8594f7", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "young woman dressed in captain Jack Sparrow clothes in a pirate hat white shirt and armor goes straight swinging hands", "A woman in a pirate's uniform, brown, skirt, shirt, high boots, walks straight ahead with confidence", "A female character dressed in an old captain's attire with a hat and long boots walks hurriedly in one direction. ", "A woman dressed like a captain from long ago, wearing a hat and long boots, is walking quickly to one side, her hands swinging.", "A female figure in a typical pirate outfit and a big black tricorne hat is walking forward." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 572, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ecc85a571cb64a2f94b77fae1d975b64", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "old school metal hound car dark green with white bumper and peeled off stickers opens hood", "Gray car with a green stripe on half of the car, in graffiti and stickers, numbers on the ceiling, opens the hood", "A green racing car with inscriptions on its body and four round wheels opens its bonnet to expose the engine and mechanical parts.", "An old, dark green metal car, like the ones from way back, with a white bumper and some worn-out stickers, is opening the hood.", "The hood of a green car with white bottom part of the body is opening." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 573, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ed5a5a0508b5464e999bfa49e1f5cb7f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a blue little dinosaur with a tail like a fish opens a pink mouth with two white teeth walks straight sits on its butt and does a forward somersault", "Toy dinosaur blue rubber dinosaur with turquoise elements, with a long blue tail, with two fangs, goes straight, opens its mouth, wags its tail, falls and does a somersault", "A blue creature with a long snout and flat tail walks upright on its two legs with its mouth open and falls on its tail which bounces it back to its original posture.", "A blue cartoon with a long nose and a flat tail is walking upright on two legs with its mouth open. It trips over and falls on its tail, but it bounces right back up.", "A small, blue, cartoon-like creature with a smooth, rounded body, fin-like limbs, and an elongated head, featuring a light blue belly and small fangs is walking and opening its mouth, then falling on its tail and bouncing back." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 574, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ee69f4a506cd4826a216017da6c9b7dd", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "The metallic white space satellite opens its dark brown solar panels to the sides and adjusts them", "Metal satellite antenna of white color with black inserts on the blades, opens and turns the inserts to the left and right", "A large cylindrical satellite with flat panels on its sides opens up the panels and turns them.", "The white metal satellite is unfolding its dark brown solar panels and adjusting them to catch the sunlight.", "A satellite with a cylindrical white body with multiple sections, including a spherical module at one end and large black solar panels extending from both sides of the central structure." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 575, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ee5799e9d6bd465d95bf45371e116ebe", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a woman with a red short haircut in a red tight dress and leopard leggings dances swaying her hips and moving her arms forward and to the side", "A woman with short red hair, red dress, black tights, dancing with her hands actively.", "A female character with brownish hair wears a tight reddish top and pants with shoes, and she is whining her waist while slowly going down and raising her arms.", "A woman with short red hair, wearing a tight red dress and leopard-print leggings, is dancing. She's swaying her hips and moving her arms back and forth.", "A woman with short red hair in a red dress below her hips and grey pantyhose is dancing rhythmically moving her hips and extending her arms." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 576, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/ef02d1aa088e4add872e15c30cbc799f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." 
], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a man in red and white Santa Claus costume with a white beard, walks straight, moving his legs and arms", "Santa Claus with a white beard, red suit, black belt, red hat, black shoes, walks straight with a wide step bent", "This is Santaclaus with his prominent red hat and attire, with a black belt and shoes, and he walks hurriedly with his arms slightly away from his body.", "Santa Claus, in his famous red suit, black belt, and black shoes, is walking quickly with his arms slightly out to the sides.", "Santa Claus is walking slightly bent over." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 577, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/efeb35ace9f04c879a893d4409ca60fe", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a metallic yellow and black robot with two large eyes and a huge gray-brown chest on its head stands on two legs and staggers from side to side", "Metal robot of dull yellow color on two legs, with a pipe attached, two eyes, a wooden chest on his head, attached with two straps, stands up, takes a step, wobbles, and takes a wide step", "A rusty robot with a large head and two legs carries a wooden chest on its head and it takes a step forward.", "A big, yellow and black robot with two big eyes is wobbling back and forth on two legs.", "A grey metallic robot shaped like a chicken that has a chest as its head is getting up on its two legs, moving its body slightly as if breathing and then lifting its left leg." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 578, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f0acb761248b489689ef87c726d43959", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "a rooster with white feathers, a dark gray fluffy tail and a pink head stands with his left paw forward", "White rooster with blue tail, belly, wings, legs, fluffy, standing and breathing", "A cock that has a white body with a black tail and legs, stands on its two legs and looks forward.", "A white rooster with a fluffy gray tail and a pink head is standing with his left foot forward.", "A white rooster with blue tail and legs is standing, breathing." ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 579, "media_type": "Video", "media_paths": "./data/4D_Object_Captioning/f4c16b93c0b84abbac6f29395785370f", "description": "", "task_type": "Vision-Question-Answer", "question": [ "Please generate descriptive captions for this multi-view video." ], "question_type": "free-form", "annotations": {}, "options": [], "answer": [ "metal dark gray scales measure two identical weights", "Gray metal scales with weights on the scales, weighing from one side to the other", "This is a balance scale that has two pans hanging from either side of a horizontal beam, supported by a central pillar that is held by a round base, and both plates get lowered simultaneously.", "The scales are tipping with arms are going down on both sides.", "Grey equal arm balance scales is lowering its beam to the sides." 
], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 580, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/05c76ba760c34e12928816329ca20a44", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the direction the top part of the installation moves?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It moves upward and downward in a non-stop motion" }, { "id": "B", "text": "It moves upward and then starts rotating clockwise" }, { "id": "C", "text": "It does not move" }, { "id": "D", "text": "It spins clockwise" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 581, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/213de09d83d84b5da297a3738ee94cc3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the crane perform? " ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It rotates the cube counter-clockwise and puts it down" }, { "id": "B", "text": "It lifts the cube and puts it down" }, { "id": "C", "text": "It lifts the cube up and rotates it clockwise" }, { "id": "D", "text": "It lifts the cube up, rotates it counter-clockwise and puts it down" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 582, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/243e5e732ad94c9994a6f0e4e930dedd", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What moves and how?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A bug moves up the grass while the grass is swaying side to side" }, { "id": "B", "text": "A bug sits still on a leaf while the grass is swaying side to side" }, { "id": "C", "text": "A bug moves up the grass while the grass remains still" }, { "id": "D", "text": "A bug moves down the grass while the grass remains still" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 583, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/28ca591b0f1944c9af069e275e229669", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the pills?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Pills are moved inside the box and then back out again" }, { "id": "B", "text": "All pills are put in its box and taken away" }, { "id": "C", "text": "Some pills are put in its box while some are left outside" }, { "id": "D", "text": "One pill strip is put into its box, the rest remain outside" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 584, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/29b76325ae734525b76665a330f36408", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times was the cube moved to a different cell?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4 times" }, { "id": "B", "text": "3 times" }, { "id": "C", "text": "2 times" }, { "id": "D", "text": "1 time" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 585, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/29f592ee354849b6ad8a28a1347729fd", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What follows the springing movement of item's head?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The leaves fall down" }, { "id": "B", "text": "The tongue sticks out" }, { "id": "C", "text": "The mouth opens and closes" }, { "id": "D", "text": "The pot slides to the left" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 586, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/31d15e2c99bc4f95933eb32d6274fb46", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many flowers does the character move behind the head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1 flower" }, { "id": "B", "text": "2 flowers" }, { "id": "C", "text": "4 flowers" }, { "id": "D", "text": "3 flowers" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 587, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4cd1702740b04683a8a2b86b4fb745e3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In what way does the gray mechanism move?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It moves downwards only" }, { "id": "B", "text": "It stays up without moving down" }, { "id": "C", "text": "It moves sll the way down before going back up" }, { "id": "D", "text": "It moves slightly down, then moves back up" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 588, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4d361e54cb984d22870098140cab2c1f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many shelves open fully?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3 shelves" }, { "id": "B", "text": "Neither" }, { "id": "C", "text": "4 shelves" }, { "id": "D", "text": "2 shelves" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 589, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4d5248c8de4340bb954301f87567f656", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What parrot overtakes the front parrot in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "In the end, the left parrot in the back overtakes the front parrot" }, { "id": "B", "text": "In the end, all parrots stay in the exact same formation as they were in the beginning" }, { "id": "C", "text": "In the end, all parrots form a single straight line and all stay in the same front line" }, { "id": "D", "text": "In the end, the right parrot in the back overtakes the front parrot" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 590, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4e57f299471c4684b095f30973602e4f", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the item the character holds in the right hand at the end of the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It burns down" }, { "id": "B", "text": "It remains in the character's right hand" }, { "id": "C", "text": "It falls down" }, { "id": "D", "text": "It breaks in half" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 591, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6fdc3b47026d441e9fc2470b97d53d3a", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the correct movement combo?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Robotic body constantly spins clockwise while robotic head is still" }, { "id": "B", "text": "Robotic body moves together with its robotic head" }, { "id": "C", "text": "Robotic head moves in different directions while robotic body is still" }, { "id": "D", "text": "Robotic body moves in different directions while robotic head is still" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 592, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8fce9e78f1434b64a5563c0bebf9c405", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What parts of the body does the robot lift up?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Right arm and right foot" }, { "id": "B", "text": "Left arm and right foot" }, { "id": "C", "text": "Right arm and left foot" }, { "id": "D", "text": "Left arm and right arm" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 593, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/93a4b12a91b349ecb0013a2e232e48bd", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does the creature with ears blink?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It blinks twice" }, { "id": "B", "text": "It blinks once" }, { "id": "C", "text": "It doesn't blink" }, { "id": "D", "text": "It blinks four times" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 594, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9b4a2b1c4e7f43c6b4abed0dfd98f7e5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What parts of the catterpillar remain still?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Head and wings" }, { "id": "B", "text": "Head and body" }, { "id": "C", "text": "The body and green balls inside of it" }, { "id": "D", "text": "Body and wings" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 595, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9d6a933e1e77421594a5e2b9ed049f01", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does the character push off the ground?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Five" }, { "id": "C", "text": "Six" }, { "id": "D", "text": "One" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 596, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2fd7a41706254e6cb2c24467a5ca0370", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the wooden mother bird perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The mother bird chirps loudly." }, { "id": "B", "text": "The mother bird spins in a full circle." }, { "id": "C", "text": "The mother bird remains stationary throughout." }, { "id": "D", "text": "The mother bird rotates to feed the chicks alternately." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 597, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/18e68c55a09847178795682142af3d06", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Which arm does the figure raise before bringing it back down?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left arm, then right arm" }, { "id": "B", "text": "Raises both arms together" }, { "id": "C", "text": "Left arm" }, { "id": "D", "text": "Right arm" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 598, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/97dc38f3b7374a7cb0ce29b55c8781da", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What final action does the soldier perform after extending and lowering his arms?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He salutes with both hands." }, { "id": "B", "text": "He steps backward while raising one hand." }, { "id": "C", "text": "He sits down and ties his boots." }, { "id": "D", "text": "He crouches and remains still." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 599, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/75a4bb26da7f48819d60c3e79d5c6365", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action is occurring with the car?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The car is moving backward." }, { "id": "B", "text": "The roof of the car is retracting." }, { "id": "C", "text": "The left front door of the car is opening." }, { "id": "D", "text": "The car is changing its color from blue to purple." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 600, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/238eaf4c3a2b425ab9e848d7554d8b06", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which direction is the woman consistently moving her arms during the dance?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The arms are moving in an upward clapping motion." }, { "id": "B", "text": "The arms are moving in a circular overhead motion." }, { "id": "C", "text": "The arms are moving in a bodyguard guarding motion." }, { "id": "D", "text": "The arms are moving in a wave-like, flowing motion." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 601, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/45db60a0a3af4f24b141d503512356a0", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the sequence of the character's arm movements?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Starting with the right arm down and resting on her hip, then raising it to look at her palm, and returning to the original position." }, { "id": "B", "text": "Keeping both arms down and then clapping above her head." }, { "id": "C", "text": "Starting with the left arm down, raising both arms above her head, and then lowering both arms." }, { "id": "D", "text": "Starting with the right arm raised, moving it to the side, then lowering it." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 602, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a3ec034e322b40c9a755d164567ab92c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the metal typewriter during the sequence across different views?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The typewriter emerges from one end, moves along the conveyor belt, and eventually goes into a screening box that closes." }, { "id": "B", "text": "The typewriter stays stationary on the conveyor belt throughout the sequence." 
}, { "id": "C", "text": "The typewriter moves along the conveyor belt and exits from the opposite side without stopping." }, { "id": "D", "text": "The typewriter disappears mid-way as the conveyor belt retracts." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 603, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2e77b6e0afe04ba29579d8d847525f97", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which direction does the character turn her head during the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "She only turns her head to the right." }, { "id": "B", "text": "She turns her head to the left and then back to the original position." }, { "id": "C", "text": "She looks upward and then downward." }, { "id": "D", "text": "She does not move her head at all." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 604, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/82150bc4ae9d4b35a1424a334f5d6c2a", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the creature in the last?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It flies over the wooden gate." }, { "id": "B", "text": "It kicks the bottom of the gate with both legs." }, { "id": "C", "text": "It turns around to look behind it." }, { "id": "D", "text": "Its hand reaches to the other side of the gate." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 605, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3cc219ecd5654c23a84b31b566fdff76", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many tentacles can be observed from each viewpoint consistently throughout the video?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Seven" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Six" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 606, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/81fb335478a746c5bc987efebba2bb7b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the Australian White Ibis perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird hops in place and does not use its wings." }, { "id": "B", "text": "The bird dives into water as it flaps its wings." }, { "id": "C", "text": "The bird tucks its wings in and rolls over." }, { "id": "D", "text": "The bird lowers its head and flaps its wings while standing." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 607, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7521101dbb714dc991acc78ca1d602a0", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement is the man performing with his right hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Raising his hand above his head." }, { "id": "B", "text": "Pressing forward as if to press a button." }, { "id": "C", "text": "Pointing upwards." }, { "id": "D", "text": "Holding something in his fist." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 608, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0737c0d50b6f4c3ea82d70980543b420", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many turquoise dots are visible on the flying saucer?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three turquoise dots" }, { "id": "B", "text": "Five turquoise dots" }, { "id": "C", "text": "No turquoise dots" }, { "id": "D", "text": "One turquoise dot" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 609, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/97ca1fc2a00d474b8423a9774d8b5d2e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which manner does the vehicle primarily move in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It moves in a zigzag pattern." }, { "id": "B", "text": "It reverses and accelerates rapidly." }, { "id": "C", "text": "It drifts smoothly around corners." }, { "id": "D", "text": "It bounces and turns sharply." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 610, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/26f7364a82f94073b6d13596e93beae7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific movement pattern is the weeping willow tree exhibiting in the video sequence?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It is sliding horizontally across the screen." }, { "id": "B", "text": "Its branches are expanding and contracting." }, { "id": "C", "text": "It is continuously rotating around its own axis." }, { "id": "D", "text": "It is swaying side to side like in a breeze." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 611, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2553271e52a447af8c12318ac8205252", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What maneuver does the fighter jet perform when changing direction during the flight?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The jet performs a barrel roll." }, { "id": "B", "text": "The jet performs a sharp turn to the right without a roll." }, { "id": "C", "text": "The jet performs a loop-de-loop." }, { "id": "D", "text": "The jet rolls to the left and then to the right." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 612, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3bc90f60c71249799653347bbf9742ab", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Which arm does the character primarily use to make a triumphant motion?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left arm, bending at the elbow" }, { "id": "B", "text": "Right arm, waving side to side" }, { "id": "C", "text": "Right arm, bending at the elbow" }, { "id": "D", "text": "Left arm, fully extended upward" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 613, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/50e17ba5c2dc4e3d9ed0effc7d2f9fd5", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What subtle action is the elephant performing while walking as observed?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Flapping its ears" }, { "id": "B", "text": "Turning its head left" }, { "id": "C", "text": "Wagging its tail" }, { "id": "D", "text": "Raising its trunk" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 614, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/223d32f8b1754fe0b1f928dec7caddec", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific movement does the fish make with its body as it swims?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It moves its body in an up and down wave." }, { "id": "B", "text": "It swims backward with rapid fin movements." }, { "id": "C", "text": "It spins around while staying in place." }, { "id": "D", "text": "It waggles its body from side to side." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 615, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/514bb5b1bbdb41e7b2b537962bac61be", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the dragon-like creature perform after investigating the ground?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Flies upwards." }, { "id": "B", "text": "Moves its head to the right." }, { "id": "C", "text": "Turns its head to the left." }, { "id": "D", "text": "Starts breathing fire." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 616, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/35bf9cca0d664a4193f26a13bb7ba0b5", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action is the tank performing in the video?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The turret is rotating from right to left." }, { "id": "B", "text": "The tank is moving forward." }, { "id": "C", "text": "The tank is firing its cannon." }, { "id": "D", "text": "The turret is rotating from left to right." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 617, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/343763e21e7d4185a739555441315d05", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the man perform immediately after pulling up the switch?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He closes the door of the transformer box." }, { "id": "B", "text": "He talks to someone nearby." }, { "id": "C", "text": "He turns around and walks away." }, { "id": "D", "text": "He removes his helmet." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 618, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/178c59f4468e41c3a15e4263089f9963", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action does the multi-headed siren creature perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Jumps upward." }, { "id": "B", "text": "Throws a left jab." }, { "id": "C", "text": "Turns its body completely around." }, { "id": "D", "text": "Throws a right hook." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 619, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/193a97f680644ce3922f3c2fc7fd4f2c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What sequence of actions does the man perform with his right arm?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Throws an uppercut with the right hand and moves sideways." }, { "id": "B", "text": "Throws a cross with the right hand and ducks." }, { "id": "C", "text": "Throws an uppercut with the left hand and jumps." }, { "id": "D", "text": "Throws a hook with the right hand and then steps back." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 620, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/90ad05c2929d40bd8998ae74b0d5813a", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the sequence of the elephant's actions in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Flapping its ears, then standing still, then breathing." }, { "id": "B", "text": "Walking, then flapping its ears, then turning." }, { "id": "C", "text": "Standing still, flapping its ears, then breathing." }, { "id": "D", "text": "Standing still, breathing, then flapping its ears." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 621, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/25c40880744b43c6be5dc5905c7de046", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary movement of the solar panel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Sliding across the grass." }, { "id": "B", "text": "Tilting to horizontal." }, { "id": "C", "text": "Spinning on the ground." }, { "id": "D", "text": "Splitting into two panels." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 622, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b3f0e46aec6441168313baf045de4b4d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action does the white fox robot perform with its left arm at the end of the sequence?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The left arm is gesturing in the air." }, { "id": "B", "text": "The left arm is raised above its head." }, { "id": "C", "text": "The left arm is touching the ground." }, { "id": "D", "text": "The left arm is holding a prop." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 623, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/28ebec88a44443e29fcb8ac1bbe6a4dc", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which direction is the man turning when he raises his right arm in the air?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He remains stationary, not turning at all." }, { "id": "B", "text": "He is turning backward while lifting his right arm." }, { "id": "C", "text": "He is turning to his left side." }, { "id": "D", "text": "He is turning back to the original position, facing front." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 624, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/66db7b22d5084475ab9a969de1dd3580", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is the man performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Walking slowly forward." }, { "id": "B", "text": "Sighing heavily." }, { "id": "C", "text": "Turning his head rapidly from side to side." 
}, { "id": "D", "text": "Clapping his hands repeatedly." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 625, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/36e6bb5292dc4f4aa95bab2e2e1a707b", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "During what action does the character wave their hand in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Before walking, the character waves both hands." }, { "id": "B", "text": "While walking, the character waves their right hand." }, { "id": "C", "text": "After stopping, the character waves their left hand." }, { "id": "D", "text": "After jumping, the character waves their right hand." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 626, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/470efc1ca7934e7bb77b97559ab9124b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What consistent motion does the female fighter perform with her left leg?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "She performs repeated high kicks with her left leg." }, { "id": "B", "text": "She pivots left and right on her left leg." }, { "id": "C", "text": "She turns in a circle on one foot." }, { "id": "D", "text": "She kicks with her right leg." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 627, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b606f795afb747f7ac1dc5234ff7d6ea", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "During the jump, which way does the figure turn its body?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "To the right and then to the left" }, { "id": "B", "text": "Only to the right" }, { "id": "C", "text": "To the left and then back to the front" }, { "id": "D", "text": "Does not turn at all" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 628, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/45013a6fe06549618716a5f8a636ca2e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which direction is the robot turning its head while throwing the punch?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is nodding its head." }, { "id": "B", "text": "The robot is turning its head downward." }, { "id": "C", "text": "The robot is turning its head to the left." }, { "id": "D", "text": "The robot is turning its head to the right." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 629, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/14afeda06f604fd68940bb629a4235e9", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "During the boxing sequence, which arm does the man primarily use to punch?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left arm" }, { "id": "B", "text": "Non-dominant arm" }, { "id": "C", "text": "Both arms equally" }, { "id": "D", "text": "He does not punch at all" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 630, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d0297fa807eb48c0a77e6ab6964da8bf", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "During the breakdance performance, which body part is the woman supporting her weight on when upside down?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Her left hand" }, { "id": "B", "text": "Her head" }, { "id": "C", "text": "Her left elbow" }, { "id": "D", "text": "Both hands" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 631, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e59ea16e52254c6189e3759dfce4dc93", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "In which direction is the pterosaur's mouth facing as it opens during flight?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The mouth is facing forward." }, { "id": "B", "text": "The mouth is facing downward." }, { "id": "C", "text": "The mouth is facing backwards." }, { "id": "D", "text": "The mouth is facing upward." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 632, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/98f52fa4dc3242adbbe6caf3101b64d4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What consistent action does the lid of the Sony cassette player perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The lid opens upwards and locks in place." }, { "id": "B", "text": "The lid swings completely around and then closes." }, { "id": "C", "text": "The lid opens wide and then closes." }, { "id": "D", "text": "The lid opens slightly and remains open." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 633, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0cc299d9026c4ec19f2ce44555e85272", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which action is the model primarily performing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The model is aiming the gun at a specific target." }, { "id": "B", "text": "The model is waving the gun in the air." }, { "id": "C", "text": "The model is looking around vigilantly while holding a gun." }, { "id": "D", "text": "The model is running with the gun." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 634, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/11d4334bf42b495e91128a050fe93ba2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which direction does the man raise his hand after stopping?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He points with his right hand." }, { "id": "B", "text": "He waves both hands." }, { "id": "C", "text": "He raises his left hand." }, { "id": "D", "text": "He raises his right hand." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 635, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8f6f0ec635e24d8998a42f798dd97be2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "From multiple perspectives, what action is the soldier consistently performing in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Waving" }, { "id": "B", "text": "Sitting down" }, { "id": "C", "text": "Saluting" }, { "id": "D", "text": "Marching" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 636, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dce5a2f431444496bffe27666606d7f3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the metal structure perform as seen?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tower turns 360 degrees but the propeller remains still." }, { "id": "B", "text": "The skis detach from the structure and reattach." }, { "id": "C", "text": "The wings retract and then the propeller spins." }, { "id": "D", "text": "The skis move independently from the rest of the structure." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 637, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7f69215ba47e40bf8c8e6c18d8397d36", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary movement direction of the machine gun nozzle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Tilting diagonally" }, { "id": "B", "text": "Moving forward and backward" }, { "id": "C", "text": "Turning left and right" }, { "id": "D", "text": "Spinning in a full circle continuously" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 638, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5f1a279ba58748e488faacb4e4352558", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "During the dance sequence, which arm does the dancer raise above shoulder level?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both arms" }, { "id": "B", "text": "Left arm" }, { "id": "C", "text": "Right arm" }, { "id": "D", "text": "Both not" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 639, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dd506219f2fa4d7fbcfde25a88340dd1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action is the dragon performing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The dragon is breathing fire." }, { "id": "B", "text": "The dragon is gently moving its wings and tail." }, { "id": "C", "text": "The dragon is flying towards the viewer." }, { "id": "D", "text": "The dragon is lying down and sleeping." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 640, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dba0c8ab83174c87938844575fded9e6", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What sequence of movements does the pink flamingo perform in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The flamingo lowers its head while making a sound." }, { "id": "B", "text": "The flamingo spins in a circle and then jumps." }, { "id": "C", "text": "The flamingo moves its head side to side while standing still." }, { "id": "D", "text": "The flamingo raises its right leg followed by its left leg." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 641, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7dde986b68834de6b5a9deff6819d3f1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What direction is the man turning his body while playing the guitar?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He rotates his body in a complete circle." }, { "id": "B", "text": "He turns his body from right to left." }, { "id": "C", "text": "He remains stationary without turning." }, { "id": "D", "text": "He turns his body from left to right." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 642, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0c83e28627b247e194212d9fbe119ad2", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "From which direction does the man pull out his imaginary object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "From behind his back, using his left hand." }, { "id": "B", "text": "From above his head, using both hands." }, { "id": "C", "text": "From behind his back, using his right hand." }, { "id": "D", "text": "From his front pocket, using his left hand." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 643, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0b83526973a447d1a162d3d30015277d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What direction is the moose tilting its body while walking?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left" }, { "id": "B", "text": "Forward" }, { "id": "C", "text": "Standing still" }, { "id": "D", "text": "Backward" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 644, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d53ce50912db4d078c771da0a78317bb", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is performed by the green-sleeved arms immediately after the gun is fired?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Throwing the gun." }, { "id": "B", "text": "Pointing the gun upwards." }, { "id": "C", "text": "Dropping the gun." }, { "id": "D", "text": "Reloading the gun." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 645, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8cffdc73a21d452581b6887e41872733", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What consistent motion is observed in the coffee grinder?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The lid is lifting off the base with each turn." }, { "id": "B", "text": "The grinder is vibrating up and down." }, { "id": "C", "text": "The handle is spinning around the grinder lid." }, { "id": "D", "text": "The base is rotating while the lid remains stationary." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 646, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5eb38ec707044307bb5c512aea1fa91c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "During the dance, which foot does the man lift off the ground first?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both feet simultaneously" }, { "id": "B", "text": "Right foot" }, { "id": "C", "text": "Hops on one foot" }, { "id": "D", "text": "Left foot" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 647, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2ce9e484a7d7419ba9a8ef3c64c95298", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary movement of the axe in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The axe is spinning in the air." }, { "id": "B", "text": "The axe is rotating horizontally on the ground." }, { "id": "C", "text": "The axe is stationary above the platform." }, { "id": "D", "text": "The axe is chopping wood on the platform." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 648, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0dd0a4ae5f3f4fd8a0ff75fc7f9c1cb2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What sequence of actions does the mouth perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The mouth closes fully and stays closed." }, { "id": "B", "text": "The mouth begins closed and does not open at all." }, { "id": "C", "text": "The mouth opens wide, showing sharp teeth, and then closes." }, { "id": "D", "text": "The mouth opens slightly, remains open, and then closes." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 649, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8d862ec1bcc9474eb52da4f3cb3e928f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which arm position does the girl maintain throughout the run?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Arms stretched out behind her body." }, { "id": "B", "text": "Arms bent with hands on her hips." }, { "id": "C", "text": "Arms crossed in front of her chest." }, { "id": "D", "text": "Arms swinging back and forth." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 650, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7a5b5f516881457a941937fc667fc61d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "In which direction does the shark's tail primarily move?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Stays still" }, { "id": "B", "text": "Backwards" }, { "id": "C", "text": "Forward and backward" }, { "id": "D", "text": "Side to side" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 651, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cf76d8c5375b4d168b879a6e0bfe5433", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the correct order of operations?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Loading the pistol, firing two shots, then unloading." }, { "id": "B", "text": "Unloading, loading, then firing three shots." }, { "id": "C", "text": "Loading, firing three shots, then unloading." }, { "id": "D", "text": "Unloading the pistol, firing two shots, then loading." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 652, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d52bd1b900054b5e8dc88c30caf0e9f6", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cartoon character's body position at the end of the movement sequence?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character raises both arms." }, { "id": "B", "text": "The character spins in a full circle." }, { "id": "C", "text": "The character stands still with arms crossed." }, { "id": "D", "text": "The character lowers its body downwards." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 653, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/052210dc5b4449a2b9d7fb411ad1d17f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many teeth of the blue gear touch the brown gear?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "One" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Four" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 654, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0cbaea69154f44a691f427452529f485", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many wing flaps does the mysterious creature do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four flaps" }, { "id": "B", "text": "One flap" }, { "id": "C", "text": "Five flaps" }, { "id": "D", "text": "nan" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 655, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1c582e96fc4c4d67aef425a79c6bb146", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many fridge door shelves are stuffed with items?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three shelves" }, { "id": "B", "text": "nan" }, { "id": "C", "text": "Five shelves" }, { "id": "D", "text": "All six shelves" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 656, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/25ff1591e7204847b7592f554b5f3487", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Where does the man move his arms first?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Arms to the sides" }, { "id": "B", "text": "Arms upward" }, { "id": "C", "text": "Arms downward" }, { "id": "D", "text": "Arms in front of the body" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 657, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/301512a7d7b148fc97d901dddf6f6e85", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the boy make with his hands?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Waves his right hand straight" }, { "id": "B", "text": "Calls on the phone" }, { "id": "C", "text": "He raises his right hand, then his left hand" }, { "id": "D", "text": "Raises his left hand up" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 658, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3293fa65589240018095232595bc3ba6", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What lies in the opening cabinets?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Magazines" }, { "id": "B", "text": "Candles" }, { "id": "C", "text": "Nothing" }, { "id": "D", "text": "Books" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 659, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3528682d376944b0af275c41a84b2a42", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does a boy jump in a dance?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four times" }, { "id": "B", "text": "Twice" }, { "id": "C", "text": "He doesn't jump" }, { "id": "D", "text": "One time" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 660, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/422afe60d9704ab982fa43e5b33f87e3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How does the shark move his sword?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Brings the sword up, brings it back down, then tilts it to the right" }, { "id": "B", "text": "Brings the sword up, tilts it to the right, then brings it back down" }, { "id": "C", "text": "Brings the sword up, then immediately brings it back down" }, { "id": "D", "text": "Brings the sword up, tilts it to the left, then brings it back down" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 661, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/46905ce1a93243c1860e3cf0cc741082", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many gold coins move around the character?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six" }, { "id": "B", "text": "Seven" }, { "id": "C", "text": "Five" }, { "id": "D", "text": "Nine" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 662, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4876d1a00a8d481fa714850d9e0702a1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the bottom outer side of the waffle maker after it flips?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Blue" }, { "id": "B", "text": "Yellow" }, { "id": "C", "text": "White" }, { "id": "D", "text": "Red" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 663, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4956e12fadbb4035b8f1d4769d271017", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the lightsaber handle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Disintegrates into elements" }, { "id": "B", "text": "Hung in the air" }, { "id": "C", "text": "Rotates and changes the color of the light beam" }, { "id": "D", "text": "Luke Skywalker takes it in his hand" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 664, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4f0fdfd7e886453ca8e5e990e266be49", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where are the creature's hands in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Up in the air" }, { "id": "B", "text": "In front of the body" }, { "id": "C", "text": "Straight down" }, { "id": "D", "text": "Behind the back" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 665, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/50727db65087429dbe853c30650f1bfe", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which direction does the wooden chest open up?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Upward" }, { "id": "B", "text": "It does not open" }, { "id": "C", "text": "To the left" }, { "id": "D", "text": "To the right" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 666, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5849c9dc11134b079fadde5f655e9c6e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the spinning element that first hits the green one?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Orange" }, { "id": "B", "text": "Blue" }, { "id": "C", "text": "White" }, { "id": "D", "text": "Red" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 667, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/64646bb827c44bc4a997d1ca5bdab5de", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many hands are moving in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Five" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "Six" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 668, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74014796a7d54ff6b45dbba041bba8e3", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "What number is not shown on the screen?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "17" }, { "id": "C", "text": "1" }, { "id": "D", "text": "31" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 669, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/801f5d27314740bcbc09c0f30440a946", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many full spins do the red wheels make?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Five" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 670, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8055eed2c486430f85c91ee1f8db7738", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the robot's right hand turn into?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Into the leg" }, { "id": "B", "text": "Into the tail" }, { "id": "C", "text": "Into the sword" }, { "id": "D", "text": "Into the wing" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 671, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/806fa5ccfae24c7baabc157a177a41e7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does a transformer car turn into?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Space robot" }, { "id": "B", "text": "Robot boy in boots" }, { "id": "C", "text": "Robot woman in heels" }, { "id": "D", "text": "Robot man with backpack" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 672, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8674f2ea83be48e9a70fb6bbc30684c7", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What arm does the characters bends in the elbow and lifts up first?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left arm" }, { "id": "B", "text": "Both arm" }, { "id": "C", "text": "The arms are not moving" }, { "id": "D", "text": "Right arm" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 673, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8685d3d1c08d4ff8a6a5e1d04138c59a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What section of the insect's body remains still?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The head" }, { "id": "B", "text": "The legs" }, { "id": "C", "text": "The middle section" }, { "id": "D", "text": "The back section" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 674, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/883c1424430f4f9cbe86ee149bf7e20e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where does the humanoid lands when it falls?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "On the right side" }, { "id": "B", "text": "On the left side" }, { "id": "C", "text": "On its knees" }, { "id": "D", "text": "On the back" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 675, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/890003100b124e96972a7131408e77f6", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "How did the man's imaginary phone conversation end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He started laughing" }, { "id": "B", "text": "He turned and walked away" }, { "id": "C", "text": "He threw his imaginary phone away" }, { "id": "D", "text": "He didn't pick up the imaginary phone" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 676, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9173c228ee5d452b9618b2b21f32cf45", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does a Japanese warrior do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He spins around as he throws his spear" }, { "id": "B", "text": "He stands and looks straight ahead" }, { "id": "C", "text": "He waves a trident in a circle" }, { "id": "D", "text": "Throws away the trident and takes the sword" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 677, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a3489678d43a44a194f10f7b2cb929ce", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What's inside the chest?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Scull" }, { "id": "B", "text": "Dead body" }, { "id": "C", "text": "Gold and diamonds" }, { "id": "D", "text": "A tongue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 678, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a6af7b6fece84dc0ad5bd15afbd74ee3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many steps back does a person take while dancing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "One" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Two" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 679, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a8cdd6c2e936484aaf67e1786c8320d9", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does a pistol magazine change?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "One" }, { "id": "D", "text": "nan" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 680, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/acafbc9831894ab0a291f1c6e05584b2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Who starts playing the piano?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Little girl" }, { "id": "B", "text": "Ghost" }, { "id": "C", "text": "Man in a suit" }, { "id": "D", "text": "Nobody" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 681, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/adbce47ba1014878aaa356b825ab8eaa", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the man in the pink T-shirt doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Falls down" }, { "id": "B", "text": "Martial art capoeira" }, { "id": "C", "text": "Moves hips left to right" }, { "id": "D", "text": "Stands in a fighting position" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 682, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ae6f8ca097c745b7873768c93e9449b2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many balls of each color?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six black, three red and three green balls" }, { "id": "B", "text": "Six red, three white and three black balls" }, { "id": "C", "text": "Six purple, three black and three red balls" }, { "id": "D", "text": "Six white, three black and three red balls" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 683, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/af8a3952faeb49779d4456c8f06c1d48", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where is the yellow disk flying?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Nowhere, frozen in the air" }, { "id": "B", "text": "Falls to the ground" }, { "id": "C", "text": "Flies right out of the frame" }, { "id": "D", "text": "Into the hand of the leader" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 684, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b1e0a8c6cce2413bac4a4baf891db188", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Who is the boy fighting with?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Nobody" }, { "id": "B", "text": "With a gang of bandits" }, { "id": "C", "text": "Boy is dancing" }, { "id": "D", "text": "With another boy" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 685, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b26963a2f54c435cbfd02468c8c8f305", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many steps does the first robot from the left take?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It jumps" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "It falls" }, { "id": "D", "text": "Four" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 686, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b8da7e393132420db74e1dc028782b41", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many robots are standing motionless near the tank?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "All move" }, { "id": "D", "text": "One" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 687, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ba0869cea1944722825abf4d18a6ae41", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color of stone is the tree's favorite?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "All" }, { "id": "B", "text": "Pink" }, { "id": "C", "text": "nan" }, { "id": "D", "text": "Grey" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 688, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bc76f476f69e4036ad4dae618c414be5", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What drawer opens further?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Top one" }, { "id": "B", "text": "Bottom one" }, { "id": "C", "text": "Both open at same length" }, { "id": "D", "text": "Neither drawer opens" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 689, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bdd4a5f095f944019f79e94ea928bedf", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the ball while the propeller is spinning" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Standing still" }, { "id": "B", "text": "It's going down" }, { "id": "C", "text": "Lifting up" }, { "id": "D", "text": "Down and up" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 690, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/be77fe5d25b54dde81c9a16bf41554ea", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many claps does a person make while dancing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Seven" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 691, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c021000ff8e747649283a7b2f199b89b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the boy doing while sitting on the table?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He raises his hands while his legs are still" }, { "id": "B", "text": "He raises one hands up while his legs are moving" }, { "id": "C", "text": "He claps his hands while his legs are still" }, { "id": "D", "text": "He claps his hands while moving his legs" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 692, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c321734d580942f89090989b73e68c17", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does the robot stomp his right foot?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six times" }, { "id": "B", "text": "Four times" }, { "id": "C", "text": "Once" }, { "id": "D", "text": "Nine times" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 693, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c4b98cd7b60d43ef8529a662eb7c73a1", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the robot do in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It suddenly stops after running" }, { "id": "B", "text": "It keeps running same as before" }, { "id": "C", "text": "It turns around" }, { "id": "D", "text": "It starts running backwards" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 694, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c5d8cf1eb04a4ad3b0a9ed6262984270", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What arm and leg does the guy have in front at the beginning of his dance?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left arm, right leg" }, { "id": "B", "text": "Right arm, left leg" }, { "id": "C", "text": "Right arm, right leg" }, { "id": "D", "text": "Left arm, left leg" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 695, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c99c65e95a12460185b82b026f5025ed", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How are the guy's legs positioned when he performs a hand stand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both legs slightly bent, left leg is above the right leg" }, { "id": "B", "text": "Left leg is fully straight up, right leg is bent underneath it" }, { "id": "C", "text": "Legs are straight up, both at the same level" }, { "id": "D", "text": "Both legs slightly bent, right leg above the left leg" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 696, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ca39f08bcc03446c8b973187201878fc", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Who is the first man to attack another?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Man in a green jacket" }, { "id": "B", "text": "Man in a purple vest" }, { "id": "C", "text": "No one attacks, they hug" }, { "id": "D", "text": "Man in a red cap" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 697, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cd5f0d68b94a4afc8a0513a0f2680914", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where are the man's hands when he falls to the ground?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Along the body" }, { "id": "B", "text": "In the air" }, { "id": "C", "text": "On the head" }, { "id": "D", "text": "On the belly" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 698, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d0432c790aba4e3b9b6a9477f23c64ff", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Who stops the peg-top at the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Human hand" }, { "id": "B", "text": "Robot hand" }, { "id": "C", "text": "It stops by itself" }, { "id": "D", "text": "Dog paw" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 699, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d22a4eb8788d4af887061ce64746c8d9", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the lumberjack do in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Bends his knee" }, { "id": "B", "text": "Swings his arm" }, { "id": "C", "text": "Streches his leg" }, { "id": "D", "text": "Stretches his lower back" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 700, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d58a41fc723e4d8a847754f938f5e5b3", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where does the excavator bucket turn?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Right" }, { "id": "B", "text": "Up" }, { "id": "C", "text": "Left" }, { "id": "D", "text": "Down" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 701, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d7421717d5674bafb071c6b3d10a7c70", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the craftsman pick up with his left hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Axe" }, { "id": "B", "text": "Cloth" }, { "id": "C", "text": "Knife" }, { "id": "D", "text": "Nothing" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 702, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/db820a4726934525b1c9e3108950f15d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where are the toy ducks located in the room?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "In the bath tub and on the floor" }, { "id": "B", "text": "In the bath tub and on the shelf" }, { "id": "C", "text": "In the bath tub and next to the cactus" }, { "id": "D", "text": "On the shelf and on the floor" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 703, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dd4c573c3dea40c692b866498c83fe2d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Name the drawer that opens and closes?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Top drawer with a red handle" }, { "id": "B", "text": "Top drawer with a yellow handle" }, { "id": "C", "text": "Middle drawer with a yellow handle" }, { "id": "D", "text": "Bottom drawer with a purple handle" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 704, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e47d442b22d64fbd9a3b7a539fc47987", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "In what order does the mammoth do the actions?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Mammoth picks up food, turns its head to the left, then wiggles its tail" }, { "id": "B", "text": "Mammoth picks up food, wiggles its tail, then turns its head to the left" }, { "id": "C", "text": "Mammoth wiggles its tail, picks up food, then turns its head to the left" }, { "id": "D", "text": "Mammoth turns its head to the left, then wiggles its tail as he picks up food" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 705, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e6659321c9924489a2c48197591ec888", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where is the lock when the chest is opened?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "On the right side of the chest" }, { "id": "B", "text": "Inside the chest" }, { "id": "C", "text": "On the left side of the chest" }, { "id": "D", "text": "Out of the picture" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 706, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e83d0414ba4b4c03ab6973ba2a8cea6e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What position does Siren Head keep its arms and head while screaming?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Arms wide open, head tilted back" }, { "id": "B", "text": "Arms atraight down, head tilted forward" }, { "id": "C", "text": "Arms extended forward, head tilted back" }, { "id": "D", "text": "Arms straight up, head tilted back" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 707, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ef1e0c12fbae40bd8db03b9ca16951fc", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many sharks wiggle their tails?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six sharks" }, { "id": "B", "text": "Two sharks" }, { "id": "C", "text": "Three sharks" }, { "id": "D", "text": "Five sharks" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 708, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f0ec9e67eab2418ea39c1adec6bb66d8", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the knight in the end?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He falls to the ground" }, { "id": "B", "text": "He stands on one knee with his sword up" }, { "id": "C", "text": "He stands in a defensive position with his sword up" }, { "id": "D", "text": "He stands in a defensive position with his sword down" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 709, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f0f42e02cee34bb1b7d508dd253043dc", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many items remain in the box?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "Four" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 710, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f2f12f0eac3c41e5b9c250d8d202749c", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where does the lock lands?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Behind the wooden chest" }, { "id": "B", "text": "In front of the wooden chest" }, { "id": "C", "text": "To the left of the wooden chest" }, { "id": "D", "text": "To the right of the wooden chest" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 711, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f73f09fba1b040b1a35ce31d0f5f81e7", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What's inside the oven?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Baked potatoes" }, { "id": "B", "text": "Casserole" }, { "id": "C", "text": "Bread" }, { "id": "D", "text": "Cake" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 712, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fe0101e6228e44b3a18c5a6b37f9a925", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What actions does the person in a blue costume do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person raises their hands while standing still" }, { "id": "B", "text": "The person stands still" }, { "id": "C", "text": "The person stands still, then crouches in the end" }, { "id": "D", "text": "The person starts dancing" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 713, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0ebbfc65a98c4d51b53f75cfc0effed3", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What's inside the chest?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A pearl necklace, gold bars and two gems" }, { "id": "B", "text": "Gold coins, a pearl necklace and one red gem" }, { "id": "C", "text": "Gold coins and pearls" }, { "id": "D", "text": "Gold coins, a pearl necklace and three gems" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 714, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2b0209b80fc14faf8d9a500b2457be13", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many burnt cigarettes are there?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Nine burnt cigarrets" }, { "id": "B", "text": "Zero burnt cigarrets" }, { "id": "C", "text": "Three burnt cigarrets" }, { "id": "D", "text": "Two burnt cigarrets" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 715, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2f9230aa81784b72aff40e48f1d4a1f5", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where does the moving ball go in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It returns on top of the table" }, { "id": "B", "text": "It hides behind the black ball" }, { "id": "C", "text": "It keeps circling around the black ball" }, { "id": "D", "text": "It falls down the gap" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 716, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2fbf5c057e074b8f87d416afe6405b19", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movements does the soldier do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Soldier is turning right, then turning left" }, { "id": "B", "text": "Soldier is turning left, then turning right" }, { "id": "C", "text": "Soldier is standing still" }, { "id": "D", "text": "Soldier is turning left, then turning around" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 717, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3cdb65939d2f410caf3c074ab44f2b24", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How does the creature move?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It dances, throwing its left hand with the red flower up in the end" }, { "id": "B", "text": "It dances, throwing its right hand with the blue flower up in the end" }, { "id": "C", "text": "It dances, throwing both hands up in the end" }, { "id": "D", "text": "It dances, throwing its right hand with the red flower up in the end" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 718, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3d639bd4e5114b54bf760f8f5f690c07", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How does the shark move?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The shark moves its body to the right, then comes back to its initial position" }, { "id": "B", "text": "The shark moves its body to the left, then spins around" }, { "id": "C", "text": "The shark moves its body to the right, then spins around" }, { "id": "D", "text": "The shark moves its body to the left, then wiggles its tail to the right" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 719, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/432d012f1c5b4f0f80d56195183b3508", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the airplane do?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Opens the hatch" }, { "id": "B", "text": "Spreads its wings" }, { "id": "C", "text": "Folds its wings" }, { "id": "D", "text": "Remains still" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 720, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4b72de1a9e4f49c99e3adcef18040718", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Describe the action each girl does?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Girl in a purple dress bumps her fists, and the girl in a skirt bounces." }, { "id": "B", "text": "Girl in a purple dress bounces on her legs, and the girl in a skirt bumps her fists." }, { "id": "C", "text": "Girl in a purple dress bumps her fists, and the girl in a skirt bows." }, { "id": "D", "text": "Girl in a purple dress bumps her fists, and the girl in a skirt turns around." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 721, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4c035231afe64ab99a6d51d333e3b071", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where does the microphone land?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "On the flloor" }, { "id": "B", "text": "In the right hand" }, { "id": "C", "text": "In the left hand" }, { "id": "D", "text": "The microphone disappears" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 722, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ffc7242475c5434595456e8a2d54eec7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many figurines drop their guns?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "NaN" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "One" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 723, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/50f988fb766b400c85ddfb48cfbf0caf", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What unique motion does the puppet perform with its arms while dancing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Keeps its arms straight down." }, { "id": "B", "text": "Spins its arms in circles." }, { "id": "C", "text": "Crosses its arms in front while moving rhythmically." }, { "id": "D", "text": "Waves its arms above its head." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 724, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/075b922d9aae47d6b79d184a9e246946", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color are the gloves worn by the running character featured in these multi-view images?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Black" }, { "id": "C", "text": "Red" }, { "id": "D", "text": "Blue" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 725, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/69bc5825cda748cf8d19f20c3f48b0a9", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Which arm does the soldier raise first?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Neither arm is raised" }, { "id": "B", "text": "Both arms simultaneously" }, { "id": "C", "text": "Right arm" }, { "id": "D", "text": "Left arm" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 726, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/778010c051564786ab89f736720c062f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color are the tubes surrounding the human heart in the images?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Blue" }, { "id": "C", "text": "Beige" }, { "id": "D", "text": "Red" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 727, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1924052361fb4bd49b714d296a34d2ab", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the dinosaur perform with its tail while moving backward?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It raises its tail upward continuously." }, { "id": "B", "text": "It swirls its tail in circles." }, { "id": "C", "text": "It waves its tail from side to side." }, { "id": "D", "text": "It keeps its tail perfectly still." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 728, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74e7cee7ed044439aca9241b0739156e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the brown monkey raise while coming back to its original position?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Its left leg" }, { "id": "B", "text": "Its right arm" }, { "id": "C", "text": "Its left arm" }, { "id": "D", "text": "Its right leg" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 729, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/26a155c39ea74f9b9f74c641a7a431d9", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the man in the robot suit after he stands in a defensive pose?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He removes his helmet and waves." }, { "id": "B", "text": "He begins to convulse and falls to his knees." }, { "id": "C", "text": "He throws a punch and remains standing." }, { "id": "D", "text": "He slowly backs away from the camera." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 730, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b37e5d4833044ba597b8a5af41b540ae", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does the gorilla punch upwards before raising its arms?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Once" }, { "id": "B", "text": "None" }, { "id": "C", "text": "Three times" }, { "id": "D", "text": "Twice" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 731, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4a6ced2eec1b414fb549d6d23ee7bed4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which arm does the character use to hold the rifle steady while reloading?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both arms" }, { "id": "B", "text": "Left arm" }, { "id": "C", "text": "Neither arm" }, { "id": "D", "text": "Right arm" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 732, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1069ef323b2640a7a72876025cbe3311", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "When swinging her swords, in which hand does the female warrior hold the larger sword?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both hands equally" }, { "id": "B", "text": "Left hand" }, { "id": "C", "text": "Right hand" }, { "id": "D", "text": "Right hand first, then left hand" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 733, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/52d8cdb4279744ce8be4ef34ff79524b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the ping pong paddle when it hits the ball for the second time?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Red" }, { "id": "B", "text": "Yellow" }, { "id": "C", "text": "Green" }, { "id": "D", "text": "Pink" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 734, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/670791315b634cb0b1f59cbfe7cff3e4", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the tip of the cat's tail?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Brown" }, { "id": "B", "text": "Black" }, { "id": "C", "text": "White" }, { "id": "D", "text": "Grey" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 735, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/18c8d71fcac54c5d9dc0dbb5e05db5c8", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "While sitting, which direction does the cartoon puppy mostly turn its head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Straight ahead" }, { "id": "B", "text": "To the left" }, { "id": "C", "text": "Upwards" }, { "id": "D", "text": "To the right" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 736, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3bc6b5dd5c874003a2005f366cb9973d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "During the salsa dance, where does Mickey Mouse hold his hands?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Raised above his head." }, { "id": "B", "text": "Behind his back with fingers interlocked." }, { "id": "C", "text": "In front of his chest with elbows bent." }, { "id": "D", "text": "On his hips while swaying." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 737, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/26d58a4848fb44f598950c6b637bb26f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the creature carrying bags?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Gray with red bags" }, { "id": "B", "text": "Pale beige with brown bags" }, { "id": "C", "text": "Pure white with black bags" }, { "id": "D", "text": "Dark brown with green bags" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 738, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b074cb583c5e4011bbe1d6c4019d0efe", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the stormtrooper perform with the spear during the gesture of victory?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He throws the spear into the air." }, { "id": "B", "text": "He raises the spear while bending backwards." }, { "id": "C", "text": "He drops the spear to the ground." }, { "id": "D", "text": "He holds the spear close to his chest." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 739, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/497d37dc4b97494b8a32966e49cfaf96", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the black robotic spider have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Six" }, { "id": "C", "text": "Ten" }, { "id": "D", "text": "Eight" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 740, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/20e59f9379d846e6bcb2fbba8d609aea", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the upper part of the skeleton perform with its arm?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The arm remains stationary, with no movement." 
}, { "id": "B", "text": "The arm turns palm up." }, { "id": "C", "text": "The arm swings outward, away from the body." }, { "id": "D", "text": "The arm bends at the elbow and lifts toward the head." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 741, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a4dcb07cf3e64106a58a128e3a199144", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "From the turret's perspective, in which direction does the green machine gun turret shoot first?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Upwards" }, { "id": "B", "text": "Downwards" }, { "id": "C", "text": "To the left" }, { "id": "D", "text": "To the right" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 742, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b7d56a9f12ca4b41ada6731cf14cd0b5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What feature is located at the back of the robot and is consistently visible across different views?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Floppy grey ears." }, { "id": "B", "text": "A glowing screen on its face." }, { "id": "C", "text": "A long metallic tail." }, { "id": "D", "text": "A rod sticking out from the head." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 743, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4798d8c87a0e4ad8835217fe93ddf67b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the elk perform after letting out a scream?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Jumps forward" }, { "id": "B", "text": "Bows its head down" }, { "id": "C", "text": "Raises its antlers high" }, { "id": "D", "text": "Lies down" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 744, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a5f422cff0f24adc96ccf061f12c6dc8", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "During the Macarena dance, what is the final position of the character's arms?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Straight down by the sides" }, { "id": "B", "text": "Extended out to the sides" }, { "id": "C", "text": "Raised above the head" }, { "id": "D", "text": "Crossed over the chest" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 745, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ab845df23bf6a0c34e7eb8db2a3fa0558526fb438be655247e8798740544b1e0", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the pig's tail doing in this sequence of images?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pig's tail is wagging." }, { "id": "B", "text": "The pig's tail is curling up." }, { "id": "C", "text": "The pig's tail is twitching." }, { "id": "D", "text": "The pig's tail is staying straight." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 746, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/359b3ca7dd09cf6b59b0a3624e5bac4f488b6e4ac72221f6153ba7932150f798", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Does the object have any visible hinges or latches?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yes, it has a hinge on the top." }, { "id": "B", "text": "Yes, it has a latch on the front." }, { "id": "C", "text": "No" }, { "id": "D", "text": "Yes, it has two visible hinges on the side." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 747, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/18701fd2ff9b3ccae49d43b210ed120f45e98f7d41553297046694aed587c381", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Describe the character's outfit and the action they are performing. From the front view, what graphic is visible on their shirt? From the side view, what is the color of their hair? From the back view, can you describe the hairstyle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a light purple sleeveless top, pink shorts, white socks, and orange shoes. They are walking. A rabbit graphic is visible on the front of their shirt. Their hair is light teal/blue. From behind, their hair appears in a short bob with a small upward point at the back." }, { "id": "B", "text": "The character is wearing a purple dress and orange shoes. They are skipping. A bird graphic is visible on the front of their dress. Their hair is green. From behind, their hair is braided." }, { "id": "C", "text": "The character is wearing a pink sleeveless top, purple shorts, and orange shoes. They are running. A cat graphic is visible on the front of their shirt. Their hair is light blue. From behind, their hair is in a ponytail." }, { "id": "D", "text": "The character is wearing a light purple top and pink pants. They are dancing. A flower graphic is visible on the front of their top. Their hair is dark blue. From behind, their hair is in a bun." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 748, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d16b5d080bb9406183e8488ca4265690", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Does the object have any visible limbs that are not fully connected to the main body? " ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yes, it has two legs that are not connected at the knees." }, { "id": "B", "text": "Yes, it has a tail that is not connected at the base." }, { "id": "C", "text": "Yes, it has two arms that are not connected at the elbows." }, { "id": "D", "text": "No, all visible limbs are fully connected to the main body." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 749, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/21eaa407bf164cdea21135a1d1b2b8f3", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color are the stripes on the character's right shoulder?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Blue" }, { "id": "C", "text": "Black" }, { "id": "D", "text": "Red" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 750, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bba8f7d50c943a9296679d7af05a5d6137685631127c5cfebc5bbf8d8181a412", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color scheme of the bottom of the skateboard?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It has blue and red stripes." }, { "id": "B", "text": "It is completely solid red." }, { "id": "C", "text": "It is completely solid blue." 
}, { "id": "D", "text": "It has black and white stripes." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 751, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4941b660a4ed4711b55fc4293abbeca0", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many knobs does the oven have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "One" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 752, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/318feb89dae34b0ee72d89ca2b9e5784cb68120dbf29ea535621e7a7099ad547", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Is the object holding the pencil in its left hand or right hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Neither" }, { "id": "B", "text": "Both" }, { "id": "C", "text": "Left" }, { "id": "D", "text": "Right" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 753, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d6aff073561a4a9f84c9d12472f17002", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the tie the character is wearing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Blue" }, { "id": "B", "text": "Red with yellow lettering" }, { "id": "C", "text": "Yellow with red lettering" }, { "id": "D", "text": "Green" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 754, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c1010896becb9c27a7f9e712ad5294f6860c20b52eea65e33467e75b1c346149", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Is there a tail on the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yes, but it's very small" }, { "id": "B", "text": "No" }, { "id": "C", "text": "Yes, on the right side" }, { "id": "D", "text": "Yes, on the left side" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 755, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1e88e2e6a745e053d22166840dfa08dbab75a2927a9f03ab21587befb386f02a", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the object writing on?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A book" }, { "id": "B", "text": "A tablet" }, { "id": "C", "text": "A piece of paper" }, { "id": "D", "text": "A clipboard" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 756, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9004dce7ce95f0763270b9ded2f1f4762808f2ec64cebe9ea4f60939f6bb6229", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the purse that the person is carrying?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Burgundy" }, { "id": "B", "text": "Black" }, { "id": "C", "text": "Red" }, { "id": "D", "text": "Brown" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 757, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0f23c4bd2275495343cd20f33a63028aed905ef1233ef6b18e9018524250d191", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many small grey screws are visible on the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "2" }, { "id": "C", "text": "1" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 758, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c571211ca2cc4f28b45ba18b4d628131", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Does the creature have more than one antenna?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yes, three" }, { "id": "B", "text": "No" }, { "id": "C", "text": "Yes, two" }, { "id": "D", "text": "Yes, four" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 759, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/690d3df35150f39bc1137a6f42b8dc455a70cc61bbeb54735060916f8506af28", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing with their right hand in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "They are reaching down to touch something on the ground." }, { "id": "B", "text": "They are adjusting their shorts." }, { "id": "C", "text": "They are holding a water bottle." 
}, { "id": "D", "text": "They are pointing at something in the distance." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 760, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4cd8e23a556ee98e960f0754f7ae480127c656ceb349c89c83b2c482a41bb237", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which limb does the figure bend the most during the sequence?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left leg" }, { "id": "B", "text": "Left arm" }, { "id": "C", "text": "Right leg" }, { "id": "D", "text": "Right arm" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 761, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f04417c2180443e18c4059d8e8102ae2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "What is the total number of large white wings on the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Six" }, { "id": "D", "text": "Eight" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 762, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/78ff9b1ea7e321a8f5c2579dbea3deb873f2cf08e87d1c39d676f89926f50ec3", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the object holding in its right hand?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A sword" }, { "id": "B", "text": "A machete" }, { "id": "C", "text": "A spear" }, { "id": "D", "text": "A stick" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 763, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2733f5754aba4eb0b885cc292fa460ab", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What type of appendages does the object have on its underside?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Blade-like appendages" }, { "id": "B", "text": "Wheel-like appendages" }, { "id": "C", "text": "Claw-like appendages" }, { "id": "D", "text": "Leg-like appendages" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 764, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/81bc876b47a3e938e920999616ab562f2516b8505298215ae7a3a5da948a4fd9", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Describe the creature's attire and accessories: From the front view, what is covering its head? From the side view, what is attached to its left arm? From the back view, what is fastened around its waist?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature has a horned helmet on its head, a smooth, metallic bracelet on its left arm, and a leather belt with pouches around its waist." }, { "id": "B", "text": "The creature has a feathered headdress on its head, a wrapped bandage on its left arm, and a rope tied around its waist." }, { "id": "C", "text": "The creature has nothing covering its head, a glowing orange gauntlet on its left arm, and a chainmail skirt around its waist." 
}, { "id": "D", "text": "The creature has a red hood covering its head, a spiked, glowing orange armband on its left arm, and a simple cloth or skirt-like garment tied around its waist." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 765, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7c83ec78ba2144aeb7726a688ea2cf19", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many white clouds are surrounding the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Seven" }, { "id": "B", "text": "Nine" }, { "id": "C", "text": "Six" }, { "id": "D", "text": "Eight" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 766, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/42326bac4b921c181af3909994bd37cc5b54e219c059890eb1da49a90201045d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does this object have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Five" }, { "id": "D", "text": "Two" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 767, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5e36f8658f9f4729848f0dbd2cf5198d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What arm does the woman lift?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Neither" }, { "id": "B", "text": "Right" }, { "id": "C", "text": "Left" }, { "id": "D", "text": "Both" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 768, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/38f052a2027346e2943b4c76d2572415", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many bullets are in the revolver?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Six" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 769, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/087773ca41434223bd917c837b9169d0", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Does the punching device move horizontally or vertically?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Vertically" }, { "id": "B", "text": "Doesn't move" }, { "id": "C", "text": "Horizontally" }, { "id": "D", "text": "Up and down" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 770, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/29bce972c0f740059c8af4068f2b7620", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the green wheel do in the end?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "There is no green wheel in the video" }, { "id": "B", "text": "Continues spinning" }, { "id": "C", "text": "Stops" }, { "id": "D", "text": "Falls off" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 771, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dbca0e98e5384310883733660a1629dc", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the creature do in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Opens its mouth and raises its tail" }, { "id": "B", "text": "Raises its tail and shakes its head" }, { "id": "C", "text": "Nothing" }, { "id": "D", "text": "Stomps its legs and opens its mouth" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 772, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/55fa532622724865aa32888e60d785c7", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many lotus leaves are there in total?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Seven" }, { "id": "B", "text": "Six" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Four" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 773, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6e98515da99d4d3395794172d7bafbbe", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the lid in the back?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It falls off" }, { "id": "B", "text": "It opens" }, { "id": "C", "text": "Nothing" }, { "id": "D", "text": "It closes" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 774, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e3f90641e895498cbd739e240ec88d0b", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many turrets are there in total?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six" }, { "id": "B", "text": "Twelve" }, { "id": "C", "text": "Ten" }, { "id": "D", "text": "Nine" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 775, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ccdf1687772844c2b6b3fa7bb86aa1f7", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Are the vertical axes of the upper and lower parts of this part on the same straight line?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "No" }, { "id": "B", "text": "Impossible to tell" }, { "id": "C", "text": "Sometimes" }, { "id": "D", "text": "Yes" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 776, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1b0fdb477593406a91545ce67e0528fd", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Are the labels on the inside or outside of the cabinet doors?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both" }, { "id": "B", "text": "Inside" }, { "id": "C", "text": "Outside" }, { "id": "D", "text": "Neither" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 777, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1a1bbd5e3cd746888232fefa46b3f7d1", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It stays the same size" }, { "id": "B", "text": "It grows in size" }, { "id": "C", "text": "It shrinks" }, { "id": "D", "text": "It disappears" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 778, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/69382cb2790e4e29ade4df9a611ccb63", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Who is hiding behind a closed door?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A ball" }, { "id": "B", "text": "A cat" }, { "id": "C", "text": "Nothing" }, { "id": "D", "text": "A boy" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 779, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/86f71c7808fc4ae887fb97641ba8b931", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many buttons does the character's coat have?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Six" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Two" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 780, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2329a15f3aba4f3c945039cb03609ebf", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What part of the body is the girl holding the dragon by?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "By its tale" }, { "id": "B", "text": "By its ears" }, { "id": "C", "text": "By its mustache" }, { "id": "D", "text": "By its neck" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 781, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/73f87c79607b4978a2bfbf3554eafdc7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the wheel do in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Continues spinning" }, { "id": "B", "text": "Breaks apart" }, { "id": "C", "text": "Starts spinning in another direction" }, { "id": "D", "text": "Stops" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 782, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ea36dcd576e74c7390db6f4e1ae19da2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the character do?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Runs forward" }, { "id": "B", "text": "Dances" }, { "id": "C", "text": "Jumps" }, { "id": "D", "text": "Stands still" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 783, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fc6a07fe83cd482aa20adbfe833441c8", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the robot do after he breaks free from the room?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "He runs away" }, { "id": "B", "text": "He doesn't break free" }, { "id": "C", "text": "He cries" }, { "id": "D", "text": "He falls unconscious" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 784, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1314394ecfa04912aee203f658ac580b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What does it say on this man's cap?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "CL" }, { "id": "B", "text": "SF" }, { "id": "C", "text": "ZF" }, { "id": "D", "text": "FS" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 785, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ca7d5dc90dc64c5c83def087e6cbff3e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many coins are inserted in the rotary dial?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "One" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 786, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9a2a01885d1f4ea183daadef41a281b6", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the claw do with the smaller piece?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Pushes it" }, { "id": "B", "text": "Nothing" }, { "id": "C", "text": "Grabs and throws it" }, { "id": "D", "text": "Grabs and lifts it" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 787, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/509667ee4a40446e88abbf2dbbfbd426", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Is the robot waving its left or right hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left Hand" }, { "id": "B", "text": "Right hand" }, { "id": "C", "text": "Doesn't wave" }, { "id": "D", "text": "Both at the same time" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 788, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6b57854a80154db9878113fb0d026d87", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the creature do?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Walks forward and throws back his head" }, { "id": "B", "text": "Jumps and turns around" }, { "id": "C", "text": "Moves his head and legs while arms remain still" }, { "id": "D", "text": "Moves his arms wide open and throws back his head" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 789, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2c770c5c38154d568fda4e0d22ae0eb1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Which object is the smallest here?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Mailbox" }, { "id": "B", "text": "Pokeball" }, { "id": "C", "text": "Pond" }, { "id": "D", "text": "House" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 790, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9b3bfc474fbb421ba35c8747b52f56fd", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many stripes are there on the helmet?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Two" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Zero" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 791, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2ba0fe81b1654d529b296377991f7cee", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does the person jump?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Zero" }, { "id": "B", "text": "One" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Two" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 792, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7e39b04d097c43c4afe54ac1979ad673", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Is this door closed well?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "There is no door" }, { "id": "B", "text": "No" }, { "id": "C", "text": "Yes" }, { "id": "D", "text": "Impossible to tell" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 793, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5f81c38ad9f24b07aa02d171855ebe9c", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many Chinese characters are there on the back of the character's shirt?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Two" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 794, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/42dc0771e4b3432b9252f9547bb132e4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the can do?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Falls from the vending machine and stops" }, { "id": "B", "text": "Nothing" }, { "id": "C", "text": "Get stuck in the vending machine" }, { "id": "D", "text": "Falls from the vending machine and rolls on the ground" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 795, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5aabd63c805444cdb71c6d636e11e4f3", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many teeth does this monster have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Five" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "Three" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 796, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/417fd314b2b74e9d8cc198c9c615d140", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the bullet head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Red" }, { "id": "B", "text": "Silver" }, { "id": "C", "text": "Black" }, { "id": "D", "text": "Gold" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 797, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0fd2f0a727b74526ab05b6b9e54b81e2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the bear do in the end?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Runs forward" }, { "id": "B", "text": "Gets up from the ground" }, { "id": "C", "text": "Stays still" }, { "id": "D", "text": "Falls on the ground" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 798, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/faece99a45014657923687be07437d28", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Does the animation show the door opening or closing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both" }, { "id": "B", "text": "Opening" }, { "id": "C", "text": "Neither" }, { "id": "D", "text": "Closing" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 799, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/17e94db15292455aa6c2f1b3ebad01ef", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the jar in the tree man's left hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yellow" }, { "id": "B", "text": "Black" }, { "id": "C", "text": "Blue" }, { "id": "D", "text": "White" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 800, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/41066b83157540d992d6ad1998afa3db", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many teeth did the shark show?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "Ten" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 801, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0c08750981cd4b5d9211639de85c6fd2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happened after the cabinet door opened?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tv turned on" }, { "id": "B", "text": "Nothing" }, { "id": "C", "text": "The door closed" }, { "id": "D", "text": "The monster jumped out" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 802, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/235725b81c1042468029169520374c24", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the ball do after rolling down from the last step?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Rolls straight" }, { "id": "B", "text": "It doesn't roll off the steps" }, { "id": "C", "text": "Stops" }, { "id": "D", "text": "Jumps to the next ladder" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 803, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/46661a47e4aa484c91adc00b3db71ef2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many red gears are there on this hand-cranked machine?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Seven" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Two" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 804, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/30f2271c572c494bbf7293d11a81cf87", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the cross necklace the girl is wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Yellow" }, { "id": "C", "text": "Green" }, { "id": "D", "text": "Black" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 805, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/df94fc680c404ffeb0a7b864958600a5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is Snoopy's back?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Green" }, { "id": "B", "text": "Yellow" }, { "id": "C", "text": "White" }, { "id": "D", "text": "Black" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 806, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e159e2eec58f4b9f81692e93d6505e2a", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Which of the alien's eyes has a slit pupil?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left" }, { "id": "B", "text": "He has only one eye" }, { "id": "C", "text": "Both" }, { "id": "D", "text": "Right" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 807, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fc042ed91a1f4c2d90eced705bb13c77", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many apples are there in total?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Five" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Six" }, { "id": "D", "text": "Four" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 808, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1e090edef8704e3894834803f31e0efd", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many magnetic tapes are moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Zero" }, { "id": "D", "text": "Two" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 809, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ba87016cd69a4d97b73b6d6f05d7e17c", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many tomatoes are plucked?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Ten" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 810, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f18fecef3f314b0dba18076b9dc82ae1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the creature do? " ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "It wags its tail" }, { "id": "B", "text": "It doesn't move" }, { "id": "C", "text": "It blinks" }, { "id": "D", "text": "It spins around" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 811, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/deb80ff43e9a490a84803a3f11d819a2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many heads are moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Two" }, { "id": "D", "text": "Three" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 812, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f659b630f1ab4deeae04e4028ebab448", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How does the plane move vertically?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Quickly" }, { "id": "B", "text": "Slowly" }, { "id": "C", "text": "It doesn't move" }, { "id": "D", "text": "It only moves horisontally" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 813, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/40f7eafb3ffd4d2586b17830be1460eb", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which half of the inside of the horn is red?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Bottom half" }, { "id": "B", "text": "Left half" }, { "id": "C", "text": "Right half" }, { "id": "D", "text": "Upper half" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 814, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/98a1d5b2288d49d993039cb161913cd3", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Was the person attacked from the front or the back?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "From the front" }, { "id": "B", "text": "From below down the legs" }, { "id": "C", "text": "From the back" }, { "id": "D", "text": "A blow to the head" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 815, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7a4bc9c954b4466b95c6156772a99670", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many doors are there on the upper layer of this cabinet?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Two" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 816, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bb7af5de5bf14193a53f28c0853b48dd", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which letter on the board is the robotic arm touching?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Letter W" }, { "id": "B", "text": "Letter S" }, { "id": "C", "text": "Letter O" }, { "id": "D", "text": "Letter C" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 817, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/48b073d405e5472d9fd7e0dbe45c973e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Is this slipper for the left foot or the right foot?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Left foot" }, { "id": "B", "text": "Both foots" }, { "id": "C", "text": "Right foot" }, { "id": "D", "text": "nan" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 818, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6f7203133cdd46168d472ead2d2d1264", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many bubbles have been popped?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Five" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 819, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/06bc9337e21948d3b210567ba2950db6", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What leg moves forward?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Neither" }, { "id": "B", "text": "Both" }, { "id": "C", "text": "Right" }, { "id": "D", "text": "Left" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 820, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3f63bf4c90a94d879cd0fbdb42e7a961", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the guy make with his legs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Walks forward" }, { "id": "B", "text": "Stands still" }, { "id": "C", "text": "Jumps" }, { "id": "D", "text": "Bends the knees" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 821, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b5d59eaec25746c097727c73268720f1", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many ants fall off the tape?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Zero" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "One" }, { "id": "D", "text": "Two" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 822, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/879f49ca611b46c9aa1e94eab6403128", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times did the character spin on the spot?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One time" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Two times" }, { "id": "D", "text": "Three times" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 823, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4f617830d6944359bc58a522a600373b", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the oven in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fire is gone out, the smoke stopped rising from the chimney" }, { "id": "B", "text": "The fire is gone out, the smoke is rising from the chimney" }, { "id": "C", "text": "The fire is burning, the smoke is rising from the chimney" }, { "id": "D", "text": "The fire is burning, the smoke stopped rising from the chimney" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 824, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/af4614144e46440ea84c191a4c610a44", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What dance is this character performing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Irish dance" }, { "id": "B", "text": "Hip hop dance" }, { "id": "C", "text": "Gangnam Style" }, { "id": "D", "text": "Cha-cha" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 825, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ce13ea4b3f7543bb92b64e1993a69838", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the lizard do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Walks forward" }, { "id": "B", "text": "Jumps" }, { "id": "C", "text": "Sticks out his tongue" }, { "id": "D", "text": "Wags its tail" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 826, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ca107c2e7d08407eba3aef20e6899a78", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many circles did the sleepwalker turn?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Six" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Two" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 827, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f4dbd7330ab1478d8d955b02b7866efe", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many blades does this helicopter have on its main rotor?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Five" }, { "id": "D", "text": "Nine" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 828, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/96441a5f2b8f449296b204533d4bd724", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many barrels does a machine gun have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Twelve" }, { "id": "B", "text": "Three" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Ten" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 829, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/700c26325c5544fbabd8a9ca2bbe134d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "How many times does a hockey player lift his stick up?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "One" }, { "id": "B", "text": "Two" }, { "id": "C", "text": "Zero" }, { "id": "D", "text": "Three" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 830, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0cb340a413824db18f5ff4e1fd20994a", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the vampire do? 
" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The vampire got out from a coffin, then he went back in the coffin" }, { "id": "B", "text": "The vampire did nothing" }, { "id": "C", "text": "The vampire turns into a bat, then he turns back to a human form" }, { "id": "D", "text": "The vampire goes through a coffin, then the vampire runs through a coffin, then his head rotates 360 degrees" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 831, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9ba4029adc514f5386dfd9564c65b085", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the the green cube do?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Rolls to the side" }, { "id": "B", "text": "Jumps" }, { "id": "C", "text": "Stays still" }, { "id": "D", "text": "Moves in a straight line" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 832, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/df23fda0db1a432d8b2095f69a458f91", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the woman do while pointing her finger?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Cries" }, { "id": "B", "text": "Laughs" }, { "id": "C", "text": "Dances" }, { "id": "D", "text": "Speaks" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 833, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/85d9e19e35bf4060a1cdb4cc132bcfa3", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many layers are there in the cabinet?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Five" }, { "id": "B", "text": "Zero" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Four" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 834, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6f8133161f66462d9474bfbc124b7470", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the electric kettle perform after opening its lid in the multi-view video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The electric kettle slides toward the wall." }, { "id": "B", "text": "The electric kettle changes color." }, { "id": "C", "text": "The electric kettle lifts off the surface." }, { "id": "D", "text": "The electric kettle boils water visibly." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 835, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/12bd08d66fe04a84be446e583d6663ac", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific action is the 3D Star Wars character consistently performing across different angles?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Making hand gestures while dancing." }, { "id": "B", "text": "Sitting down and standing up repeatedly." }, { "id": "C", "text": "Standing still without any movement." }, { "id": "D", "text": "Running across a battlefield." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 836, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d13155dadb914e9cb8c2998f638ddc02", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the man perform before he starts walking in the video?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Waves to someone in the distance." }, { "id": "B", "text": "Adjusts his gas mask." }, { "id": "C", "text": "Lifts his hands to waist level and looks around." }, { "id": "D", "text": "Puts on his helmet." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 837, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dfa0e22750b44e3f8ff856d62f05c329", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What component of the 3D model is responsible for the movement of the blue wheel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A magnetic force applied externally" }, { "id": "B", "text": "The weight of the wheel itself making it spin" }, { "id": "C", "text": "A motor hidden inside the base" }, { "id": "D", "text": "The handle connected to the crankshaft" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 838, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ce191e0410294ccfadf1f105b51874db", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the character perform after lying on the ground?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character flies into the air and hovers." }, { "id": "B", "text": "The character runs in a circle." }, { "id": "C", "text": "The character jumps up and stands on its feet." }, { "id": "D", "text": "The character slowly stands up without jumping." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 839, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8cd6c072be3a41e9aef0afcc6b0aae6c", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What feature is uniformly present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A dust cover lid." }, { "id": "B", "text": "A blue LED light indicator." }, { "id": "C", "text": "The black spinning vinyl record." }, { "id": "D", "text": "A colorful sticker on the turntable." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 840, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8e247fd8586b46b08f4fdc9bcaa994a4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What specific martial arts style is being demonstrated in the multi-view video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Judo" }, { "id": "B", "text": "Taekwondo" }, { "id": "C", "text": "Capoeira" }, { "id": "D", "text": "Tai Chi" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 841, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5bb2709145754197a056f88ce6b42810", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What transformation occurs to the 3D model of the lunar rover across different perspectives in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Its wheels turn inwards for compactness." }, { "id": "B", "text": "Its components unfold as it opens up." }, { "id": "C", "text": "It disassembles into smaller pieces." }, { "id": "D", "text": "It becomes transparent while moving." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 842, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e5e952b1d18d48208dbebb76bbe2c54d", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What feature of the 3D creature changes noticeably throughout the video sequence?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tail is wagging." }, { "id": "B", "text": "The creature grows larger." }, { "id": "C", "text": "The creature starts flying." }, { "id": "D", "text": "The platform rotates." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 843, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e5e5048f3cef49158d2619252204fc0b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action does the character perform after closing the umbrella?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character uses the umbrella as a sword." }, { "id": "B", "text": "The character spins and throws the umbrella in the air." }, { "id": "C", "text": "The character opens the umbrella again." }, { "id": "D", "text": "The character sits down with the umbrella." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 844, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9c7cbb6c2ab14c6c98beb57d091bcf8e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What motion is observed in the table holding the red ball across different views in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The table bounces up and down." }, { "id": "B", "text": "The table tilts from side to side." }, { "id": "C", "text": "The table spins around its center." 
}, { "id": "D", "text": "The table rotates continuously in one direction." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 845, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5d274fcdbf2d4e59b6bdc5b9afa66839", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What movement does the elk primarily make in the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The elk turns its head from side to side." }, { "id": "B", "text": "The elk waves its antlers around." }, { "id": "C", "text": "The elk shakes its entire body." }, { "id": "D", "text": "The elk lowers its head to the ground." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 846, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0c83e28627b247e194212d9fbe119ad2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is the man performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Pulling an imaginary gun out from his back." }, { "id": "B", "text": "Turning around quickly." }, { "id": "C", "text": "Removing a jacket." }, { "id": "D", "text": "Looking over his shoulder." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 847, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/12b9fe42168a406c98ccf330b9998093", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is the blue character performing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Jumping up and down" }, { "id": "B", "text": "Beating his chest with his hand" }, { "id": "C", "text": "Spinning around quickly" }, { "id": "D", "text": "Clapping his hands" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 848, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/84a4477ffa4c21d1ae670e287a3b7699a392932fae0ad61c9511d33f03742fb6", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "From the side view, what color are the headphones? From the back view, how many stripes are visible on each leg of the pants? From the front view, what color are the lenses of the sunglasses?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Orange and red, two, orange/red" }, { "id": "B", "text": "Red, one, pink" }, { "id": "C", "text": "Red and white, two, orange/red" }, { "id": "D", "text": "Orange, one, pink" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 849, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/95801da96c9242c78d05e0abcb547fb8", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Describe the character's attire and equipment: From the front view, what color is the main part of the character's outfit? From the side view, does the character appear to have any accessories attached to their back? From the back view, what distinguishing feature can be observed about the character's tail?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character's outfit is primarily light blue. They have a dark gray pack on their back. The tail is the same shade of blue as the body and has a bushy tip." }, { "id": "B", "text": "The character's outfit is primarily black. 
They have a jetpack on their back. The tail is the same shade as the body and is curled upwards." }, { "id": "C", "text": "The character's outfit is primarily green. They have no accessories on their back. The tail is a darker shade of green and appears to be pointed." }, { "id": "D", "text": "The character's outfit is primarily dark purple or blue. They have a light gray/beige pack on their back. The tail is a lighter shade of blue and appears to have a flame or energy effect at its tip." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 850, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2bdc34f32ef332e21746e375a1ce085990228fe65d8a435f1727fcb441af0c1c", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Describe the astronaut's actions from multiple perspectives: a) From the front view, what hand motion does the astronaut make? b) From the side view, is the astronaut moving forward or backward? c) From the back view, what is most visible on the astronaut's backpack?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a) The astronaut brings both hands together in front of their chest. b) The astronaut appears to be walking in place or slightly forward. c) Two rectangular panels are most visible on the backpack." }, { "id": "B", "text": "a) The astronaut claps their hands. b) The astronaut is stationary. c) A large triangular panel is most visible on the backpack." }, { "id": "C", "text": "a) The astronaut points with their left hand. b) The astronaut is floating upwards. c) The backpack is completely obscured from this angle." }, { "id": "D", "text": "a) The astronaut waves with their right hand. b) The astronaut is clearly moving backward. c) A circular antenna is most visible on the backpack." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 851, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4d09acc5cf7148609be1c3b66467846d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many six-membered rings are present in the molecule?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Two" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "One" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 852, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/12e414c58b0721341954e193dc2fc74fa6fbe2f3c7ef442b471091bf1418556e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Is the person wearing any bracelets on their wrists?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yes, on both wrists" }, { "id": "B", "text": "Yes, on the right wrist" }, { "id": "C", "text": "No" }, { "id": "D", "text": "Yes, on the left wrist" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 853, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f7c1e3adea3915a2bad2969dba20d90237d88f1b094ac48b2936c15e6be2d187", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many presents are wrapped in red ribbon?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Two" }, { "id": "C", "text": "Four" }, { "id": "D", "text": "Five" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 854, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/eafaefd64b29410b860f80950832bc33", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the antenna on the helmet?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Red" }, { "id": "B", "text": "White" }, { "id": "C", "text": "Green" }, { "id": "D", "text": "Blue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 855, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3c48e426db054ff38f89a3adfd8b0e23", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the relationship between the two hemispherical objects and the colored spheres?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The colored spheres are inside both hemispherical objects." }, { "id": "B", "text": "The colored spheres are not related to the hemispherical objects." }, { "id": "C", "text": "The colored spheres are inside one of the hemispherical objects." }, { "id": "D", "text": "The colored spheres are outside the hemispherical objects." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 856, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/87ecfe9c1cc745949bfb9f3e0604b02d", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color are the joints of the blocky humanoid figure?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The joints of the blocky humanoid figure are orange, brown, and beige." }, { "id": "B", "text": "The joints of the blocky humanoid figure are black, white, and gray." }, { "id": "C", "text": "The joints of the blocky humanoid figure are red, green, and purple." }, { "id": "D", "text": "The joints of the blocky humanoid figure are pink and blue." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 857, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0bc6bfa4b79b465ba54b7624f8201374", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the character turn around?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "1" }, { "id": "C", "text": "4" }, { "id": "D", "text": "3" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 858, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3cd8902168cf45938b221870c74db743", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which truck is carrying a magenta car?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The truck with a yellow cab" }, { "id": "B", "text": "The truck with a magenta cab" }, { "id": "C", "text": "The truck with a magenta cargo container" }, { "id": "D", "text": "The truck with an orange cab" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 859, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ce8b14eeabcb40249c74a8319b273cde", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is walking slowly." }, { "id": "B", "text": "The character is sitting still." }, { "id": "C", "text": "The character is standing with slight body movement." }, { "id": "D", "text": "The character is sleeping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 860, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d08f28fae9774a19a93106ea02b44335", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is jumping." }, { "id": "B", "text": "The creature is running." }, { "id": "C", "text": "The creature is sitting." }, { "id": "D", "text": "The creature is dancing." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 861, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a1c1355af8b6466b8e6910582179cb3b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the keychain attached to the handle of the key doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The keychain is spinning in a circle." }, { "id": "B", "text": "The is no keychain." }, { "id": "C", "text": "The keychain is swinging back and forth." }, { "id": "D", "text": "The keychain is hanging still." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 862, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/15077cd6afc34fb3970e957a474b15ce", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the red humanoid figure doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red humanoid figure is standing still and observing its surroundings." }, { "id": "B", "text": "The red humanoid figure is jumping and performing a high kick." }, { "id": "C", "text": "The red humanoid figure is sitting on the ground and looking up." }, { "id": "D", "text": "The red humanoid figure is transitioning from a crouching stance to standing and moving." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 863, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e910d8b2c8284891b079cd8f9e4ba74e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many hands does the creature have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature has three hands." }, { "id": "B", "text": "The creature has two hands." }, { "id": "C", "text": "The creature has five hands." }, { "id": "D", "text": "The creature has four hands." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 864, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/14bb6f4dd4e14cd2a3ffeb88669639b5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the top block in the stack of yellow rectangular blocks?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Rectangular with a rectangular cutout on the side" }, { "id": "B", "text": "Rectangular with a rectangular cutout in the center" }, { "id": "C", "text": "Square with a square cutout in the center" }, { "id": "D", "text": "Round with a circular cutout in the center" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 865, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3a5fb4c447084c659e320cef4bc005f6", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many stars does the flag have on one side?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "7" }, { "id": "B", "text": "8" }, { "id": "C", "text": "5" }, { "id": "D", "text": "6" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 866, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cd3dff9f2ebe4dc8a8e3644e5266b210", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many red laser beams are emitted by the device?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "5" }, { "id": "C", "text": "7" }, { "id": "D", "text": "2" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 867, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/afdcd5fa0a6641b587a2c6ce5a7a8187", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the relationship between the red pyramid and the gray pyramid?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red pyramid is placed inside the gray pyramid." }, { "id": "B", "text": "The red pyramid is floating above the gray pyramid." 
}, { "id": "C", "text": "The red pyramid and the gray pyramid are stationary and not related." }, { "id": "D", "text": "The gray pyramid is floating above the red pyramid." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 868, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0e211d0de474419887e1b953bf839087", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is holding nothing." }, { "id": "B", "text": "The character is holding a camera." }, { "id": "C", "text": "The character is holding a flower." }, { "id": "D", "text": "The character is holding a book." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 869, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/43fdb74a2d834ae28422cacdc8e12060", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the sum of up arrows on the front and back?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "6" }, { "id": "C", "text": "12" }, { "id": "D", "text": "18" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 870, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4810c9a4bfef4348a229b83f66314b40", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the creature holding in its right hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is holding a stick." }, { "id": "B", "text": "The creature is holding a sword." }, { "id": "C", "text": "The creature is holding a hammer." }, { "id": "D", "text": "The creature is holding nothing." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 871, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cafa235d4cb24b22abd7325edc8aaa0b", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the relationship between the ribbed dome and the cylindrical structure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The ribbed dome is the top part of the cylindrical structure." }, { "id": "B", "text": "The ribbed dome is the bottom part of the cylindrical structure." }, { "id": "C", "text": "The ribbed dome is a separate object from the cylindrical structure." }, { "id": "D", "text": "The ribbed dome is a part of the tiled interior of the cylindrical structure." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 872, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2d73f605c38f4970919da6c1ea5e7bd8", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "knife" }, { "id": "B", "text": "nothing" }, { "id": "C", "text": "water bottle" }, { "id": "D", "text": "gun" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 873, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b4ffa40554d84990a3ac0776693fcfb7", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the bear doing with the ball?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bear is playing with the ball on the ground." }, { "id": "B", "text": "The bear is tossing and catching the ball." }, { "id": "C", "text": "The bear is holding the ball in its mouth." 
}, { "id": "D", "text": "The bear is throwing the ball into the air." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 874, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1aa7d0435dcb46438af4f967030b5500", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the position of the microphone in relation to the headphones?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The microphone is attached to the right side of the headphones." }, { "id": "B", "text": "The microphone is not attached to the headphones." }, { "id": "C", "text": "The microphone is located in the center of the headphones." }, { "id": "D", "text": "The microphone is attached to the left side of the headphones." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 875, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d9a5b67b5c9142e984f76b1afec1939b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the skeletal creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The skeletal creature is standing still." }, { "id": "B", "text": "The skeletal creature is swimming in the water." }, { "id": "C", "text": "The skeletal creature is crawling." }, { "id": "D", "text": "The skeletal creature is flying in the air." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 876, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/520dcf6baf4642358ce7e9b50dcad8dd", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "In the video, how does the position of the soccer ball change as the truck drives?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The soccer ball remains stationary on the truck as it drives." }, { "id": "B", "text": "The soccer ball falls off the truck as it drives." }, { "id": "C", "text": "The soccer ball moves closer to the back of the truck as it drives." }, { "id": "D", "text": "The soccer ball rotates around a pole." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 877, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74014796a7d54ff6b45dbba041bba8e3", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What number is not displayed on the flat-screen monitor?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "19" }, { "id": "B", "text": "26" }, { "id": "C", "text": "1" }, { "id": "D", "text": "34" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 878, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/042201dac61041fabb88f483368daa3f", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the yellow, rectangular, industrial-looking object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object unfolds and opens, revealing a small screen, buttons, and wires, but then closes again." }, { "id": "B", "text": "The object remains closed and stationary." }, { "id": "C", "text": "The object unfolds and opens, revealing a small screen, buttons, and wires." }, { "id": "D", "text": "The top panel of the object opens and then closes." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 879, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/679f98c58e9d4b88986a637bdd31f3dc", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the central mechanism of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The central mechanism is a light-colored door with a metallic appearance." }, { "id": "B", "text": "The central mechanism is a dark-colored door with a wooden appearance." }, { "id": "C", "text": "The central mechanism is a rectangular panel with a metallic appearance." }, { "id": "D", "text": "The central mechanism is a circular device with a metallic appearance." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 880, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/11d7c77bfe804f448a516928adf3f05d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the fan?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fan is detached from the air conditioning unit." }, { "id": "B", "text": "The fan remains attached to the air conditioning unit." }, { "id": "C", "text": "The fan is replaced with a new one." }, { "id": "D", "text": "The fan spins continuously." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 881, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/733890e4c33f4bf9a618418548d86cc1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the robot's eyes?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot has red eyes." }, { "id": "B", "text": "The robot has green eyes." 
}, { "id": "C", "text": "The robot has yellow eyes." }, { "id": "D", "text": "The robot has blue eyes." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 882, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ca04431879454045aebb7089aa67d5b0", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the caterpillar-like object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The caterpillar-like object is circling around the bare tree." }, { "id": "B", "text": "The caterpillar-like object is sitting still on the ground." }, { "id": "C", "text": "The caterpillar-like object is jumping over the tree." }, { "id": "D", "text": "The caterpillar-like object is climbing up the tree." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 883, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/25234b25da3a4bb8bcc2fa1c9d7ae726", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden structure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden structure raises its arms." }, { "id": "B", "text": "The wooden structure rotates on its axis." }, { "id": "C", "text": "The wooden structure remains stationary." }, { "id": "D", "text": "The wooden structure falls over." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 884, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dac12db90b9340a388ceced17916e095", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "Which character is wearing a purple and yellow coat?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character in the middle" }, { "id": "B", "text": "The character on the right" }, { "id": "C", "text": "The character in the middle with the red jacket" }, { "id": "D", "text": "The character on the left" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 885, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5232584132d24bac9c9bdf729f38ba06", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many flippers does the aquatic creature have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three" }, { "id": "B", "text": "Six" }, { "id": "C", "text": "Five" }, { "id": "D", "text": "Four" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 886, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/470fc92ab2ab42079c5e7310bf958b04", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the ghost-like figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The ghost-like figure is rotating in various positions." }, { "id": "B", "text": "The ghost-like figure is spinning in circles." }, { "id": "C", "text": "The ghost-like figure is jumping up and down." }, { "id": "D", "text": "The ghost-like figure is floating in one spot." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 887, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/34efa81e7e33402ca19cc72f69ea4af8", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is hitting." }, { "id": "B", "text": "The character is sitting." 
}, { "id": "C", "text": "The character is dancing." }, { "id": "D", "text": "The character is running." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 888, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/43efd6cbf30b4e6da7b6b1b7de7f6981", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the robotic gorilla-like figure rotate?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "3" }, { "id": "C", "text": "2" }, { "id": "D", "text": "1" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 889, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4445f33b9b384a7787e50c7d6a1df5e6", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary component inside the cylindrical mechanical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Fan-like structure" }, { "id": "B", "text": "Pump-like structure" }, { "id": "C", "text": "Turbine-like structure" }, { "id": "D", "text": "Motor-like structure" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 890, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/552b1d91806f4122818c79f85d653fe5", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the main object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A multi-barrel rotary machine gun" }, { "id": "B", "text": "A tank" }, { "id": "C", "text": "A single-barrel machine gun" }, { "id": "D", "text": "A helicopter" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 891, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/455d44f0b95746ef89120fcf51a6254a", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the cat doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat is sitting." }, { "id": "B", "text": "The cat is jumping." }, { "id": "C", "text": "The cat is sleeping." }, { "id": "D", "text": "The cat is running." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 892, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4602fa8c465741a7b0dc1d6a9a1de744", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the direction of the movement of the smaller spherical object inside the larger egg-shaped object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The smaller spherical object is moving upwards." }, { "id": "B", "text": "The smaller spherical object is stationary." }, { "id": "C", "text": "The smaller spherical object is moving horizontally." }, { "id": "D", "text": "The smaller spherical object is moving downwards." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 893, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/724c33b50c504cc489bd5ff4f85a4f93", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many doors are opened?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "2" }, { "id": "C", "text": "6" }, { "id": "D", "text": "7" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 894, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b57e46cf75164dd6be3b656316a063c3", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the blue button?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue button rotates around the cylindrical container without opening it." }, { "id": "B", "text": "The blue button flys up then falls to the container ." }, { "id": "C", "text": "The blue button remains stationary and does not interact with the container." }, { "id": "D", "text": "The blue button moves upward and opens the cylindrical container." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 895, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3e1be738c6a7429f835bd2deb04d3ee1", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the blue stuffed toy?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue stuffed toy remains still and does not move." }, { "id": "B", "text": "The blue stuffed toy stands up and walks." }, { "id": "C", "text": "The blue stuffed toy spins around and jumps." }, { "id": "D", "text": "The blue stuffed toy moves its head back and forth." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 896, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e7157acd087e44d784627e585884c725", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Which of the following best describes the appearance of the 4D object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Three cylindrical objects with gray tops sprout green plants sequentially." }, { "id": "B", "text": "Three cylindrical objects with brown tops sprout colorful plants simultaneously." }, { "id": "C", "text": "Three cylindrical objects with brown tops sprout green plants sequentially." }, { "id": "D", "text": "Three cylindrical objects with gray tops sprout green plants sequentially." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 897, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/44688f356e2c46baa20a3b1a63b4c131", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green Christmas tree?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green Christmas tree turns into a snowman." }, { "id": "B", "text": "The green Christmas tree grows taller." }, { "id": "C", "text": "The green Christmas tree remains stationary." }, { "id": "D", "text": "The green Christmas tree spins around the large white sphere." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 898, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b036c59c7dda406c9b21c4138f1971f2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is spinning in a circle." }, { "id": "B", "text": "The object is rotating smoothly." }, { "id": "C", "text": "The object is moving up and down." }, { "id": "D", "text": "The object is remaining still." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 899, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4f90147d2304462a87337d61d1fc07bd", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which bottle is moving towards the cake?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bottle with the red label is moving towards the cake." }, { "id": "B", "text": "The bottle with the yellow label is moving towards the cake." }, { "id": "C", "text": "The bottle with the green label is moving towards the cake." }, { "id": "D", "text": "None of them." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 900, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5b1d248c63044b2e9240b0b200e5a6b9", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the creature's posture?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is jumping up with its arms raised." }, { "id": "B", "text": "The creature is standing upright with its arms at its sides." }, { "id": "C", "text": "The creature is lunging forward with its arms outstretched." }, { "id": "D", "text": "The creature is bending forward with its arms extended." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 901, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7669142ccfd644329b57654b8e96c24e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many circular rings are present around the blade of the sword?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "2" }, { "id": "C", "text": "4" }, { "id": "D", "text": "1" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 902, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7abebe340aca4f46b36368417b3ca920", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing with the handgun?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is loading the handgun." }, { "id": "B", "text": "The person is holding the handgun in a stationary position." }, { "id": "C", "text": "The person is disassembling the handgun." }, { "id": "D", "text": "The person is shooting the handgun." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 903, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ff4074c4cecd46eeba96620bf44e3aee", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many colored bands are present on the sphere?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "2" }, { "id": "C", "text": "5" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 904, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6b0f0ee9be564edd9035e5a594058114", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Where is the white object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A white object on top of the wooden box" }, { "id": "B", "text": "A white object inside the wooden box" }, { "id": "C", "text": "A white object on the floor" }, { "id": "D", "text": "A white object protruding on one side of the wooden box" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 905, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7a9d20d5196942b89db7c4bc12d33bc0", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The door of the device opens." }, { "id": "B", "text": "The handle of the device is turned." }, { "id": "C", "text": "The device moves to a different position." }, { "id": "D", "text": "The control panel is activated." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 906, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/708c0ba60477438391accb57d991ef5b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is jumping." }, { "id": "B", "text": "The creature is sitting down." }, { "id": "C", "text": "The creature is sleeping." }, { "id": "D", "text": "The creature is walking forward." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 907, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5bfba90630924705818f967c4e85a6ee", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many clouds are visible?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "2" }, { "id": "C", "text": "4" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 908, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/145cf913217d436a9da6ce1c70171f17", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the skeleton figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The skeleton figure is standing still." }, { "id": "B", "text": "The skeleton figure is jumping up and down." }, { "id": "C", "text": "The skeleton figure is walking slowly." }, { "id": "D", "text": "The skeleton figure is moving forward with a swinging motion" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 909, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c57b4c2755874304af815176be0d7bb3", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the rider holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rider is holding a short, curved object." }, { "id": "B", "text": "The rider is holding a long, straight object." }, { "id": "C", "text": "The rider is holding a small, round object." }, { "id": "D", "text": "The rider is holding a large, flat object." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 910, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a1f1b6991f0048f1b051d62c0862ddae", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the small white figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The small white figure is running with pink shoes." 
}, { "id": "B", "text": "The small white figure is standing with pink shoes." }, { "id": "C", "text": "The small white figure is sitting with pink shoes." }, { "id": "D", "text": "The small white figure is jumping with pink shoes." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 911, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/80b25ed8f8ac492ebd1b441fc790b902", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the button of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The button on the side of the 4D object is white." }, { "id": "B", "text": "The button on the side of the 4D object is black." }, { "id": "C", "text": "The button on the side of the 4D object is red." }, { "id": "D", "text": "The button on the side of the 4D object is green." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 912, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/79c7abd42e6a47bd8d0fe70e2073cf85", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "In the video, how many drawers are there in the cabinet?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "3" }, { "id": "C", "text": "5" }, { "id": "D", "text": "2" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 913, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/20149f29c172416fa771dd475d32c120", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is running and skipping." }, { "id": "B", "text": "The character is sitting and resting." 
}, { "id": "C", "text": "The character is walking." }, { "id": "D", "text": "The character is jumping and flipping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 914, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b5e0ae7c5d5d4ff39b4207ed336a3b59", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the bird doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird is sitting on the ice cream container." }, { "id": "B", "text": "The bird is eating the ice cream." }, { "id": "C", "text": "The bird is flying." }, { "id": "D", "text": "The bird is playing the drums." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 915, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c3f551011aaf4e7fb36a3d4e7912b5d3", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the humanoid figure's head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure's head is heart-shaped." }, { "id": "B", "text": "The humanoid figure's head is oval." }, { "id": "C", "text": "The humanoid figure's head is square." }, { "id": "D", "text": "The humanoid figure's head is round." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 916, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e0312cf791da4abfb0bb66e446bcb9f8", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the robotic spider doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robotic spider is crawling." }, { "id": "B", "text": "The robotic spider is scuttling across a dark background." 
}, { "id": "C", "text": "The robotic spider is flying through the air." }, { "id": "D", "text": "The robotic spider is standing still on a white background." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 917, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0df9922071a945f2a319ca8d35fe66a2", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many screws are moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "6" }, { "id": "C", "text": "4" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 918, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7df79a6d21d3460abfdb8a93b768a6be", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which part of the horse is moving the most?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The horse's head" }, { "id": "B", "text": "The horse's tail" }, { "id": "C", "text": "The horse's mane" }, { "id": "D", "text": "The horse's hooves" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 919, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c2a965feaa4d430d944e4dcafc64f0b0", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many drawers are opened?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "1" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 920, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0be986d010d4465d9fc5621904d579cb", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden barrel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden barrel's lid closes in sequence." }, { "id": "B", "text": "The wooden barrel fills up with water." }, { "id": "C", "text": "The wooden barrel spins around in a circle." }, { "id": "D", "text": "The wooden barrel remains closed throughout the video." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 921, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bf2bc328f2d64d8e934019f0757c7e6f", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the young boy do with the staff?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The young boy puts the staff on the ground." }, { "id": "B", "text": "The young boy throws the staff." }, { "id": "C", "text": "The young boy holds the staff with one hand." }, { "id": "D", "text": "The young boy holds the staff with both hands." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 922, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4aeeb06b84774044a1a413010574a75f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many pillows are there in total?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "6" }, { "id": "C", "text": "4" }, { "id": "D", "text": "5" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 923, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ac7a077a92444216ad713980ba64160d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary motion of the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object moves backward." }, { "id": "B", "text": "The object moves forward." }, { "id": "C", "text": "The object rotates 360 degrees." }, { "id": "D", "text": "The object remains stationary." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 924, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/47fea286345343dbb0ec95c919811ce1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is dancing." }, { "id": "B", "text": "The person is sitting down." }, { "id": "C", "text": "The person is walking forward." }, { "id": "D", "text": "The person is waving his arm." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 925, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/00deafaa1c06485f9099260c1800b216", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many punches does the humanoid figure perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure performs a total of 2 punches." }, { "id": "B", "text": "The humanoid figure performs a total of 3 punches." 
}, { "id": "C", "text": "The humanoid figure performs a total of 4 punches." }, { "id": "D", "text": "The humanoid figure performs a total of 1 punch." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 926, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c290206e5655494fbe0c6aa9d2f66102", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the number on the back of the sports jersey?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "38" }, { "id": "B", "text": "18" }, { "id": "C", "text": "28" }, { "id": "D", "text": "8" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 927, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cd02bdc2ac914baa8573bd6a24b48e75", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is sitting." }, { "id": "B", "text": "The humanoid figure is jumping." }, { "id": "C", "text": "The humanoid figure is running." }, { "id": "D", "text": "The humanoid figure is waving its body." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 928, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a659e6cb755a45b4aa46e66d807d5165", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the top section of the joystick-like object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The top section of the joystick-like object is a curved handle." }, { "id": "B", "text": "The top section of the joystick-like object is smooth and lacks any buttons or controls." 
}, { "id": "C", "text": "The top section of the joystick-like object features multiple buttons and controls." }, { "id": "D", "text": "The top section of the joystick-like object is a single button." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 929, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e4db2a5885eb442cac73b719d581478c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the spider doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The spider is jumping." }, { "id": "B", "text": "The spider is spinning a web." }, { "id": "C", "text": "The spider is sitting still." }, { "id": "D", "text": "The spider is crawling around." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 930, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e5b1558c3bcf458a9d2a23e68c6b8961", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "1" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 931, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/be818b5570914b4f8566020d3803c4e6", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the pink object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pink object is cut by the machine." }, { "id": "B", "text": "The pink object is lifted by the machine." }, { "id": "C", "text": "The pink object is placed on the table." }, { "id": "D", "text": "The pink object is moved by the machine." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 932, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4517255f15d04a918653c67685cec645", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the skeleton figure do with the spear?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The skeleton figure raises the spear and then lowers it." }, { "id": "B", "text": "The skeleton figure spins the spear around." }, { "id": "C", "text": "The skeleton figure holds the spear upright." }, { "id": "D", "text": "The skeleton figure throws the spear." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 933, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0f4349ded7e5413895f6def949c1a10e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the space telescope?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The space telescope rotates its solar panels." }, { "id": "B", "text": "The space telescope remains stationary." }, { "id": "C", "text": "The space telescope launches into space." }, { "id": "D", "text": "The space telescope rotates in space." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 934, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/be77fe5d25b54dde81c9a16bf41554ea", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the text on the back of the chicken costume?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "cluckin' bell taste the cock" }, { "id": "B", "text": "cluckin' bell" }, { "id": "C", "text": "taste the cock" }, { "id": "D", "text": "taste the chicken" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 935, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d52be89d681e4d669e3c7f0397de470f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the lid of the toilet?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The lid of the toilet opens and then moves to the side." }, { "id": "B", "text": "The lid of the toilet opens and then closes." }, { "id": "C", "text": "The lid of the toilet opens and then opens again." }, { "id": "D", "text": "The lid of the toilet remains closed throughout the video." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 936, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b627b1dcbff0464386188b6a033f7bea", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is standing still." }, { "id": "B", "text": "The person is walking slowly." }, { "id": "C", "text": "The person is dancing." }, { "id": "D", "text": "The person is performing a sequence of tactical movements." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 937, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9637fbb8f34144718d6ef438f3c97390", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many letters are projected in the 3D holographic display?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "1" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 938, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4ff210083a3d4bf5a9bbe7f2632cd031", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The electric toothbrush is placed on the table." }, { "id": "B", "text": "The electric toothbrush is placed in the charging stand." }, { "id": "C", "text": "The electric toothbrush is removed from the charging stand." }, { "id": "D", "text": "The electric toothbrush is turned on." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 939, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c8e3c5f2be644ba6b9715aa84495a255", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green, frog-like Lego figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green, frog-like Lego figure dances and spins around." }, { "id": "B", "text": "The green, frog-like Lego figure stands up and runs away." }, { "id": "C", "text": "The green, frog-like Lego figure falls down and lies on the ground." }, { "id": "D", "text": "The green, frog-like Lego figure jumps and flies in the air." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 940, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/770d4bc57c314cb599eaed1589335f6a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green light in the center of the door as the door opens?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green light in the center of the door remains illuminated." }, { "id": "B", "text": "The green light in the center of the door disappears." }, { "id": "C", "text": "The green light in the center of the door changes to red." }, { "id": "D", "text": "The green light in the center of the door turns off." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 941, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/28140fa8696743e39341bad4fd96f70b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many chocolate-covered wafer biscuits are scattered in various orientations?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "13" }, { "id": "B", "text": "10" }, { "id": "C", "text": "12" }, { "id": "D", "text": "11" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 942, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ad59aeffaa4f44cebc006c3191bdcaf4", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the yellow sphere?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The yellow sphere transforms into a different object." }, { "id": "B", "text": "The yellow sphere moves away from the white rabbit figure and blue cones on the blue platform." 
}, { "id": "C", "text": "The yellow sphere moves towards the white rabbit figure and blue cones on the blue platform." }, { "id": "D", "text": "The yellow sphere remains stationary throughout the video." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 943, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cf8bc9db1648493d9ef2dbd2b63b9694", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the primary function of the green lever in the mechanical assembly?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green lever is used to rotate the gray gear." }, { "id": "B", "text": "The green lever is used to rotate the yellow triangular base." }, { "id": "C", "text": "The green lever is used to rotate the blue gear." }, { "id": "D", "text": "The green lever is used to rotate the black rod." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 944, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/09998ed235134ae39ae4435a136bcb51", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the dark segment on the cylindrical object as the video progresses?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The dark segment on the cylindrical object remains stationary." }, { "id": "B", "text": "The dark segment on the cylindrical object disappears." }, { "id": "C", "text": "The dark segment on the cylindrical object moves from the back to the front." }, { "id": "D", "text": "The dark segment on the cylindrical object moves inside and then backs to the surface." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 945, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4f10d2dbe57f4e21a956263eb84681ce", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the character's primary color and what is the shape of its body?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character's primary color is blue and it has a round body." }, { "id": "B", "text": "The character's primary color is green and it has a triangular body." }, { "id": "C", "text": "The character's primary color is turquoise and it has a square body." }, { "id": "D", "text": "The character's primary color is turquoise and it has a rectangular body." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 946, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1512c3b5ac404a908b311a657c6f1c46", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the first vegetable under the egg?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "carrot." }, { "id": "B", "text": "tomato." }, { "id": "C", "text": "lettuce." }, { "id": "D", "text": "cucumber." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 947, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e9d7b46f12d44b6cafe321bdbf6f4731", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the main feature of the brown, humanoid figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The brown, humanoid figure has a bunny drawing on its chest." }, { "id": "B", "text": "The brown, humanoid figure is wearing a red heart." }, { "id": "C", "text": "The brown, humanoid figure is dancing." 
}, { "id": "D", "text": "The brown, humanoid figure has bear-like ears." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 948, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/059adc84fcdf4d8f8f3c79f268167b3b", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the bird?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird stays in the same position." }, { "id": "B", "text": "The bird flies around the lighthouse." }, { "id": "C", "text": "The bird lands on the lighthouse." }, { "id": "D", "text": "The bird flies away from the lighthouse." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 949, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/426e2fe4a13c4cc78b280b4d76c72007", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the man holding in his right arm?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a brown scarf" }, { "id": "B", "text": "a brown umbrella" }, { "id": "C", "text": "a brown bag" }, { "id": "D", "text": "a brown coat" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 950, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6a08e1f729194dba9f80eef2af5d002b", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is walking away from the wall." }, { "id": "B", "text": "The figure is running between two white walls." }, { "id": "C", "text": "The figure is standing still against the wall." }, { "id": "D", "text": "The figure is jumping over the wall." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 951, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a2b1d467e4c847abbc31f3f1b84d0127", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is the spiked, icy character performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is holding a large spiked ball and a hammer." }, { "id": "B", "text": "The character is holding a large spiked ball and a hammer, but it is sitting still." }, { "id": "C", "text": "The character is holding a large spiked ball and a hammer, but it is jumping." }, { "id": "D", "text": "The character is holding a large spiked ball and a hammer, but it is not moving." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 952, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/720ddbe936504ae194d514876b235067", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the rear door of the red vehicle with white stripes and solar panels on the roof?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rear door opens and remains opened." }, { "id": "B", "text": "The rear door opens and then closes again." }, { "id": "C", "text": "The rear door remains closed." }, { "id": "D", "text": "The rear door opens and then opens again." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 953, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/58f7e01312434ea7bd55162afe7a6117", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary motion of the red spherical object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red spherical object is shrinking." }, { "id": "B", "text": "The red spherical object is moving forward." }, { "id": "C", "text": "The red spherical object is rotating." }, { "id": "D", "text": "The red spherical object is expanding." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 954, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c4b1af1928af4959842a02b564c5200f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the person turn around?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "2" }, { "id": "C", "text": "1" }, { "id": "D", "text": "0" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 955, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5cf229c870224fe1984708ba66e9c734", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the box?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The box opens." }, { "id": "B", "text": "The box spins around." }, { "id": "C", "text": "The box moves up and down." }, { "id": "D", "text": "The box changes color." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 956, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3901104dc8104a348fd2625b58dd7d50", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many trams are present?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "1" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 957, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5af7ec6f5f20447faef020a442eb3312", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is standing still and looking around." }, { "id": "B", "text": "The humanoid figure is walking on a flat surface." }, { "id": "C", "text": "The humanoid figure is jumping and flipping." }, { "id": "D", "text": "The humanoid figure is raising its right arm." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 958, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4ef1452fa2ab4c47abed001fade8f903", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is lying down and then running." }, { "id": "B", "text": "The person is lying down and then sitting up." }, { "id": "C", "text": "The person is lying down and then sleeping." }, { "id": "D", "text": "The person is lying down and then standing up." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 959, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/425b9104f2524a3599b6b3ed741a8c86", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is turning around." 
}, { "id": "B", "text": "The robot is standing still." }, { "id": "C", "text": "The robot is jumping up and down." }, { "id": "D", "text": "The robot is marching forward." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 960, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d5fbd1f92ba64978954f0aef0d178e37", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is floating in the air without any movement." }, { "id": "B", "text": "The object is stationary and not moving." }, { "id": "C", "text": "The object is spinning in a circle." }, { "id": "D", "text": "The object is hovering and moving horizontally." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 961, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0a881e89e6ca46e1a4586786caa702bb", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the tag attached to the bunny head doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tag is floating up and down." }, { "id": "B", "text": "The tag is spinning around." }, { "id": "C", "text": "The tag is attached to the bunny head and not moving." }, { "id": "D", "text": "The tag is hanging and shaking following the head's motion." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 962, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e2526a18ec1f409bb0fd1a37c680aef5", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which action is Spider-Man performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Spider-Man is jumping." 
}, { "id": "B", "text": "Spider-Man is climbing a wall." }, { "id": "C", "text": "Spider-Man is standing still." }, { "id": "D", "text": "Spider-Man is twisting his waist and expanding his chest." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 963, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b6b67e39466642dcb677629ae103de64", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the black spherical object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The black spherical object is standing still." }, { "id": "B", "text": "The black spherical object is jumping up and down." }, { "id": "C", "text": "The black spherical object is spinning around." }, { "id": "D", "text": "The black spherical object is moving in a loop." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 964, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3321845e6d374ca284ac2a4ccd0e8c50", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that the robotic figure performs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robotic figure lifts its body." }, { "id": "B", "text": "The robotic figure moves its arms." }, { "id": "C", "text": "The robotic figure turns its head." }, { "id": "D", "text": "The robotic figure moves its legs." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 965, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e7b0f773f9e8451983b919aaf8e36cd5", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the blue lizard-like creature doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue lizard-like creature is sitting and holding a shield and a sword." }, { "id": "B", "text": "The blue lizard-like creature is walking and holding a shield and a sword." }, { "id": "C", "text": "The blue lizard-like creature is running and holding a shield and a sword." }, { "id": "D", "text": "The blue lizard-like creature is jumping and holding a shield and a sword." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 966, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/63b30bdfa1d64b019fa2750902e5a5db", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the humanoid bee character's antennae?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The color of the humanoid bee character's antennae is blue." }, { "id": "B", "text": "The color of the humanoid bee character's antennae is red." }, { "id": "C", "text": "The color of the humanoid bee character's antennae is yellow." }, { "id": "D", "text": "The color of the humanoid bee character's antennae is green." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 967, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1e0371ad73e14e29a13464a54105e80a", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many lifebuoys are on the fishing boat?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "6" }, { "id": "B", "text": "3" }, { "id": "C", "text": "7" }, { "id": "D", "text": "9" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 968, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ebaebf253a9548358f157ee46e822df4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing with the sword?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is swinging the sword." }, { "id": "B", "text": "The character is throwing the sword." }, { "id": "C", "text": "The character is sheathing the sword." }, { "id": "D", "text": "The character is holding the sword in one hand." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 969, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5fa78231c06d4622860dee9e4cb3cfa6", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "What is this person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "dancing" }, { "id": "B", "text": "walking" }, { "id": "C", "text": "jumping" }, { "id": "D", "text": "Sitting" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 970, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4e82ebfdbb19428993866c0a75f32df9", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many black cubes are floating above the reflective bowl at the beginning?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "6" }, { "id": "C", "text": "4" }, { "id": "D", "text": "5" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 971, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b993e97a84c3444b88f30b5f405ecc2e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the relationship between the two animated characters?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The two animated characters are performing synchronized dance moves." }, { "id": "B", "text": "The two animated characters are engaged in a fight." }, { "id": "C", "text": "The two animated characters are standing still and not interacting." }, { "id": "D", "text": "The two animated characters are unrelated and do not interact with each other." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 972, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ad28e3a032f443459811c81c9f94c4a9", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the blue bear-like character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a red, white, and blue balloon" }, { "id": "B", "text": "a red, white, and blue flag" }, { "id": "C", "text": "a large rocket" }, { "id": "D", "text": "a small, striped rocket" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 973, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/703b68dc8d874c80b19f0e8056f6731d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action of the knife?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The knife is rotating." }, { "id": "B", "text": "The knife is being sliced through an object." }, { "id": "C", "text": "The knife is being held stationary." }, { "id": "D", "text": "The knife is being sharpened." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 974, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/38b8ba43ed6f45228762e2d3ed2cae6c", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the internal component located at the bottom of the spherical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Red" }, { "id": "C", "text": "Black" }, { "id": "D", "text": "Blue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 975, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/32df762c67bb46609e2ffb449de18ad6", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing in the first frame?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is sitting down." }, { "id": "B", "text": "The character is standing with arms slightly extended." }, { "id": "C", "text": "The character is jumping in the air." }, { "id": "D", "text": "The character is running forward." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 976, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f926050cf1524914944b6e113514c549", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which of the following actions is the yellow robot performing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The yellow robot is standing still on the platform." }, { "id": "B", "text": "The yellow robot is moving its arms and upper body." }, { "id": "C", "text": "The yellow robot is spinning around on the platform." }, { "id": "D", "text": "The yellow robot is lifting its legs off the platform." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 977, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/96dec386b43649afa124c2c03a178fac", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the soccer player change his running direction?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "4" }, { "id": "C", "text": "0" }, { "id": "D", "text": "3" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 978, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2cf45d827b9847899dd14c2b367cce76", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the penguin-like character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The penguin-like character is flying in the air." }, { "id": "B", "text": "The penguin-like character is looking around." }, { "id": "C", "text": "The penguin-like character is sitting still." }, { "id": "D", "text": "The penguin-like character is walking forward." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 979, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/609cfdf8c1ac444798fadf6078be384a", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The chair starts to move forward." }, { "id": "B", "text": "The chair starts to move backward." }, { "id": "C", "text": "The chair starts to rotate." }, { "id": "D", "text": "The chair starts to move sideways." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 980, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/94de4e90a4654435ba42765a725e7aa3", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the mechanical walker have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "6" }, { "id": "C", "text": "8" }, { "id": "D", "text": "4" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 981, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e3085c113b7a4bb8b2fe233a218df4b6", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "Which color is not present on the gears attached to the blue panel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yellow" }, { "id": "B", "text": "Red" }, { "id": "C", "text": "purple" }, { "id": "D", "text": "Green" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 982, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/50d4bb6e4d9743d1a888b9b9a2f9a5fe", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is walking forward." }, { "id": "B", "text": "The character is sitting down." }, { "id": "C", "text": "The character is jumping in the air." 
}, { "id": "D", "text": "The character is striking a pose with one hand on her waist." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 983, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/612f5041ce914fa18296bb04962d0225", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What color does the object transition to during the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object transitions to blue." }, { "id": "B", "text": "The object transitions to yellow." }, { "id": "C", "text": "The object transitions to green." }, { "id": "D", "text": "The object transitions to pink." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 984, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7e85c836f5ae4db7b45d027d7853334f", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "In the video, what is the small orange object that the purple humanoid figure is holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The small orange object is a ball." }, { "id": "B", "text": "The small orange object is a flower." }, { "id": "C", "text": "The small orange object is a heart." }, { "id": "D", "text": "The small orange object is a key." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 985, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9cd6e456b86e44eb8a68603455142761", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action depicted?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The stone monument is being lifted." }, { "id": "B", "text": "The stone monument is being carved." }, { "id": "C", "text": "The stone monument is rotating." 
}, { "id": "D", "text": "The stone monument is standing still." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 986, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ebbab398045f4ebda00358238c91f528", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Cleaning the windshield of the red vintage car" }, { "id": "B", "text": "Opening the trunk of the red vintage car" }, { "id": "C", "text": "Inspecting the engine of the red vintage car" }, { "id": "D", "text": "Adjusting the radio of the red vintage car" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 987, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2e2a03b0f08148119793c3f3c355579e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many mechanical legs does the yellow cylindrical object have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 988, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/654790ee731a4476bd6ae1ea368cb198", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many teeth does the humanoid figure have?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "5" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 989, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3cdba77ce6c447199605cee23ab4eff4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is holding a helmet in his hand." }, { "id": "B", "text": "The character is walking or running." }, { "id": "C", "text": "The character is brandishing a blue sword." }, { "id": "D", "text": "The character is wearing gray armor with a blue visor." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 990, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ed151fd59a894b3290d9eee3f716f0bf", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the dog figurine?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The dog figurine changes its color." }, { "id": "B", "text": "The dog figurine remains stationary." }, { "id": "C", "text": "The dog figurine grows in size." }, { "id": "D", "text": "The dog figurine wags its tail." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 991, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/561398c9ffba4c1181a565e2a66cc666", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the person wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is wearing a red hoodie, blue shorts, and black sneakers." 
}, { "id": "B", "text": "The person is wearing a white hoodie, black shorts, and white sneakers." }, { "id": "C", "text": "The person is wearing a gray hoodie, black shorts, and white sneakers." }, { "id": "D", "text": "The person is wearing a black hoodie, white shorts, and black sneakers." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 992, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/51220e15387d4f39b0dce750ef2f8c72", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action depicted?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object expands." }, { "id": "B", "text": "The object reaches out into the distance." }, { "id": "C", "text": "The object remains stationary." }, { "id": "D", "text": "The object contracts." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 993, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/05d411855efa4aa28a40dfbb48c90a18", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action of the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is spinning in a circle." }, { "id": "B", "text": "The object is rotating." }, { "id": "C", "text": "The object is moving up and down." }, { "id": "D", "text": "The object is remaining stationary." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 994, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6dddee76221d4e9cba9dc3ab8a9e5649", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary motion of the spacecraft?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The spacecraft is moving in a circular pattern." }, { "id": "B", "text": "The spacecraft is stationary and not moving." }, { "id": "C", "text": "The spacecraft is flying in a straight line." }, { "id": "D", "text": "The spacecraft is rotating and maneuvering." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 995, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/62fc52e950374e808fe67e770ac9f7a1", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is walking." }, { "id": "B", "text": "The humanoid figure is sitting." }, { "id": "C", "text": "The humanoid figure is sleeping." }, { "id": "D", "text": "The humanoid figure is dancing." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 996, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/311de15cfd5c491c90d5c264932f8363", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the dragon-like creature holding in its front claws?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Nothing" }, { "id": "B", "text": "only orange orbs" }, { "id": "C", "text": "only green orbs" }, { "id": "D", "text": "colorful orbs" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 997, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5ffe2612f3f0471494256e4f11561159", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the man doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The man is jumping up and down."
}, { "id": "B", "text": "The man is walking forward with his hands behind his back." }, { "id": "C", "text": "The man is crouching with his fists raised." }, { "id": "D", "text": "The man is standing with his arms crossed." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 998, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9856894bcd3443d1853a6fb040882a13", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the handle-like structure on the stone block?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Brown and purple" }, { "id": "B", "text": "Gray and black" }, { "id": "C", "text": "Green and yellow" }, { "id": "D", "text": "Brown and blue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 999, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/46a76e5f18d74591adb3a7022745a0e9", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects are in the frying pan at the end of the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "4" }, { "id": "C", "text": "3" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1000, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/005bd47d11294778b1882c3e17991672", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the colorful frog?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The colorful frog jumps but stays in the same pad." }, { "id": "B", "text": "The colorful frog sits still on the lily pad." }, { "id": "C", "text": "The colorful frog flies away from the lily pads." 
}, { "id": "D", "text": "The colorful frog swims in the water." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1001, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/90c749c4ebcd4069aa538141601b0305", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green core in the cylindrical container as the video progresses?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green core decreases in size within the cylindrical container." }, { "id": "B", "text": "The green core changes color within the cylindrical container." }, { "id": "C", "text": "The green core remains stationary within the cylindrical container." }, { "id": "D", "text": "The green core rises within the cylindrical container." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1002, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/91437baed6b04c4884638612f527a448", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the teal robot have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "2" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1003, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b645a00f09ab4073afae47ce58eda06c", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is standing still and looking around." }, { "id": "B", "text": "The figure is holding a box and walking." }, { "id": "C", "text": "The figure is dancing and moving energetically." 
}, { "id": "D", "text": "The figure is holding its head with both hands, appearing to be in distress." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1004, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4d14272e44bd4bb6898d91b223296606", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The carrying handle is placed on top of the box." }, { "id": "B", "text": "The lid of the ammunition box is opened." }, { "id": "C", "text": "The lid of the ammunition box is closed." }, { "id": "D", "text": "The carrying handle is lifted." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1005, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5e00bea324db4ae79deec5ac96d01f5c", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the handheld gaming console?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The screen of the handheld gaming console remains closed." }, { "id": "B", "text": "The screen of the handheld gaming console rotates 180 degrees." }, { "id": "C", "text": "The screen of the handheld gaming console opens to reveal a keyboard." }, { "id": "D", "text": "The screen of the handheld gaming console closes." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1006, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4a889daa2aa4441f9ca199fe36282453", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "Where does the football land at the end of the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "on top of a bench." 
}, { "id": "B", "text": "on top of Purple trampoline" }, { "id": "C", "text": "on top of brick-wall" }, { "id": "D", "text": "on top of brick-wall" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1007, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7fc707a702b94d14923290bdae836894", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which part of the car opens first?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rear door" }, { "id": "B", "text": "The hood" }, { "id": "C", "text": "The roof" }, { "id": "D", "text": "The front door, rear door, hood, and trunk" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1008, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/023e661a18f6473e8476f9845b56a4b5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the character's bow tie?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character's bow tie is blue." }, { "id": "B", "text": "The character's bow tie is green." }, { "id": "C", "text": "The character's bow tie is red." }, { "id": "D", "text": "The character's bow tie is orange." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1009, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e8ee13cf2e9048db88a44469f7f42c68", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action performed by the humanoid figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure raises its arms." }, { "id": "B", "text": "The humanoid figure moves forward." }, { "id": "C", "text": "The humanoid figure jumps up." 
}, { "id": "D", "text": "The humanoid figure turns around." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1010, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0a8c2ef57b0f4fff9be2410c4a30af0a", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden chest?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden chest opens and closes four times." }, { "id": "B", "text": "The wooden chest opens and closes five times." }, { "id": "C", "text": "The wooden chest remains closed throughout the video." }, { "id": "D", "text": "The wooden chest opens and closes only one time." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1011, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5094ecae68cd4551845d465b99ec73d8", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is standing still." }, { "id": "B", "text": "The figure is dancing." }, { "id": "C", "text": "The figure is performing expressive hand movements." }, { "id": "D", "text": "The figure is sitting down." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1012, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b69d149f87a94a058fc4e8adb58ebbbf", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the man doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The man is walking." }, { "id": "B", "text": "The man is dancing." }, { "id": "C", "text": "The man is eating the pizza." 
}, { "id": "D", "text": "The man is acting like he is answering a phone call." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1013, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d23d6d493bcb44268740cb24a5eac9e6", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary motion of the wooden L-shaped object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden L-shaped object expands and contracts." }, { "id": "B", "text": "The wooden L-shaped object separates and then reunites." }, { "id": "C", "text": "The wooden L-shaped object remains stationary." }, { "id": "D", "text": "The wooden L-shaped object translates." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1014, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e89cbd86bb9d49a7810c7588c5ab53ab", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the ornament on top of the wooden box?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "mustache-shaped" }, { "id": "B", "text": "flower-shaped" }, { "id": "C", "text": "star-shaped" }, { "id": "D", "text": "heart-shaped" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1015, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ad2450c22d664317b36ccfd2e81016ea", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "In the video, how does the humanoid figure's stance change when aiming the rifle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure's stance changes from a shooting stance to a more relaxed stance."
}, { "id": "B", "text": "The humanoid figure's stance changes from a shooting stance to a more defensive stance." }, { "id": "C", "text": "The humanoid figure's stance changes from a shooting stance to a more aggressive stance." }, { "id": "D", "text": "The humanoid figure's stance remains the same throughout the video." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1016, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/06c54ca5319a47c8b23ffd39da741848", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is flying in the air." }, { "id": "B", "text": "The robot is standing still." }, { "id": "C", "text": "The robot is sitting on a chair." }, { "id": "D", "text": "The robot is walking." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1017, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2cb5d77691aa4341af7e0590789f1768", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is jumping in a high-tech exoskeleton suit." }, { "id": "B", "text": "The person is standing still in a high-tech exoskeleton suit." }, { "id": "C", "text": "The person is walking in a high-tech exoskeleton suit." }, { "id": "D", "text": "The person is performing attack poses." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1018, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/67bfb8692f0344fcbcfa84fb13d614ac", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a black shirt and green pants." }, { "id": "B", "text": "The character is wearing a red shirt and blue pants." }, { "id": "C", "text": "The character is wearing a blue shirt and green pants." }, { "id": "D", "text": "The character is wearing a green shirt and red pants." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1019, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/787836d479f5424db8a021119eafebff", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action performed by the red and green figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red and green figure punches the white figure." }, { "id": "B", "text": "The white figure kicks the red and green figure." }, { "id": "C", "text": "The white figure punches the red and green figure." }, { "id": "D", "text": "The red and green figure tries to kick the white figure and the white figure punches the red and green figure." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1020, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ebf9f09efabd46d0a9f5ee23848f40bb", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the animatronic figure wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The animatronic figure is wearing a shirt with sleeves." }, { "id": "B", "text": "The animatronic figure is wearing a scarf." }, { "id": "C", "text": "The animatronic figure is wearing a hat." }, { "id": "D", "text": "The animatronic figure is wearing a bow tie." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1021, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0e47f3addcc2435ba27cb90de9b5914f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many cars pass by the 'Monster Hunter' building?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1022, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1fa3c0b99afe48118b232390c98ec2ca", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is sleeping and snoring." }, { "id": "B", "text": "The creature is eating a small ball." }, { "id": "C", "text": "The creature is sitting and looking around." }, { "id": "D", "text": "The creature is sticking out its mechanical tongue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1023, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2f2e82108ecc4645aeb091b5a9b40671", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing with the wrench-like tool?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character holds the wrench-like tool on the ground with left hand." }, { "id": "B", "text": "The character is throwing the wrench-like tool away." }, { "id": "C", "text": "The character is using the wrench-like tool to fix something." }, { "id": "D", "text": "The character is holding the wrench-like tool in their hand." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1024, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0f7737503c624c439ee3a49922811a43", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the fox-like character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fox-like character is sitting and resting." }, { "id": "B", "text": "The fox-like character is dancing and spinning." }, { "id": "C", "text": "The fox-like character is walking and running." }, { "id": "D", "text": "The fox-like character is jumping and flipping." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1025, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/805be6f9445e42cebdc5563b571a7b96", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is spinning around." }, { "id": "B", "text": "The robot is sitting still." }, { "id": "C", "text": "The robot is spinning its left mechanical hand." }, { "id": "D", "text": "The robot is waving its mechanical arm." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1026, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7075b6c91f094eeca64df9736231c2c0", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many stars are present?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "5" }, { "id": "C", "text": "3" }, { "id": "D", "text": "6" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1027, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3fddc1e9ae1e4cb0bd39cbbd2c7fcf4f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the table?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The table opens up to reveal a lower shelf." }, { "id": "B", "text": "The table closes and hides the lower shelf." }, { "id": "C", "text": "The table rotates 180 degrees." }, { "id": "D", "text": "The table remains closed and does not change." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1028, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3279369cb5a04838a859bd69823d545c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the turtle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The turtle stands still and does not move." }, { "id": "B", "text": "The turtle walks and moves its flippers." }, { "id": "C", "text": "The turtle swims in the water." }, { "id": "D", "text": "The turtle flies through the air." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1029, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/660a07034cf9469f80c466236475f3bb", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What color is this person's tie?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "blue" }, { "id": "B", "text": "black" }, { "id": "C", "text": "reddish-purple" }, { "id": "D", "text": "white" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1030, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3fe5df6e2b774fa5830734818b8b2d1b", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the blue container?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue container remains unchanged throughout the video." }, { "id": "B", "text": "The blue container retracts a hidden brown drawer." }, { "id": "C", "text": "The blue container expands to reveal a hidden brown drawer." }, { "id": "D", "text": "The blue container transforms into a different shape." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1031, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b0301ec0c52f4e8cbef0acae03beb33e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many wheels does the vintage steampunk vehicle have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The vintage steampunk vehicle has six wheels." }, { "id": "B", "text": "The vintage steampunk vehicle has four wheels." }, { "id": "C", "text": "The vintage steampunk vehicle has ten wheels." }, { "id": "D", "text": "The vintage steampunk vehicle has eight wheels." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1032, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3a531202abc74a27a8ac0f7dee5337ba", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the gemstone on the ring?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Round" }, { "id": "B", "text": "Oval" }, { "id": "C", "text": "Dome-shaped" }, { "id": "D", "text": "Square" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1033, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a194f0c0efcb4c30a4cd4854242e513f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the gorilla wearing on its neck?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The gorilla is wearing a red bandana." }, { "id": "B", "text": "The gorilla is wearing a blue scarf." }, { "id": "C", "text": "The gorilla is wearing a black collar." }, { "id": "D", "text": "The gorilla is wearing a red tie with yellow letters 'DK' on it." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1034, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b0fe537b26c140b5be28e4d244b3c062", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is flying." }, { "id": "B", "text": "The creature is sitting." }, { "id": "C", "text": "The creature is walking." }, { "id": "D", "text": "The creature is jumping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1035, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b6115b2e1bd64d4fb10d0ecd6646f863", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the person wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is wearing a blue shirt and black pants." 
}, { "id": "B", "text": "The person is wearing a red shirt and black pants." }, { "id": "C", "text": "The person is wearing a white shirt and black pants." }, { "id": "D", "text": "The person is wearing black pants only." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1036, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/684f3059cc7d41a79c74f2b937d31959", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the topmost feature of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A triangular and pointed top." }, { "id": "B", "text": "A circular and smooth top without any pointed elements." }, { "id": "C", "text": "A spherical and ear-like structure surrounded by pointed elements." }, { "id": "D", "text": "A flat and round top with no pointed elements." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1037, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/42321a13c3c240c5a35dd04a97d770a4", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many red cylindrical protrusions are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "4" }, { "id": "C", "text": "2" }, { "id": "D", "text": "1" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1038, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/77fe1004ed4e432dabce56a61f93b96d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the position of the plant in the backpack?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The plant is sprouting from the top of the backpack." 
}, { "id": "B", "text": "The plant is hanging from the bottom of the backpack." }, { "id": "C", "text": "The plant is inside the backpack." }, { "id": "D", "text": "The plant is attached to the belt of the backpack." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1039, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6304bbc21efe43cabf0e53a43df5c7ec", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many metal rings are in the yellow display case?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Six" }, { "id": "B", "text": "Four" }, { "id": "C", "text": "Three" }, { "id": "D", "text": "Five" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1040, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/31182bdc05dd40ddbc7f284297e8f225", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the central feature of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A circular blue portal surrounded by stone arches and pillars" }, { "id": "B", "text": "A floating rocky platform with a black portal surrounded by stone arches and pillars" }, { "id": "C", "text": "A rocky platform with a white portal surrounded by stone arches and pillars" }, { "id": "D", "text": "A rocky platform with a green portal surrounded by stone arches and pillars" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1041, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f064784c624742b5acd1d3fe4e7e22c2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "What is the pink humanoid figure doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pink humanoid figure is standing still." }, { "id": "B", "text": "The pink humanoid figure is raising an arm upward." }, { "id": "C", "text": "The pink humanoid figure is dancing and jumping." }, { "id": "D", "text": "The pink humanoid figure rotates an arm counterclockwise." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1042, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/359a17121a744e1cb1e9b5cee7b8161d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the money bag?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The money bag expands and contracts." }, { "id": "B", "text": "The money bag spins around." }, { "id": "C", "text": "The money bag falls over." }, { "id": "D", "text": "The money bag remains stationary." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1043, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4d0933176ee447b2a0422abf9dac25e5", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the robot?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot opens its mouth and speaks." }, { "id": "B", "text": "The robot presses a button on its side." }, { "id": "C", "text": "The robot stops and remains still." }, { "id": "D", "text": "The robot rotates and moves its antennas." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1044, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b5a81f2135f74486a816388752881d0a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the small figure on the floating island?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The small figure raises both hands above its head and waves them." }, { "id": "B", "text": "The small figure falls off the floating island." }, { "id": "C", "text": "The small figure stands still on the floating island." }, { "id": "D", "text": "The small figure is transported to another location on the floating island." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1045, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2464212690304108bce8e344be8f14a6", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many yellow rectangular prisms are present in the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "1" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1046, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/72e4fe1b943e46a782ac7e1790630589", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the toy figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The toy figure is performing a dance." }, { "id": "B", "text": "The toy figure is jumping." }, { "id": "C", "text": "The toy figure is walking." }, { "id": "D", "text": "The toy figure is sitting still." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1047, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/87072288fb234226b9a3f02ae674a310", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many spheres are present?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1048, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/72b0c57fd7b2489ab12752db9d8df335", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many triangular cutouts are present on the black cube?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "8" }, { "id": "B", "text": "6" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1049, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b3f66d4e3dec4044b2e3acd07a4a84e4", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What action does the figure perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure waves its hand." }, { "id": "B", "text": "The figure throws his arm forward." }, { "id": "C", "text": "The figure points its finger." }, { "id": "D", "text": "The figure raises its arm." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1050, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7ae35d274575433aa94f494d1f01c32c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is standing still." }, { "id": "B", "text": "The humanoid figure is running." }, { "id": "C", "text": "The humanoid figure is sitting on the ground." }, { "id": "D", "text": "The humanoid figure is walking." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1051, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/616e0dd72bf04503a350d774f48a6e6c", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the book?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The book opens and shows a page with a dog picture on it." }, { "id": "B", "text": "The book remains closed throughout the video." }, { "id": "C", "text": "The book opens and shows a page with a cat picture on it." }, { "id": "D", "text": "The book flips through pages and then closes completely." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1052, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5adf88414add479f940a8dded4815db0", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which of the following best describes the relationship between the handgun and the magazine?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The magazine is already loaded into the handgun." }, { "id": "B", "text": "The magazine is being loaded into the handgun." }, { "id": "C", "text": "The magazine remains still." }, { "id": "D", "text": "The magazine is detached from the handgun." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1053, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/481837e4f1c2403b9bf650fd3d199e9e", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the order of the colors on the cylindrical object from top to bottom?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yellow, Brown, Blue" }, { "id": "B", "text": "Brown, Blue, Yellow" }, { "id": "C", "text": "Blue, Brown, Yellow" }, { "id": "D", "text": "Blue, Yellow, Brown" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1054, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dd2b505ec642481fac1b4c9b17362c1f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many black rings are encircling the polyhedral object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "1" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1055, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a9467739bb9b44de82eaea382d3c02d8", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many sliding doors are present in the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "1" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1056, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1020ea92f41240b78688adc0915bd981", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is jumping." }, { "id": "B", "text": "The figure is waving its arms." }, { "id": "C", "text": "The figure is standing still." }, { "id": "D", "text": "The figure is running." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1057, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b5fa52f611f34460819ee643755a7e00", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A spherical monster with a non-human face texture and four elongated, flexible tails." }, { "id": "B", "text": "A humanoid monster with a human face texture and four elongated, flexible limbs." }, { "id": "C", "text": "A cylindrical monster with a human face texture and four flexible hands." }, { "id": "D", "text": "A rectangular monster with a human face texture and four rigid fingers." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1058, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/66e8db08bd214a6db4627b0f84a583a1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the figure holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is holding a knife." }, { "id": "B", "text": "The figure is holding a black sword." }, { "id": "C", "text": "The figure is holding two glowing green swords." }, { "id": "D", "text": "The figure is holding a green sword." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1059, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/319a57619948416288cc2d2880c70a4a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden toy biplane?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden toy biplane remains stationary." }, { "id": "B", "text": "The wooden toy biplane flies away." 
}, { "id": "C", "text": "The wooden toy biplane's propeller spins." }, { "id": "D", "text": "The wooden toy biplane rotates in mid-air." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1060, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c2a965feaa4d430d944e4dcafc64f0b0", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many drawers does the white wooden dresser have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The white wooden dresser has four drawers." }, { "id": "B", "text": "The white wooden dresser has three drawers." }, { "id": "C", "text": "The white wooden dresser has two drawers." }, { "id": "D", "text": "The white wooden dresser has one drawer." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1061, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5fa9315fd72e4008a5d8114abbc64076", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the person holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a bag" }, { "id": "B", "text": "a small animal" }, { "id": "C", "text": "a flower" }, { "id": "D", "text": "a book" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1062, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/93a4f1cf02864ec8bdee7a85b4edcfa3", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the green, muscular creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green, muscular creature is running." }, { "id": "B", "text": "The green, muscular creature is walking forward." }, { "id": "C", "text": "The green, muscular creature is standing still." 
}, { "id": "D", "text": "The green, muscular creature is jumping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1063, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/34e0c33b1f98407f81c3f7eb43ba9499", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary color of the lights on the spaceship?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Red" }, { "id": "B", "text": "Green" }, { "id": "C", "text": "Yellow" }, { "id": "D", "text": "Blue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1064, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5254fba825584d53aa9f2626d5f87c76", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the blue humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue humanoid figure is standing still and looking around." }, { "id": "B", "text": "The blue humanoid figure is jumping and flipping." }, { "id": "C", "text": "The blue humanoid figure is walking and running." }, { "id": "D", "text": "The blue humanoid figure is tilting its head in various directions." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1065, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ad0b3e1a45874af099acc10abada1709", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is running." }, { "id": "B", "text": "The creature is eating." }, { "id": "C", "text": "The creature is sleeping." }, { "id": "D", "text": "The creature is looking around." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1066, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/04e2f89dff504a1db0d62c2fc0788d6e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the base of the campfire?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "octagonal" }, { "id": "B", "text": "square" }, { "id": "C", "text": "hexagonal" }, { "id": "D", "text": "circular" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1067, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e57003601cf340efad9f4e91a10ceb99", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many candy canes are there?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "7" }, { "id": "B", "text": "5" }, { "id": "C", "text": "6" }, { "id": "D", "text": "8" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1068, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/10081d39f6a340509ece409073a9213f", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "How many arrows are stuck in the shield?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1." }, { "id": "B", "text": "2." }, { "id": "C", "text": "3." }, { "id": "D", "text": "4." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1069, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5e7806778da74d39bdcf3e6ea3eaedec", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the mirror on the vanity table as it rotates?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The mirror flips upside down." }, { "id": "B", "text": "The mirror opens and closes." }, { "id": "C", "text": "The mirror remains stationary." }, { "id": "D", "text": "The mirror rotates with the table." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1070, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cb1858fed9f947eba352df0831386333", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is sitting still." }, { "id": "B", "text": "The character is sleeping." }, { "id": "C", "text": "The character is running." }, { "id": "D", "text": "The character is spinning." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1071, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/aad0f733a4bb4830921941899227cdc9", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the creature's body color and what are its wings like?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature has a brown, segmented body and large green wings with simple patterns." }, { "id": "B", "text": "The creature has a green, segmented body and large brown wings with intricate patterns." }, { "id": "C", "text": "The creature has a brown, segmented body and large green wings with intricate patterns." }, { "id": "D", "text": "The creature has a green, segmented body and large brown wings with simple patterns." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1072, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/075f198ddb074abbb8648a4bdd8d788e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is walking forward." }, { "id": "B", "text": "The humanoid figure is standing still." }, { "id": "C", "text": "The humanoid figure is stretching his body." }, { "id": "D", "text": "The humanoid figure is jumping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1073, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f72ae700198740dfb5c0a5fce8a99729", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many blue butterflies are flying around the bouquet of flowers?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "5" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1074, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6b6c83e2d10f4d348e46b03994ccd8c4", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many Matryoshka dolls are there?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1075, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a4f1586d18cd48a7bbdbe6f9c1ad07e7", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the number of rings in the cylindrical coil?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "6" }, { "id": "C", "text": "10" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1076, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f58788328c4944e99e67e65e07f7460f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many keys are visible on the person's waist?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "4" }, { "id": "C", "text": "3" }, { "id": "D", "text": "2" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1077, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b4201a1f2d4f4231aca60b45feaa8a47", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the creature holding in its hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is holding a stick in its hand." }, { "id": "B", "text": "The creature is holding nothing." }, { "id": "C", "text": "The creature is holding a chain in its hand." }, { "id": "D", "text": "The creature is holding a knife in its hand." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1078, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bbafd60b419d4533a8aff5460a5ef0a4", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is bending and twisting energetically." }, { "id": "B", "text": "The object is moving in a straight line." }, { "id": "C", "text": "The object is spinning in a circle." }, { "id": "D", "text": "The object is standing still." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1079, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c8f63f71397b4580bbcd3a6aa3824995", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden figure when the crank is turned?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden figure kicks its legs" }, { "id": "B", "text": "The wooden figure moves up and down on the wooden box." }, { "id": "C", "text": "The wooden figure remains stationary on the wooden box." }, { "id": "D", "text": "The wooden figure turns around on the wooden box." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1080, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/797ae02b74154dc38cc47c902046f057", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action performed?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Removing the bullets from the handgun" }, { "id": "B", "text": "Ejecting the magazine from the handgun" }, { "id": "C", "text": "Removing the left hand holding the handgun" }, { "id": "D", "text": "Firing the handgun" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1081, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/25a8e43a08554fb59def8f0e3f8eb4d1", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the figure's movement pattern?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is depicted walking in a straight line." }, { "id": "B", "text": "The figure is depicted jumping up and down." }, { "id": "C", "text": "The figure is depicted sitting still." }, { "id": "D", "text": "The figure is depicted running in a circular pattern." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1082, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cbd85e5090e842ff9954ede2e4ff574d", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What logo is on the clothing of the character?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a yellow toy with a bib" }, { "id": "B", "text": "a red ball with eyes" }, { "id": "C", "text": "let's eat" }, { "id": "D", "text": "just do it" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1083, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cecf554b48ca4356b3e5c7c50ab8c8c2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "In the video, what is the blocky humanoid figure doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blocky humanoid figure is sitting and resting." }, { "id": "B", "text": "The blocky humanoid figure is jumping and running." }, { "id": "C", "text": "The blocky humanoid figure is standing and walking." }, { "id": "D", "text": "The blocky humanoid figure is kneeling." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1084, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/202d64a2fca34a7aa20143ba9b7ddfe5", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the blocky figure have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "5" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1085, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/adc2c82a953c47c4b197a8a42da12460", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cylindrical black object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cylindrical black object opens and closes." }, { "id": "B", "text": "The cylindrical black object moves to a different location." }, { "id": "C", "text": "The cylindrical black object changes color." }, { "id": "D", "text": "The cylindrical black object spins around." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1086, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d7e5ac4a87644fd19b3ecfbb41dc39ef", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the cartoon character doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cartoon character is jumping in the air." }, { "id": "B", "text": "The cartoon character is standing with hands on hips." }, { "id": "C", "text": "The cartoon character is sitting down." }, { "id": "D", "text": "The cartoon character is dancing." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1087, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f73f09fba1b040b1a35ce31d0f5f81e7", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The oven light turns on." }, { "id": "B", "text": "The stove top is lit." }, { "id": "C", "text": "The bread is placed in the oven." }, { "id": "D", "text": "The oven door opens." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1088, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/50fc8e75926841e68ec2b9434cf0315c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the cat-like character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat-like character is waving its hand." }, { "id": "B", "text": "The cat-like character is playing with a toy." }, { "id": "C", "text": "The cat-like character is eating a snack." }, { "id": "D", "text": "The cat-like character is sleeping." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1089, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3bf2121b881c43edb85061498ab03b5a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the number being displayed?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "9" }, { "id": "B", "text": "3" }, { "id": "C", "text": "7" }, { "id": "D", "text": "5" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1090, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/692fbd057f7b479da31b76bf866cbb3d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many hanging purple droplets are there?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "There are 5 hanging purple pods." }, { "id": "B", "text": "There are 10 hanging purple pods." }, { "id": "C", "text": "There are 4 hanging purple pods." }, { "id": "D", "text": "There are 6 hanging purple pods." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1091, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/77cf520d1b664bf39473d455f5e59e34", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the larger bear holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The larger bear is holding small boats." }, { "id": "B", "text": "The larger bear is holding small flowers." }, { "id": "C", "text": "The larger bear is holding small houses." }, { "id": "D", "text": "The larger bear is holding small animals." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1092, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4cd718ecf5c443e4a62c6d8e4140ed33", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the penguin doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The penguin is sitting on a snowman." }, { "id": "B", "text": "The penguin is flying a kite." 
}, { "id": "C", "text": "The penguin is riding a sled." }, { "id": "D", "text": "The penguin is standing next to a blue vending machine." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1093, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/88f8d925cc7e4af69518210e6ce3c1d5", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the knight doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The knight is walking." }, { "id": "B", "text": "The knight is standing up." }, { "id": "C", "text": "The knight is jumping." }, { "id": "D", "text": "The knight is falling to the ground." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1094, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74ee92cf4d224af4bacfb234b41d6708", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the vehicle doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The vehicle is flying." }, { "id": "B", "text": "The vehicle is shaking." }, { "id": "C", "text": "The vehicle is moving forward." }, { "id": "D", "text": "The vehicle is stationary." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1095, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d7e1ab755c264455a116085daba3041c", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid robot wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid robot is wearing a hat, a black vest, and a red shirt." }, { "id": "B", "text": "The humanoid robot is wearing a hat, a gray vest, and a green shirt." 
}, { "id": "C", "text": "The humanoid robot is wearing a hat, a white vest, and a yellow shirt." }, { "id": "D", "text": "The humanoid robot is wearing a hat, a brown vest, and a blue shirt." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1096, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/38f052a2027346e2943b4c76d2572415", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which part of the steampunk-style revolver is moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The barrels are spinning." }, { "id": "B", "text": "The trigger is moving." }, { "id": "C", "text": "The barrel is stationary." }, { "id": "D", "text": "The grip is moving." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1097, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/675050a5062a457a8cb53cc44427830e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is placing its hand on the rock and turning its head side to side to observe the surroundings." }, { "id": "B", "text": "The creature is standing still." }, { "id": "C", "text": "The creature is jumping over the rock." }, { "id": "D", "text": "The creature is pulling the rock." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1098, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d1c239fa4a6647ce874865e833d36285", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is floating in the air." 
}, { "id": "B", "text": "The object is bouncing on the green platform." }, { "id": "C", "text": "The object is sliding on the green platform." }, { "id": "D", "text": "The object is spinning on the green platform." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1099, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bb7af5de5bf14193a53f28c0853b48dd", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the robotic arm doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Holding a sign with text" }, { "id": "B", "text": "Adjusting the position of the Christmas tree" }, { "id": "C", "text": "Removing the star from the Christmas tree" }, { "id": "D", "text": "Decorating the banner near Christmas tree" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1100, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/75a4bb26da7f48819d60c3e79d5c6365", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the car?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The car opens its drive-side door and the passenger-side door closes." }, { "id": "B", "text": "The car opens its doors and the trunk opens." }, { "id": "C", "text": "The car opens its the passenger-side door." }, { "id": "D", "text": "The car opens its doors and the hood opens." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1101, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/431b2349ddf64008a9108a19f11b38bd", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects does the character hold?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character holds two objects." }, { "id": "B", "text": "The character holds four objects." }, { "id": "C", "text": "The character holds three objects." }, { "id": "D", "text": "The character holds one object." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1102, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/276dbd9ed26540b1ac605bca7893f310", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the bee doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Sitting still" }, { "id": "B", "text": "Fluttering its small wings" }, { "id": "C", "text": "Walking on the ground" }, { "id": "D", "text": "Eating nectar" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1103, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ed7a882b59f4432da931bba54737ae10", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the swords?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The swords move closer together." }, { "id": "B", "text": "The swords rotate and flip." }, { "id": "C", "text": "The swords remain static." }, { "id": "D", "text": "The swords extend blades from their side." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1104, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/25dd2f332c1a41d9843b2a75a1f39ad2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is jumping." }, { "id": "B", "text": "The character is running." 
}, { "id": "C", "text": "The character is walking." }, { "id": "D", "text": "The character is sitting." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1105, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a915d9179fe6422b9d669a3a0d726b8e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the rhinoceros doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rhinoceros is standing still." }, { "id": "B", "text": "The rhinoceros is running." }, { "id": "C", "text": "The rhinoceros is jumping." }, { "id": "D", "text": "The rhinoceros is walking." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1106, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dfd3285cb200436aabc0416e28b3add1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is standing still." }, { "id": "B", "text": "The figure is waving arms." }, { "id": "C", "text": "The figure is spinning around." }, { "id": "D", "text": "The figure is jumping up and down." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1107, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/04cabed5217a46f4aa75580b5643e740", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the man wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The man is wearing a patterned shirt and dark pants." }, { "id": "B", "text": "The man is wearing a solid-colored shirt and dark pants." }, { "id": "C", "text": "The man is wearing a casual shirt and dark pants." 
}, { "id": "D", "text": "The man is wearing a striped shirt and dark pants." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1108, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/18a72bc33a5a4533a0aa83aef654a76a", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the 4D object holding in its hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The 4D object is holding a red shield." }, { "id": "B", "text": "The 4D object is holding a large red sword." }, { "id": "C", "text": "The 4D object is holding a small blue sword." }, { "id": "D", "text": "The 4D object is holding a red cape." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1109, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dcad605773944292a309a2bf964ba7be", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is sitting and resting." }, { "id": "B", "text": "The humanoid figure is standing and looking around." }, { "id": "C", "text": "The humanoid figure is running and jumping." }, { "id": "D", "text": "The humanoid figure is crouching and moving slightly in place." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1110, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4e184c2d22e9449ab81678fe29eca5ba", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the sequence of events?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red and white canopy spins around the carousel." 
}, { "id": "B", "text": "The green swing seats rotate" }, { "id": "C", "text": "The yellow stars on the carousel change color." }, { "id": "D", "text": "The blue accents on the carousel move independently of the swing seats." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1111, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5d567a34718e4dd8bfe4468623130db1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a blue shirt, black pants, and brown boots." }, { "id": "B", "text": "The character is wearing a green shirt, beige pants, and black boots." }, { "id": "C", "text": "The character is wearing a green shirt, brown pants, and black boots." }, { "id": "D", "text": "The character is wearing a green shirt, beige pants, and brown boots." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1112, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/23e1b3d8c0f843e0960c673d932712f2", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary feature of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A rough, matte sphere on a simple, plain stand." }, { "id": "B", "text": "A smooth, reflective sphere placed on a textured, ornate stand." }, { "id": "C", "text": "A metallic, angular sphere on a sleek, modern stand." }, { "id": "D", "text": "A transparent, floating sphere with no stand." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1113, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/085bdde920c347aea002f60018b4ce06", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the colors of the cylindrical objects?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cylindrical objects remain the same color throughout the video." }, { "id": "B", "text": "The cylindrical objects change colors from red to green, then back to red, and finally off." }, { "id": "C", "text": "The cylindrical objects change colors from green to red, then back to green, and finally on." }, { "id": "D", "text": "The top of the two cylindrical objects show red and green color, respectively." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1114, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0f7737503c624c439ee3a49922811a43", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the fox-like character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fox-like character is walking around with its hand lifted." }, { "id": "B", "text": "The fox-like character is sitting on the ground." }, { "id": "C", "text": "The fox-like character is dancing and spinning." }, { "id": "D", "text": "The fox-like character is jumping up and down." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1115, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b364f976f2374335b5ec3627c6a4bf59", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action depicted?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The train car is opening its sliding doors on both sides." }, { "id": "B", "text": "The train car is moving forward." }, { "id": "C", "text": "The train car is stationary." }, { "id": "D", "text": "The train car is accelerating." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1116, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1c24d235556f48caa0e2ec8975d2b2b5", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the bird-like creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird-like creature is eating." }, { "id": "B", "text": "The bird-like creature is opening its arms." }, { "id": "C", "text": "The bird-like creature is standing still." }, { "id": "D", "text": "The bird-like creature is sleeping." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1117, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5e58d1cc04784a0195ba87abd563078f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the heavily armored figure in the futuristic suit collapse to the ground?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure does not collapse to the ground at all." }, { "id": "B", "text": "The figure collapses to the ground three times." }, { "id": "C", "text": "The figure collapses to the ground twice." }, { "id": "D", "text": "The figure collapses to the ground once." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1118, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d53e02885e3c450aa10d7b33ecf47d43", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the figure wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is wearing a crown and a spade symbol on a card-like costume." }, { "id": "B", "text": "The figure is wearing a card-like costume featuring a spade symbol and a crown." }, { "id": "C", "text": "The figure is wearing a hat and a shirt." }, { "id": "D", "text": "The figure is wearing a black hood and a red face." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1119, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/28b7960805064ba9b9133e6b8166658e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the red egg-shaped robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red egg-shaped robot is opening and closing its transparent dome." }, { "id": "B", "text": "The red egg-shaped robot is flying through the air." }, { "id": "C", "text": "The red egg-shaped robot is moving its mechanical arms." }, { "id": "D", "text": "The red egg-shaped robot is standing still and not moving." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1120, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/eeaa2cc30ece4c368b49c7f844f3d1bd", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the metallic object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The metallic object rotates in a circular motion." 
}, { "id": "B", "text": "The metallic object unfolds to reveal its blade and then folds." }, { "id": "C", "text": "The metallic object disassembles into separate parts." }, { "id": "D", "text": "The metallic object remains static throughout the video." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1121, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7cff1f13dbbe48e4bbf71f5cbc5008f6", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many blue propellers are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1122, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4c2908e0273d4f39bff67991963e1cf8", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many gun barrels are on the back of this object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "2" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1123, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7dc5fc0172a847469172fe1143ee4025", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the brown, stitched plush toy figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The brown, stitched plush toy figure is jumping up and down." }, { "id": "B", "text": "The brown, stitched plush toy figure is raising its arm in a sequence of movements." }, { "id": "C", "text": "The brown, stitched plush toy figure is waving its hand." 
}, { "id": "D", "text": "The brown, stitched plush toy figure is sitting still." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1124, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/246742778c854067acd22b9cbc6d58b7", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is standing up and not moving." }, { "id": "B", "text": "The character is moving from lying down to standing up." }, { "id": "C", "text": "The character is jumping up and down." }, { "id": "D", "text": "The character is lying down and not moving." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1125, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e9390e5fac8a41278426fd5b19cd5fee", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green door?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green door closes and then opens." }, { "id": "B", "text": "The green door opens and then remains open." }, { "id": "C", "text": "The green door opens and then closes." }, { "id": "D", "text": "The green door opens and then moves to the side." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1126, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/16e379f271414c2cbf5a8806083c5b79", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the primary motion of the green object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green object twists and rotates in a continuous motion." }, { "id": "B", "text": "The green object vibrates in place." 
}, { "id": "C", "text": "The green object remains stationary throughout the video." }, { "id": "D", "text": "The green object moves in a straight line." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1127, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c813ee273f48445090c559271965592b", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the character?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character jumps up." }, { "id": "B", "text": "The character is shaking." }, { "id": "C", "text": "The character remains still." }, { "id": "D", "text": "The character flies away." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1128, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/739c5c0882d94a62977848ee2c4feb88", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many holsters are visible on the humanoid figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "4" }, { "id": "C", "text": "1" }, { "id": "D", "text": "3" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1129, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b007232076fe4b3c99808e5e704b9600", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is swimming." }, { "id": "B", "text": "The creature is flying." }, { "id": "C", "text": "The creature is standing still." }, { "id": "D", "text": "The creature is walking and running." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1130, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ac28ca4cd1334c13a0f51bfbd3f2ddb0", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects does the character hold?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character holds two objects." }, { "id": "B", "text": "The character holds three objects." }, { "id": "C", "text": "The character holds four objects." }, { "id": "D", "text": "The character holds one object." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1131, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/572d17bbbd6e4e7e8bf34c507c347dd9", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many black, curved blades are attached to the yellow hub?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "7" }, { "id": "B", "text": "5" }, { "id": "C", "text": "6" }, { "id": "D", "text": "8" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1132, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4b381620c7304c7da3954c07c1728320", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the person in the red and black futuristic suit turn around?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "2" }, { "id": "C", "text": "1" }, { "id": "D", "text": "0" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1133, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/772bbdea7c444bb8aa71c4c1049ddc27", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the pink, humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Performing a flip" }, { "id": "B", "text": "Jumping up and down" }, { "id": "C", "text": "Walking on all fours" }, { "id": "D", "text": "Standing with its arms bent and legs slightly apart" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1134, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f5d6bb86046f40aea1d732d007dd3bf9", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many clusters of leaves does the tree have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "four" }, { "id": "B", "text": "eight" }, { "id": "C", "text": "ten" }, { "id": "D", "text": "five" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1135, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b2f52afe8501436391041efff41dd6e5", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the stand of the flat-screen monitor?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The stand is black." }, { "id": "B", "text": "The stand is red." }, { "id": "C", "text": "The stand is gold." }, { "id": "D", "text": "The stand is silver." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1136, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/285848d99ee849fc858988679dbab6d5", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is walking on the ground and looking around." }, { "id": "B", "text": "The figure is standing still and observing its surroundings." }, { "id": "C", "text": "The figure is shot and falls to the ground." }, { "id": "D", "text": "The figure is sitting on the ground and resting." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1137, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6fd94c6a304840a79a7a33d29fcdc3c7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is walking on its tripod-like base." }, { "id": "B", "text": "The robot is spinning its head around." }, { "id": "C", "text": "The robot is waving its arms while walking forward." }, { "id": "D", "text": "The robot is standing still and looking around." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1138, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/43a31b033b1549e3b059246b04d47d9a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the bat doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bat is flapping its wings and gliding." }, { "id": "B", "text": "The bat is eating." }, { "id": "C", "text": "The bat is flying in a circle." }, { "id": "D", "text": "The bat is sitting on a branch." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1139, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4191e1f5b7aa4a50932c162a1f836bd4", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the green humanoid creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green humanoid creature is playing a white instrument." }, { "id": "B", "text": "The green humanoid creature is drinking from a white object." }, { "id": "C", "text": "The green humanoid creature is jumping over a white object." }, { "id": "D", "text": "The green humanoid creature is holding a white object in its mouth." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1140, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fc3e15ac900b49349d3b9ed0cdab34e6", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character in the circular badge doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is waving with both hands." }, { "id": "B", "text": "The character is making a thumbs-up gesture." }, { "id": "C", "text": "The character is holding a ball in their hand." }, { "id": "D", "text": "The character is making a scissorhands sign with one hand." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1141, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ca517cda25624ad291ae086fc9c10b0d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the flying saucer doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The flying saucer is moving in a zigzag pattern." }, { "id": "B", "text": "The flying saucer is flying straight." 
}, { "id": "C", "text": "The flying saucer is stationary." }, { "id": "D", "text": "The flying saucer is hovering and spinning." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1142, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a4fddaf0123e4e1e811b7fe29721bd89", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many colored structures are present in the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "6" }, { "id": "C", "text": "3" }, { "id": "D", "text": "5" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1143, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b99210a60283431eaea957cf34d1e240", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is holding a cab." }, { "id": "B", "text": "The character is holding nothing." }, { "id": "C", "text": "The character is holding a rope." }, { "id": "D", "text": "The character is holding a knife." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1144, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6dedd06861344b22bd94e3221ffbc4a1", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the green humanoid creature bend down and pick up the object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "1" }, { "id": "D", "text": "0" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1145, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/78d3cb1beaf34d2aafd1af5ee93ff54e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many horns does the blue creature have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "1" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1146, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/506668927f434293b1c567a6ac8501fb", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the coffin?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The coffin closes its lid." }, { "id": "B", "text": "The coffin opens its lid." }, { "id": "C", "text": "The coffin remains open." }, { "id": "D", "text": "The coffin moves to a different location." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1147, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/634c58be4d244256b2dcd22415c9162a", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What does the green character with purple hair do with the yellow sign that reads 'let me help you'?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green character with purple hair puts the yellow sign on their head." }, { "id": "B", "text": "The green character with purple hair throws the yellow sign away." 
}, { "id": "C", "text": "The green character with purple hair holds the yellow sign and moves it around." }, { "id": "D", "text": "The green character with purple hair draws on the yellow sign with a marker." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1148, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/387300c36ec74ea0b805327218ec6e70", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is holding a large sword and a shield with a white emblem." }, { "id": "B", "text": "The character is holding a large sword and a shield with a red emblem." }, { "id": "C", "text": "The character is holding a hammer and a shield with a black emblem." }, { "id": "D", "text": "The character is holding a large sword and a shield with a yellow emblem." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1149, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7f98eb9cd0d3416e868319fb0092fae3", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many guns does the character hold?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character holds two guns." }, { "id": "B", "text": "The character holds three guns." }, { "id": "C", "text": "The character holds no guns." }, { "id": "D", "text": "The character holds one gun." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1150, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b3b75b26c677449db70f5e5087598e2e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the yellow and black informational display on the rectangular wooden table?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The yellow and black informational display remains on the rectangular wooden table." }, { "id": "B", "text": "The yellow and black informational display rotates on the rectangular wooden table." }, { "id": "C", "text": "The yellow and black informational display raises from the rectangular wooden table." }, { "id": "D", "text": "The yellow and black informational display is replaced by a different object on the rectangular wooden table." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1151, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7503367b467c450c8ecb2789bef3eb1e", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the spider doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The spider is climbing a wall." }, { "id": "B", "text": "The spider is walking with its segmented legs." }, { "id": "C", "text": "The spider is spinning a web." }, { "id": "D", "text": "The spider is sitting still." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1152, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2df0ec8cbd5d4915a1aae4b72e7f1280", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the internal structure of the red cylindrical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The internal structure of the red cylindrical object is red and yellow." }, { "id": "B", "text": "The internal structure of the red cylindrical object is yellow and red." }, { "id": "C", "text": "The internal structure of the red cylindrical object is yellow and orange." }, { "id": "D", "text": "The internal structure of the red cylindrical object is red and orange." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1153, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/571baf9cfcd74cc69eaa22d423678b25", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many arm-like appendages does the white spherical robot have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "1" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1154, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/03638bc34bae45bbbc1ca4dfe491c767", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a white lab coat, green pants, a pink tie, and blue shoes." }, { "id": "B", "text": "The character is wearing a black lab coat, red pants, a purple tie, and green shoes." }, { "id": "C", "text": "The character is wearing a yellow lab coat, blue pants, a pink tie, and green shoes." }, { "id": "D", "text": "The character is wearing a gray lab coat, green pants, a pink tie, and blue shoes." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1155, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/29c584aded38446f846b351be7d11c97", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the cat-like figure holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat-like figure is holding a lollipop." }, { "id": "B", "text": "The cat-like figure is holding a toy and a ball." }, { "id": "C", "text": "The cat-like figure is holding a book and a pencil." 
}, { "id": "D", "text": "The cat-like figure is holding a spoon and a fork." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1156, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fa463cb6fca94ad5aacbf886b55c6e7d", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a wooden board" }, { "id": "B", "text": "a bun hairstyle" }, { "id": "C", "text": "a sandwich" }, { "id": "D", "text": "a giant burger" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1157, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2cf475d972f34363bdb59dd33c1c42f8", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the number of drawers opened in the wooden cabinet?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "5" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1158, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f25c5c4245094362a274ebc35ac61613", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is walking on its feet." }, { "id": "B", "text": "The robot is shaking." }, { "id": "C", "text": "The robot is spinning in a circle." }, { "id": "D", "text": "The robot is lifting its arm up." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1159, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d3768223323e4462bbe6c048c9b400ab", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the flag on the mailbox?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The flag on the mailbox flips upside down." }, { "id": "B", "text": "The flag on the mailbox rotates 360 degrees." }, { "id": "C", "text": "The flag on the mailbox remains static." }, { "id": "D", "text": "The flag on the mailbox rotates 90 degrees." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1160, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ea2c2fa6b82c4c938c04a908b7b38d7f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the bird doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird is flying around the tree." }, { "id": "B", "text": "The bird is perching on the tree." }, { "id": "C", "text": "The bird is hiding in the tree." }, { "id": "D", "text": "The bird is sitting on the ground." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1161, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8fce79f0763849ef9edeba597bbaf9e3", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the bow tie on the purple animatronic rabbit?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bow tie is blue and white striped." }, { "id": "B", "text": "The bow tie is red and black striped." }, { "id": "C", "text": "The bow tie is red and white striped." }, { "id": "D", "text": "The bow tie is red and white checkered." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1162, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/95e46a38c91443cc97f16d250de777f7", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many screws are in the blue plate at the end of the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "3" }, { "id": "D", "text": "6" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1163, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bd5e4fb4da604041962d4f4963027fde", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cylindrical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cylindrical object expands." }, { "id": "B", "text": "The cylindrical object contracts." }, { "id": "C", "text": "The cylindrical object rotates." }, { "id": "D", "text": "The cylindrical object remains stationary." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1164, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0061bfce34004900a726a881942b3bdc", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the bear-like figure most likely made of?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bear-like figure is made of wood." }, { "id": "B", "text": "The bear-like figure is made of metal." }, { "id": "C", "text": "The bear-like figure is made of ice." }, { "id": "D", "text": "The bear-like figure is made of plastic." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1165, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1e6ad6b0643f404a9f3be8a05ccb0c25", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is walking and twisting." }, { "id": "B", "text": "The object is jumping." }, { "id": "C", "text": "The object is standing still." }, { "id": "D", "text": "The object is spinning." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1166, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/341dfd96d87944e5a6fcd5be3cb23708", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the red object in the glass case?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red object in the glass case remains stationary." }, { "id": "B", "text": "The red object in the glass case changes color." }, { "id": "C", "text": "The red object in the glass case disappears." }, { "id": "D", "text": "The red object in the glass case moves up and down." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1167, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/18749eb12a17404ea38855c51b20c24b", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the relationship between the arrow and the word 'inicio'?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The arrow points to the right and is labeled 'inicio'." }, { "id": "B", "text": "The arrow points downward and is labeled 'inicio'." }, { "id": "C", "text": "The arrow points to the left and is labeled 'inicio'." 
}, { "id": "D", "text": "The arrow points upward and is labeled 'inicio'." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1168, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d72d6312a3c84de9888716700fc58a16", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many white spheres are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "3" }, { "id": "D", "text": "2" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1169, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1f2cd7e51a4a405c947fa8f952176407", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a green elf outfit with a hat, white beard, and black belt." }, { "id": "B", "text": "The character is wearing a red Santa Claus outfit with a hat, black beard, and black belt." }, { "id": "C", "text": "The character is wearing a red elf outfit with a hat, white beard, and black belt." }, { "id": "D", "text": "The character is wearing a red Santa Claus outfit with a hat, white beard, and black belt." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1170, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7ec390cfdb15468e9843853c3f419ebe", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a black hat with a white cross and a stained white dress." 
}, { "id": "B", "text": "The character is wearing a black hat with a white cross and a clean white dress." }, { "id": "C", "text": "The character is wearing a small black hat and a clean white dress." }, { "id": "D", "text": "The character is wearing a large black hat and a stained white dress." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1171, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/00ffbdc756524e1abf0e9f8f2f49a745", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the solar panel as the video progresses?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The solar panel rotates on the pole and changes its angle towards sky." }, { "id": "B", "text": "The solar panel moves up and down." }, { "id": "C", "text": "The solar panel changes color." }, { "id": "D", "text": "The solar panel remains stationary." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1172, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9a1173bae60043028716e9d0601153fd", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many teeth are visible in the pig?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "5" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1173, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/95335a4582e24288a32aaccf7364e933", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the microwave's door frame?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Blue" }, { "id": "B", "text": "Green" }, { "id": "C", "text": "Purple" }, { "id": "D", "text": "Pink" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1174, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e57b3bd6c108499b8235b76bf7ce8e82", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cat's eyes as the video progresses?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat's eyes become smaller." }, { "id": "B", "text": "The cat's eyes close." }, { "id": "C", "text": "The cat's eyes remain the same." }, { "id": "D", "text": "The cat's eyes blinked once." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1175, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/16cbf84e418e488abede00a8bfbe7a63", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character's primary action?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is jumping." }, { "id": "B", "text": "The character is punching and walking" }, { "id": "C", "text": "The character is walking." }, { "id": "D", "text": "The character is punching." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1176, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b1394e96fb1b4ebdb96a76524224358f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the rusty metal barrel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rusty metal barrel turns into a different shape." }, { "id": "B", "text": "The rusty metal barrel remains unchanged throughout the video." 
}, { "id": "C", "text": "The rusty metal barrel progressively deforms and flattens over time." }, { "id": "D", "text": "The rusty metal barrel is replaced by a different object." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1177, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6643d624b9174b8591d391b9aabd0199", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green handle of the cylindrical object as it rotates?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green handle remains stationary." }, { "id": "B", "text": "The green handle moves from the front to the side of the object." }, { "id": "C", "text": "The green handle rotates with the object." }, { "id": "D", "text": "The green handle moves from the side to the front of the object." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1178, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/17992c48ad2543e39e6cad76f89a4ee8", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the lanterns?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The lanterns rotate around the poles." }, { "id": "B", "text": "The lanterns swing in synchrony." }, { "id": "C", "text": "The lanterns move independently." }, { "id": "D", "text": "The lanterns remain stationary." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1179, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/20093324471a491aafed6019046d535f", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the green gear when the red gear rotates?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The green gear rotates in a random direction." }, { "id": "B", "text": "The green gear rotates in the same direction." }, { "id": "C", "text": "The green gear rotates in the opposite direction." }, { "id": "D", "text": "The green gear remains stationary." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1180, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ed5d066086a94bd786812cc9fb2b755d", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many directions the red blood are moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "2" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1181, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c27f71e432b84353a245fb9b65dcdbf2", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the pedestal fan?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pedestal fan starts spinning and then stops." }, { "id": "B", "text": "The pedestal fan stops spinning and then turns off." }, { "id": "C", "text": "The pedestal fan starts spinning and then oscillates." }, { "id": "D", "text": "The pedestal fan remains stationary throughout the video." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1182, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c0bed0490cf045e8ab4eda6c3d249b1c", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the animatronic bear holding in its hand?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "top hat" }, { "id": "B", "text": "ball" }, { "id": "C", "text": "microphone" }, { "id": "D", "text": "broom" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1183, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/246e3426ddae439d9697d426472fa2eb", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden table?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden table lifts off the ground." }, { "id": "B", "text": "The wooden table folds up its legs." }, { "id": "C", "text": "The wooden table remains stable and unchanged." }, { "id": "D", "text": "The wooden table rotates on its legs." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1184, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/11744f7dbbb14b44a1e04f404246755b", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What action does the character perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character bends and looks around." }, { "id": "B", "text": "The character moves its hands in front of its body." }, { "id": "C", "text": "The character raises its arms and looks up." }, { "id": "D", "text": "The character stands upright and looks around." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1185, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bcf111e592d64b6490003680cae9407f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the central feature of the 4D object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The central feature of the 4D object is a triangular protrusion." }, { "id": "B", "text": "The central feature of the 4D object is a square protrusion." }, { "id": "C", "text": "The central feature of the 4D object is a circular cutout." }, { "id": "D", "text": "The central feature of the 4D object is a rectangular screen." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1186, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b818fd5e1c964010bde22059f0bb138e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the inner surface of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Pink" }, { "id": "B", "text": "Blue" }, { "id": "C", "text": "Purple" }, { "id": "D", "text": "White" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1187, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1e8c8d4f432c4610ad9035eafab82003", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the bird-like creature?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bird-like creature spins around." }, { "id": "B", "text": "The bird-like creature flies away." }, { "id": "C", "text": "The bird-like creature stands still." }, { "id": "D", "text": "The bird-like creature walks forward." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1188, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/616f748ed8a34537873cfba6fa8a2a8e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the green cartoon character doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Walking forward" }, { "id": "B", "text": "Jumping up and down" }, { "id": "C", "text": "Standing still" }, { "id": "D", "text": "Throwing a punch forward" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1189, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3ec16be39a8f4185a6900de3f16ee389", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which direction is the whale's tail moving?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The whale's tail is moving downwards." }, { "id": "B", "text": "The whale's tail is moving up and down." }, { "id": "C", "text": "The whale's tail is moving upwards." }, { "id": "D", "text": "The whale's tail is moving to the left." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1190, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/63b5d5cea42d42ec8ab2e0f188e9cb3c", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the cartoon character carrying?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a white box" }, { "id": "B", "text": "a toy." }, { "id": "C", "text": "Pizza." }, { "id": "D", "text": "boots." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1191, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3dd1736679164e99be036737e72fbb47", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the owl doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The owl is flying." }, { "id": "B", "text": "The owl is sitting still." 
}, { "id": "C", "text": "The owl is turning its head to the left and observing something." }, { "id": "D", "text": "The owl is turning its head to the right." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1192, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/32e796c363b545128eb892643b647a57", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the pink cube?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pink cube changes color." }, { "id": "B", "text": "The pink cube rotates in place." }, { "id": "C", "text": "The pink cube moves forward." }, { "id": "D", "text": "The pink cube disappears." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1193, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/42b9a50b851a43dfa8ac6fcc7d8ba25f", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the animatronic bear holding in its left hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "nothing" }, { "id": "B", "text": "a bow tie" }, { "id": "C", "text": "a top hat" }, { "id": "D", "text": "a microphone" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1194, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/550eec49ad184ae294cc9cdbd0cd5f2e", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A golden, smooth object resembling a peanut shell." }, { "id": "B", "text": "A golden, curved object resembling a peanut shell." }, { "id": "C", "text": "A golden, flat object resembling a peanut shell." 
}, { "id": "D", "text": "A golden, elongated, and wrinkled object resembling a peanut shell." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1195, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c890f28d4f244a60b66d8ea88a494f5d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "What is happening with the spoon?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Move up and down" }, { "id": "B", "text": "Move left and right" }, { "id": "C", "text": "Pour white particles" }, { "id": "D", "text": "Scoop up white particles." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1196, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/45638629e81b4ac4b02254868255c147", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the pattern of the circular buttons on the black rectangular panel?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pattern of the circular buttons on the black rectangular panel is all black." }, { "id": "B", "text": "The pattern of the circular buttons on the black rectangular panel is all white." }, { "id": "C", "text": "The pattern of the circular buttons on the black rectangular panel is random." }, { "id": "D", "text": "The pattern of the circular buttons on the black rectangular panel is alternating black and white." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1197, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74110fd064524845b8b005a3026f68a8", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many candles are on the cake?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "12" }, { "id": "B", "text": "14" }, { "id": "C", "text": "8" }, { "id": "D", "text": "10" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1198, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/769ccee42fcb40669462b1bf53ab05de", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first step in the sequence of events shown?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The first step in the sequence is the gun being held by a person." }, { "id": "B", "text": "The first step in the sequence is the gun being aimed at a target." }, { "id": "C", "text": "The first step in the sequence is the gun being loaded with ammunition." }, { "id": "D", "text": "The first step in the sequence is the bullet being fired from the gun." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1199, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dcd0bc416b5440a38d64967ac124f27b", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many white spheres are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "6" }, { "id": "D", "text": "3" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1200, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e70822505ce0467eb574ed5f82d7d65e", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the fairy-like creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fairy-like creature is standing still and looking around." 
}, { "id": "B", "text": "The fairy-like creature is dancing." }, { "id": "C", "text": "The fairy-like creature is flying and spinning." }, { "id": "D", "text": "The fairy-like creature is sitting and waving its arms." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1201, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/64dc3e3755fd44dabe09325a89151a6c", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action depicted?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The sword is being used to cut through a piece of wood." }, { "id": "B", "text": "The sword is transitioning from an upright to a horizontal position." }, { "id": "C", "text": "The sword is being held by a person." }, { "id": "D", "text": "The sword is spinning around." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1202, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1cfbd2ea67ad4cb59139dffae5553c1e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many buttons are on the bear's overalls?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1203, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7fb1496976d34f258be5d50616db5c39", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the hexagonal objects with yellow centers?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The hexagonal objects with yellow centers are removed from the jar." 
}, { "id": "B", "text": "The hexagonal objects with yellow centers are transformed into a different shape." }, { "id": "C", "text": "The hexagonal objects with yellow centers remain stationary inside the jar." }, { "id": "D", "text": "The hexagonal objects with yellow centers float above the jar and then settle inside it." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1204, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7b964a95f17c477cb14556561a41a8cc", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many drawers are opened?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "five drawers." }, { "id": "B", "text": "two drawers." }, { "id": "C", "text": "three drawers." }, { "id": "D", "text": "four drawers." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1205, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/abe566b5732647739c36ea98b017387e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the pirate wearing on his head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pirate is wearing a white hat with a skull emblem." }, { "id": "B", "text": "The pirate is wearing a red hat with a skull emblem." }, { "id": "C", "text": "The pirate is wearing a brown hat with a skull emblem." }, { "id": "D", "text": "The pirate is wearing a black hat with a skull emblem." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1206, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6da3bc6b432f4803880abd5fdebec879", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What does the humanoid robot do after standing with arms extended horizontally?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid robot walks forward." }, { "id": "B", "text": "The humanoid robot jumps up." }, { "id": "C", "text": "The humanoid robot stops and remains still." }, { "id": "D", "text": "The humanoid robot turns around." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1207, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ae0a57effc384809a41b71cab6262a53", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The mechanical turret starts rotating." }, { "id": "B", "text": "The cylindrical components on either side move." }, { "id": "C", "text": "The legs of the mechanical device extend." }, { "id": "D", "text": "The vertical antenna-like structure extends." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1208, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c3917a8f97834c928ebc1a4f49671b01", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the wooden block with cylindrical pegs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The wooden block with cylindrical pegs remains static." }, { "id": "B", "text": "The wooden block with cylindrical pegs changes colors in a grid pattern." }, { "id": "C", "text": "The wooden block with cylindrical pegs gradually changes heights in a grid pattern." }, { "id": "D", "text": "The wooden block with cylindrical pegs rotates in a circular motion." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1209, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4565917d3e2f4cf187e94fdd56d45850", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many fingers does one hand of the humanoid figure have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "5" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1210, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cb43fcc6ea16454baecbbf6b9ccdc288", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is standing still." }, { "id": "B", "text": "The character is jumping." }, { "id": "C", "text": "The character is sitting." }, { "id": "D", "text": "The character is walking." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1211, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/78d3cb1beaf34d2aafd1af5ee93ff54e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is sleeping." }, { "id": "B", "text": "The creature is running and jumping." }, { "id": "C", "text": "The creature is eating." }, { "id": "D", "text": "The creature is standing and displaying its muscular build." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1212, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/839e17744901457db6e4aff0cddef1f2", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many cylindrical thrusters are present on the spacecraft?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "8" }, { "id": "C", "text": "2" }, { "id": "D", "text": "6" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1213, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4560a4d40b8c4d02aad2b071682cbd05", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "In the video, how many balls are moving in the Newton's cradle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "10" }, { "id": "B", "text": "3" }, { "id": "C", "text": "7" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1214, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/74ab53ceaa30476b9bdcdf6f95a4561e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which ball is moving down the track and into the lower section of the device?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Green ball" }, { "id": "B", "text": "Yellow ball" }, { "id": "C", "text": "Blue ball" }, { "id": "D", "text": "Red ball" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1215, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3107ec95706e42d981a3a61a38c6478a", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a helmet and a gauntlet with green stones." }, { "id": "B", "text": "The character is wearing a helmet and a gauntlet with red stones." }, { "id": "C", "text": "The character is wearing a helmet and a gauntlet with black stones." }, { "id": "D", "text": "The character is wearing a helmet and a gauntlet adorned with colorful stones." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1216, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/730bd53e458943e990667c4c4d97df96", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "How many blue cylindrical rods are protruding from the dome-shaped object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "7" }, { "id": "B", "text": "5" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1217, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/81b67367d7b14cfe87e4d6859f9b386f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Translucent red" }, { "id": "B", "text": "Translucent blue" }, { "id": "C", "text": "Translucent purple" }, { "id": "D", "text": "Translucent green" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1218, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/572d17bbbd6e4e7e8bf34c507c347dd9", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the direction of rotation of the object when looking from top to bottom?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Counterclockwise" }, { "id": "B", "text": "Clockwise" }, { "id": "C", "text": "No rotation" }, { "id": "D", "text": "Random" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1219, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/417ee66ae65e4e6a9a28c68d5e42fe1f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action of the bag?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The bag is opening and closing." }, { "id": "B", "text": "The bag is being folded and unfolded." }, { "id": "C", "text": "The bag is spinning around." }, { "id": "D", "text": "The bag is being lifted and dropped." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1220, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b88bfa1512224245b515526dddf2e4c7", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the red and blue spherical objects?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The red spherical object open to reveal their interiors." }, { "id": "B", "text": "The red and blue spherical objects remain closed throughout the video." }, { "id": "C", "text": "The red and blue spherical objects collide and merge into one object." }, { "id": "D", "text": "The red and blue spherical objects spin independently without opening." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1221, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/66b9175af37441708114d70394e116dc", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the electronic device?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The electronic device transitions from a closed state to an open state, revealing its screen and keyboard." }, { "id": "B", "text": "The electronic device transitions from a closed state to an open state, but only partially reveals its screen and keyboard." }, { "id": "C", "text": "The electronic device remains in a closed state throughout the video." }, { "id": "D", "text": "The electronic device transitions from an open state to a closed state, hiding its screen and keyboard." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1222, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/67678eb1ebfc4a73a538124a77f09078", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is sitting still." }, { "id": "B", "text": "The robot is twisting the body and dancing." }, { "id": "C", "text": "The robot is waving with its right hand." }, { "id": "D", "text": "The robot is walking forward." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1223, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/4e9ce4cb48bc4231999f80fbd1a1df28", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the yellow, mouse-like character with black-tipped ears and a lightning bolt tail twirl around?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "4" }, { "id": "C", "text": "1" }, { "id": "D", "text": "0" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1224, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a3607d18fb544a979558ae60823bdcca", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the motor doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The motor is spinning in a circular motion." }, { "id": "B", "text": "The motor is rotating." }, { "id": "C", "text": "The motor is stationary." }, { "id": "D", "text": "The motor is moving up and down." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1225, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/08ade1b8d83641cb9c61a08e84dc4e9b", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "4" }, { "id": "C", "text": "5" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1226, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/81007af39d6c4150b88f2abf4cc32a33", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the animal doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The animal is standing." }, { "id": "B", "text": "The animal is sitting." }, { "id": "C", "text": "The animal is sleeping." }, { "id": "D", "text": "The animal is jumping." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1227, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f98a6672062446ee86ff086248e255b0", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the object being assembled on the green cutting mat?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Cube" }, { "id": "B", "text": "Rectangle" }, { "id": "C", "text": "Cylinder" }, { "id": "D", "text": "Square" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1228, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/49a01e710d9545ceb13dfd8c9fe11fea", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cylindrical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cylindrical object rotates on its three legs." }, { "id": "B", "text": "The cylindrical object lifts off the platform." }, { "id": "C", "text": "The cylindrical object remains stationary on the platform." }, { "id": "D", "text": "The cylindrical object falls off the platform." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1229, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/313a3cb2c7864ac392287bf2b65d8646", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cat-like figure?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat-like figure changes its facial expression." }, { "id": "B", "text": "The cat-like figure rotates around its own axis." }, { "id": "C", "text": "The cat-like figure moves downward and upward." }, { "id": "D", "text": "The cat-like figure lifts its crown off its head." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1230, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/00db1be0fcfc45dcad01e92e0814655e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the word written on the front of the black t-shirt?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "bandana" }, { "id": "B", "text": "bandaaa" }, { "id": "C", "text": "bandada" }, { "id": "D", "text": "banana" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1231, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/85eaf9ad00124684bdad9e7d3f1ff880", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many curved arms does the green, star-shaped object have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "6" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1232, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/362dea3c681a45669cb0d90bee75652b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action depicted?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The table is being lifted up." }, { "id": "B", "text": "The table is folding down gradually." }, { "id": "C", "text": "The table is spinning around." }, { "id": "D", "text": "The table is unfolding gradually." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1233, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3338cda4fe694bc8869b7c587c078246", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is printed on the back of the varsity jacket?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "charly rocks" }, { "id": "B", "text": "rock and roll" }, { "id": "C", "text": "music lover" }, { "id": "D", "text": "all you need is rock" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1234, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/68fbd06dae624ae5a024563778ecbf84", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many push-ups does the person in the white tracksuit and black sneakers perform?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person performs a total of 2 push-ups." }, { "id": "B", "text": "The person performs a total of 4 push-ups." }, { "id": "C", "text": "The person performs a total of 1 push-up." }, { "id": "D", "text": "The person performs a total of 3 push-ups." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1235, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/69c22585d10a4aceb57b78774d7f0edb", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a white and green suit with a helmet featuring multiple antenna-like protrusions." }, { "id": "B", "text": "The character is wearing a black and yellow suit with a helmet featuring multiple antenna-like protrusions and a backpack." 
}, { "id": "C", "text": "The character is wearing a black and yellow suit with a helmet featuring multiple antenna-like protrusions." }, { "id": "D", "text": "The character is wearing a red and blue suit with a helmet featuring multiple antenna-like protrusions." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1236, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/43e53a57f0244288a5cd7196ffd05366", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing on its head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a black hat with a star symbol." }, { "id": "B", "text": "The character is wearing a white hat with an anchor symbol." }, { "id": "C", "text": "The character is wearing a yellow hat with a sun symbol." }, { "id": "D", "text": "The character is wearing a white hat with a moon symbol." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1237, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ec9267471b814546b850a9f61f4bf85f", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the white flower?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The white flower grows taller and taller." }, { "id": "B", "text": "The white flower blooms and then wilts." }, { "id": "C", "text": "The white flower turns into a green stem." }, { "id": "D", "text": "The white flower remains the same throughout the video." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1238, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8e482145eeed419980fabf073fcb13c9", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many tongues of fire are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Four" }, { "id": "B", "text": "Two" }, { "id": "C", "text": "Five" }, { "id": "D", "text": "Three" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1239, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/21efe4bdd8c0442aa08eaee5776e7df0", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing on its head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a brown hat with a red clover." }, { "id": "B", "text": "The character is wearing a green hat with a red clover." }, { "id": "C", "text": "The character is wearing a green hat with a green clover." }, { "id": "D", "text": "The character is wearing a brown hat with a green clover." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1240, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0a7b58f0c5884490b10188c982ff038c", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many purple accents are there on the humanoid figure?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "6" }, { "id": "C", "text": "3" }, { "id": "D", "text": "2" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1241, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/53b3d878333a4ad6a15f36a7a9e83666", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the machine?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The machine rotates." }, { "id": "B", "text": "The machine goes down and then lifts up." }, { "id": "C", "text": "The machine lifts up and then goes down." }, { "id": "D", "text": "The machine remains stationary." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1242, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a924f4a1c3b7411db8d3b201a2c6db5f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What color are the soles of the character's shoes?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a gradient of colors, including red, yellow, and green." }, { "id": "B", "text": "yellow" }, { "id": "C", "text": "green" }, { "id": "D", "text": "blue" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1243, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2ecfb55304a043a2a86353f70cc1cf92", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the camera doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The camera is mounted on the side of the quadcopter drone and is spinning." }, { "id": "B", "text": "The camera is mounted on the bottom of the quadcopter drone and is moving left and right."
}, { "id": "C", "text": "The camera is mounted on the top of the quadcopter drone and is stationary." }, { "id": "D", "text": "The camera is mounted underneath the quadcopter drone and is moving down." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1244, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b1adaef66f3f41c5be7d756471ad3c7e", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is shrinking." }, { "id": "B", "text": "The object is rotating." }, { "id": "C", "text": "The object is expanding and shrinking." }, { "id": "D", "text": "The object is moving forward." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1245, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cfcd5dab6685467d9414c542ea39088b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the blue component on the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue component is circular." }, { "id": "B", "text": "The blue component is square." }, { "id": "C", "text": "The blue component is rectangular." }, { "id": "D", "text": "The blue component is triangular." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1246, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/26ee0bbf2d794834a5ad24d1012c4173", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the small figure in the wooden box doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The small figure in the wooden box is sitting on the ground." 
}, { "id": "B", "text": "The small figure in the wooden box is being held by the blocky character's hands." }, { "id": "C", "text": "The small figure in the wooden box is riding on the back of the blocky character." }, { "id": "D", "text": "The small figure in the wooden box is turning the head and looking around." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1247, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cf723517faff4173b3867d55b4357e62", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the alien figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The alien figure is lying on the cylindrical platform with its arms outstretched." }, { "id": "B", "text": "The alien figure is floating above the cylindrical platform with its arms spread out." }, { "id": "C", "text": "The alien figure is standing and jumping on the cylindrical platform with its arms raised." }, { "id": "D", "text": "The alien figure is sitting on the cylindrical platform with its arms crossed." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1248, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/07cfde95e9d04c3a9584d950138ae73c", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the man wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The man is wearing a red suit and black tie." }, { "id": "B", "text": "The man is wearing a black suit and red tie." }, { "id": "C", "text": "The man is wearing a black hat and brown shoes." }, { "id": "D", "text": "The man is wearing a white shirt and blue jeans." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1249, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7baed2bc4c244ca18020efc4b40a9fa2", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is spinning around." }, { "id": "B", "text": "The humanoid figure is walking forward." }, { "id": "C", "text": "The humanoid figure is jumping up and down." }, { "id": "D", "text": "The humanoid figure is raising and lowering its arms, and jumps once." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1250, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/62fc52e950374e808fe67e770ac9f7a1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the humanoid figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The humanoid figure is sitting." }, { "id": "B", "text": "The humanoid figure is sleeping." }, { "id": "C", "text": "The humanoid figure is walking." }, { "id": "D", "text": "The humanoid figure is dancing." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1251, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/be9b46417caa4d3ba58670ab67473c4b", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the man doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The man is standing still." }, { "id": "B", "text": "The man is sitting." }, { "id": "C", "text": "The man is walking." }, { "id": "D", "text": "The man is dancing." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1252, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/51cc702ee89849fb8eeedc1687b897e9", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the robot's head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Cylindrical" }, { "id": "B", "text": "Square" }, { "id": "C", "text": "Dome-shaped" }, { "id": "D", "text": "Round" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1253, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8bce5c28dc5b4eccb41d33d64c5c9360", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which action is the skull performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The skull is rotating on its axis." }, { "id": "B", "text": "The skull is chewing." }, { "id": "C", "text": "The skull is moving its jaw side to side." }, { "id": "D", "text": "The skull is closing its mouth." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1254, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/abc025f2d9414cfb8562e8dcfba6bf0a", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character's primary action?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is taking a slow step forward." }, { "id": "B", "text": "The character is dancing energetically." }, { "id": "C", "text": "The character is sitting still." }, { "id": "D", "text": "The character is jumping up and down." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1255, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/54c6008aff164f14a31ab7296423ebca", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The desk top lowers." }, { "id": "B", "text": "The bottom drawer opens." }, { "id": "C", "text": "The desk top lifts." }, { "id": "D", "text": "The top drawer opens." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1256, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/590c1f2268a0417e875fe992f0dbe2df", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the creature holding?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a short, sharp weapon" }, { "id": "B", "text": "a long, curved weapon" }, { "id": "C", "text": "a long, blunt weapon" }, { "id": "D", "text": "a long, sharp weapon" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1257, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e5772fe12e5e48829c2534a6d833af5e", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the futuristic device have?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "1" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1258, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8b0c907435f24be481ac84fb0c51531e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary smell of the hand cream shown?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Mint" }, { "id": "B", "text": "Lime Juice" }, { "id": "C", "text": "Glycerine" }, { "id": "D", "text": "Lemon Juice" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1259, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5baf296795fc4beaa59390ba36001f77", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the water fountain doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Standing still" }, { "id": "B", "text": "Jumping up and down" }, { "id": "C", "text": "Walking and moving in a rhythmic pattern" }, { "id": "D", "text": "Spinning in circles" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1260, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2cd28b44bb3b47c0851314d797e53ebe", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which dinosaur piece is being placed into the puzzle board first?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Brown dinosaur piece" }, { "id": "B", "text": "Green dinosaur piece" }, { "id": "C", "text": "Red dinosaur piece" }, { "id": "D", "text": "Blue dinosaur piece" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1261, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cfb795ad97644a008a3c8b4abd1f032d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many shoulder pads does the green armored warrior have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "3" }, { "id": "C", "text": "1" }, { "id": "D", "text": "4" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1262, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/cde78a97ce3d4882b68463e4fb821d8b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What happens to the spacecraft?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The spacecraft crashes its thrusters and is falling down." }, { "id": "B", "text": "The spacecraft changes its color to blue." }, { "id": "C", "text": "The spacecraft remains stationary and does not activate its thrusters." }, { "id": "D", "text": "The spacecraft activates its large hexagonal thrusters, adjusts antennas, and readies for departure." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1263, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ada6bb5ace844f7e9b080f20ba64248d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many animals are inside the hollow tree trunk?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "5" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1264, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b699a3d0774e44c7a2812adbba48bf8f", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What action is the character performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is standing still and smoking a pipe." }, { "id": "B", "text": "The character is walking and holding a pipe." }, { "id": "C", "text": "The character is sitting and holding a pipe." }, { "id": "D", "text": "The character is preparing for a fight." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1265, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2308b7713a4843b3a2f8f690bf372e6a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the penguin holding on its back?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "a rope" }, { "id": "B", "text": "a fish" }, { "id": "C", "text": "a large anchor" }, { "id": "D", "text": "a small anchor" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1266, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0b719be364654693898fc7de03ca741e", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many light blue spheres are connected by thin lines in the 4D object?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "14" }, { "id": "B", "text": "10" }, { "id": "C", "text": "8" }, { "id": "D", "text": "12" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1267, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fbde233657574586aa5e8fe83b2f4675", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the direction of the pedestal fan?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The pedestal fan turns up." }, { "id": "B", "text": "The pedestal fan turns down." }, { "id": "C", "text": "The pedestal fan turns left." }, { "id": "D", "text": "The pedestal fan turns right." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1268, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b38cceb530e54ca48072ecb75d4aaa42", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What color is the oval-shaped object after the book is opened?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "brown" }, { "id": "B", "text": "yellow" }, { "id": "C", "text": "purple" }, { "id": "D", "text": "red" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1269, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6b1921865be14de982f9483040acaa2d", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the white square on the purple grid sphere?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The white square disappears from the sphere." }, { "id": "B", "text": "The white square moves across the surface of the sphere." }, { "id": "C", "text": "The white square remains stationary on the sphere."
}, { "id": "D", "text": "The white square turns into a different color." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1270, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a2d4007983af4a7fb1fd7b870443b564", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many buttons are on the character's coat?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "5" }, { "id": "C", "text": "4" }, { "id": "D", "text": "3" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1271, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7e8eb5c8643e44cc9a9469f807c2cef8", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many red conveyor belts are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "3" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1272, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/431b2349ddf64008a9108a19f11b38bd", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a red hat, black scarf, and yellow outfit." }, { "id": "B", "text": "The character is wearing a green hat, blue scarf, and white outfit." }, { "id": "C", "text": "The character is wearing a yellow hat and gray outfit." }, { "id": "D", "text": "The character is wearing a yellow hat, red scarf, and black outfit with a blue object in one hand and a black object in the other." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1273, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8107e6184cc54b6aa7cc5c32d7fe8ba8", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many drawers are opened during the video?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "2" }, { "id": "C", "text": "3" }, { "id": "D", "text": "0" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1274, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6909e80e5ffc4f0dbe912047ad5e5fa1", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the figure wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure is wearing a red jacket with black sleeves and black pants." }, { "id": "B", "text": "The figure is wearing a black jacket with red sleeves and black pants." }, { "id": "C", "text": "The figure is wearing a blue jacket with yellow sleeves and black pants." }, { "id": "D", "text": "The figure is wearing a red jacket with yellow sleeves and black pants." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1275, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/757996c07d01431bbb665129c5d8b77a", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the fox doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The fox is sleeping." }, { "id": "B", "text": "The fox is sitting down." }, { "id": "C", "text": "The fox is eating." }, { "id": "D", "text": "The fox is dancing."
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1276, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b81265a7fca644328036b297a8c87c6a", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the character in the blue interface labeled 'workshop' doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is standing with arms at their sides." }, { "id": "B", "text": "The character is lying down on the floor." }, { "id": "C", "text": "The character is sitting on the floor." }, { "id": "D", "text": "The character is jumping in the air." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1277, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/86a5167ef6cf4b80bb8217bf247705a0", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the superhero figure perform a fly attempt?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "2" }, { "id": "C", "text": "1" }, { "id": "D", "text": "0" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1278, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ae29e0a2e4ac41ea800bbe834ee68f12", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many slices of garlic bread are present?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "4" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1279, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/d8e979afbdb2468f85457f04467f242d", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What action is the 3D-rendered figure performing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Running" }, { "id": "B", "text": "Jumping" }, { "id": "C", "text": "Standing" }, { "id": "D", "text": "Walking" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1280, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b2e18c2c89c54e0b985dcd9a4ccb1f75", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the person doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The person is sitting and reading." }, { "id": "B", "text": "The person is standing and looking around." }, { "id": "C", "text": "The person is dancing and saluting." }, { "id": "D", "text": "The person is saluting with their right hand." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1281, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dada24d919c44f3d9fdfe63fa1cbe002", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs does the mechanical spider-like robot have?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "8" }, { "id": "B", "text": "10" }, { "id": "C", "text": "4" }, { "id": "D", "text": "6" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1282, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/505709b7523947cf8f1f8f91b51ba5e7", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the cockpit of the spaceship?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White" }, { "id": "B", "text": "Red" }, { "id": "C", "text": "Gray" }, { "id": "D", "text": "Blue" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1283, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6fd94c6a304840a79a7a33d29fcdc3c7", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is falling down." }, { "id": "B", "text": "The robot is sitting on the ground." }, { "id": "C", "text": "The robot is standing still and looking around." }, { "id": "D", "text": "The robot is waving its arms and walking." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1284, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/2e686192899842b3ad732e0b49890d7f", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many signs or messages are hanging off the branches of the tree?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "5" }, { "id": "B", "text": "4" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1285, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b50e7aab3a4d48e592c7aa402b5b0420", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the blue spiral cone inside the green cylindrical container?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue spiral cone shrinks and disappears inside the green cylindrical container." }, { "id": "B", "text": "The blue spiral cone remains stationary inside the green cylindrical container." }, { "id": "C", "text": "The blue spiral cone rotates inside the green cylindrical container." }, { "id": "D", "text": "The blue spiral cone expands and fills the green cylindrical container." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1286, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/649334a3e5f3408d9700d4c442e6782a", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the cylindrical component of the 4D object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cylindrical component is being lifted." }, { "id": "B", "text": "The cylindrical component is rotating." }, { "id": "C", "text": "The cylindrical component is stationary." }, { "id": "D", "text": "The cylindrical component is undergoing positional adjustments." 
} ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1287, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7ab846cf456e4825bd7b0612d6917c47", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the blocky figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blocky figure is running." }, { "id": "B", "text": "The blocky figure is sitting still." }, { "id": "C", "text": "The blocky figure is performing a series of dance moves." }, { "id": "D", "text": "The blocky figure is jumping." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1288, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/6d97fb688b3c407abffa7c290d61e3e4", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many spikes does the mechanical hedgehog-like structure have except for its tail?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "9" }, { "id": "B", "text": "12" }, { "id": "C", "text": "8" }, { "id": "D", "text": "6" } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1289, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7259878512cf4a9c86bdf7a7b27aa10b", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many punches does the figure do?"
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "2" }, { "id": "C", "text": "1" }, { "id": "D", "text": "5" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1290, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/29161d051b0741f9864b94bd0ba88e2e", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the primary action of the rifle?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rifle is standing still." }, { "id": "B", "text": "The rifle is rotating." }, { "id": "C", "text": "The rifle is shaking." }, { "id": "D", "text": "The rifle is moving forward." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1291, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/df2aa81da18c4bed8d92ec5c61845da1", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the order of the people from left to right if seeing their back?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "White shirt, pink shirt, yellow shirt" }, { "id": "B", "text": "Yellow shirt, pink shirt, white shirt" }, { "id": "C", "text": "Pink shirt, white shirt, yellow shirt" }, { "id": "D", "text": "White shirt, yellow shirt, pink shirt" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1292, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/7a331fc1de88474dbec27d5a1a43a60e", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the animal figure doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The animal figure is sitting still." }, { "id": "B", "text": "The animal figure is jumping up and down." 
}, { "id": "C", "text": "The animal figure is running forward." }, { "id": "D", "text": "The animal figure is moving its head up and down." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1293, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/651113c55dfe414889b102b4d10ed065", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many legs are detached from the body in the end?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "4" }, { "id": "C", "text": "1" }, { "id": "D", "text": "2" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1294, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ddde4fccc5554b24b0c57857ecdaaaaa", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the cabinet after it opens its doors?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cabinet closes its doors." }, { "id": "B", "text": "The cabinet returns to its initial position." }, { "id": "C", "text": "The cabinet rotates in a different direction." }, { "id": "D", "text": "The cabinet remains open." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1295, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/561fccb5b3314feda06da315b8d36553", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the position of the magazine in relation to the handgun?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The magazine is positioned below the handgun." }, { "id": "B", "text": "The magazine is positioned to the right of the handgun." }, { "id": "C", "text": "The magazine is positioned above the handgun." 
}, { "id": "D", "text": "The magazine is positioned to the left of the handgun." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1296, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/fc859162cc5043d1ade3d63b8c3e73bf", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the dinosaur-like creature?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Yellow and black" }, { "id": "B", "text": "Blue and white" }, { "id": "C", "text": "Green and brown" }, { "id": "D", "text": "Green and beige" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1297, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/0d551db95ddb4b31a9bc8b2dd1d77ecf", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the creature turn around?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1298, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/700b0a7b336f4cb6ba5dea874bd76585", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the blue spherical object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The blue spherical object remains stationary throughout the video." }, { "id": "B", "text": "The blue spherical object bounces off the black and gold mechanical device." }, { "id": "C", "text": "The blue spherical object gradually rose from the top after the black and gold mechanical device opened the top panel." 
}, { "id": "D", "text": "The blue spherical object is absorbed by the black and gold mechanical device." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1299, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/905cce887cb74779968e1bed21d239a8", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many buttons are on the brown animatronic bear?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "2" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1300, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a58739b0ce784ecbba3e2b8811870142", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "Which gear is providing force?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Both gears" }, { "id": "B", "text": "Blue gear" }, { "id": "C", "text": "Green gear" }, { "id": "D", "text": "Neither gear" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1301, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/e7e4406972da4be0baa5ce3f059afa02", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the figure's axe?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The figure drops the axe and turns around." }, { "id": "B", "text": "The figure throws the axe and runs away." }, { "id": "C", "text": "The figure holds the axe and looks around." }, { "id": "D", "text": "The figure picks up the axe and swings it." 
} ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1302, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/77da3c8b2006462a8bed14ffdd4ba1a9", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many objects are inside the treasure chest in the final frame?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "4" }, { "id": "C", "text": "1" }, { "id": "D", "text": "3" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1303, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/a72e9c196bac47c4a2f478425b9d988d", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the animal doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The animal is sleeping on its back." }, { "id": "B", "text": "The animal is jumping in the air." }, { "id": "C", "text": "The animal is attacking." }, { "id": "D", "text": "The animal is lying down on the ground." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1304, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/366b7d0519874ef28b85bfe9f9460df2", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the character wearing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is wearing a black and white outfit." }, { "id": "B", "text": "The character is wearing a green and white outfit." }, { "id": "C", "text": "The character is wearing a red and white outfit." }, { "id": "D", "text": "The character is wearing a dark blue and white outfit." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1305, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/dbd3e2ccdc2748af957c34310163d848", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many parts of the car are being replaced?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "3" }, { "id": "B", "text": "4" }, { "id": "C", "text": "5" }, { "id": "D", "text": "2" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1306, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f8a674f5325743d69d14949bfa2cf80d", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the first action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "Only the left drawer opens" }, { "id": "B", "text": "The left and right drawer opens" }, { "id": "C", "text": "The middle drawer opens" }, { "id": "D", "text": "Only the right drawer opens" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1307, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/988295eb0739491f83538a52da2a2941", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many water drops appear?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "0" }, { "id": "B", "text": "1" }, { "id": "C", "text": "2" }, { "id": "D", "text": "3" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1308, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/ad266f58d2e04c5fb99ba5342c9a6f6d", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the dinosaur doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The dinosaur is standing still." }, { "id": "B", "text": "The dinosaur is jumping." }, { "id": "C", "text": "The dinosaur is walking." }, { "id": "D", "text": "The dinosaur is eating." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1309, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/762c7622d6e64747960fee2dc5146f06", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What is the character holding in its right hand?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A sword" }, { "id": "B", "text": "A shield" }, { "id": "C", "text": "A pickaxe" }, { "id": "D", "text": "A bow" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1310, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/57f74d6a8e334b78af143ad7eee99921", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What happens to the lid of the rectangular electronic device?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The lid of the rectangular electronic device remains closed throughout the video." }, { "id": "B", "text": "The lid of the rectangular electronic device opens and closes." }, { "id": "C", "text": "The lid of the rectangular electronic device opens and then remains open." }, { "id": "D", "text": "The lid of the rectangular electronic device opens and then closes, revealing a white keyboard." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1311, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/f9b9f0aaa0994cc8892ecbda59c35bb4", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" 
], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is jumping." }, { "id": "B", "text": "The character is walking forward." }, { "id": "C", "text": "The character is standing still." }, { "id": "D", "text": "The character is running." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1312, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c76b9ea075bd419ab661191cccea8a7c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the creature doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature is walking." }, { "id": "B", "text": "The creature is standing still." }, { "id": "C", "text": "The creature is flying." }, { "id": "D", "text": "The creature is swimming." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1313, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b6ded2b55e504b5da35c617fa5f31a95", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "How many spheres are there in the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "15" }, { "id": "B", "text": "20" }, { "id": "C", "text": "12" }, { "id": "D", "text": "16" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1314, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1f1e525aaece4242a54cba37ab636b4a", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many wheels does the rover have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The rover has six wheels." }, { "id": "B", "text": "The rover has eight wheels." }, { "id": "C", "text": "The rover has ten wheels." 
}, { "id": "D", "text": "The rover has four wheels." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1315, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/35179ce8ab13419f83dbf781ba9ec823", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many blue tail-like appendages does the character have?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "1" }, { "id": "B", "text": "3" }, { "id": "C", "text": "2" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1316, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/af456d179fd146b0a948b1b3e7eaff5b", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the central feature of the circular saw blade?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "A central hub with blue fan-like blades" }, { "id": "B", "text": "A central hub with red fan-like blades" }, { "id": "C", "text": "A central hub with black fan-like blades" }, { "id": "D", "text": "A central hub with green fan-like blades" } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1317, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/9d340e85011c4ca589459b5e13ce2614", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the shape of the head of the 4D object?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The head of the 4D object is flat and rectangular." }, { "id": "B", "text": "The head of the 4D object is shaped like a bell." }, { "id": "C", "text": "The head of the 4D object is round and smooth." }, { "id": "D", "text": "The head of the 4D object resembles a siren or loudspeaker." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1318, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/13562ce25bc0467aa035a44d4427510c", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many Bitcoin symbols are present?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "2" }, { "id": "B", "text": "1" }, { "id": "C", "text": "3" }, { "id": "D", "text": "4" } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1319, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8fd6749b00b842748f47b24ee9f81a7d", "description": "Object Counting", "task_type": "Vision-Question-Answer", "question": [ "How many times does the figure perform a flip?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "4" }, { "id": "B", "text": "3" }, { "id": "C", "text": "2" }, { "id": "D", "text": "1" } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1320, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b70650b7164f4ee39254319a15f1b8ec", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the tiltrotor aircraft?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tiltrotor aircraft takes off from the circular landing pad." }, { "id": "B", "text": "The tiltrotor aircraft flies over the circular landing pad." }, { "id": "C", "text": "The tiltrotor aircraft lands on the circular landing pad." }, { "id": "D", "text": "The tiltrotor aircraft remains stationary on the circular landing pad." 
} ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1321, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/bc6895cd8d344aeca83123f2a4096931", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the cat doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The cat is jumping up and down." }, { "id": "B", "text": "The cat is lying down on its side." }, { "id": "C", "text": "The cat is trembling and spinning its body." }, { "id": "D", "text": "The cat is sitting on its hind legs and looking around." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1322, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/3b5faae1d0ce40b5a0eb389ef8d3da95", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "Which of the following best describes the transformation of the piggy bank?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The piggy bank changes its color." }, { "id": "B", "text": "The piggy bank shrinks in size." }, { "id": "C", "text": "The piggy bank takes a step forward and shakes its head." }, { "id": "D", "text": "The piggy bank remains static throughout the video." } ], "answer": [ "D" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1323, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8dc50ffa892d4a41a70d12ff3557e11f", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the color of the creature's head?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The creature's head is beige." }, { "id": "B", "text": "The creature's head is red." }, { "id": "C", "text": "The creature's head is orange." }, { "id": "D", "text": "The creature's head is turquoise." 
} ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1324, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1a4d966c1107459194a0ebd6c500c4cf", "description": "Spatial Relationship", "task_type": "Vision-Question-Answer", "question": [ "What happens to the sword?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The sword remains on the pedestal throughout the video." }, { "id": "B", "text": "The sword moves from the pedestal to the floor." }, { "id": "C", "text": "The sword is never seen." }, { "id": "D", "text": "The sword moves from the floor to the pedestal." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1325, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/c2ebe5fa15344785975833ac0c79d005", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is running." }, { "id": "B", "text": "The character is flapping her wings." }, { "id": "C", "text": "The character is standing still." }, { "id": "D", "text": "The character is jumping." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1326, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/b574579d9f5840bd83d6b4abc7333b66", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the robot doing with the blue toothbrush?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The robot is using the blue toothbrush to clean the mirror." }, { "id": "B", "text": "The robot is holding the blue toothbrush in its left hand." }, { "id": "C", "text": "The robot is holding the blue toothbrush in its right hand." 
}, { "id": "D", "text": "The robot is brushing its teeth with the blue toothbrush." } ], "answer": [ "C" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1327, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5d830b77a1d747ff8f16a9c242f1c065", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the character doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The character is walking on a green rectangular platform." }, { "id": "B", "text": "The character is jumping on a green rectangular platform." }, { "id": "C", "text": "The character is running on a green rectangular platform." }, { "id": "D", "text": "The character is sitting on a green rectangular platform." } ], "answer": [ "B" ], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1328, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/1a1bbd5e3cd746888232fefa46b3f7d1", "description": "Action description", "task_type": "Vision-Question-Answer", "question": [ "What is the object doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The object is remaining static." }, { "id": "B", "text": "The object is spinning rapidly." }, { "id": "C", "text": "The object is changing color." }, { "id": "D", "text": "The object is morphing smoothly over time." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1329, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/5d25fe137c4b4e1e87def0564904a14c", "description": "Sequence-Based", "task_type": "Vision-Question-Answer", "question": [ "What is the action that occurs?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The airplane starts to rotate in mid-air." }, { "id": "B", "text": "The airplane starts to move forward." }, { "id": "C", "text": "The airplane begins to ascend." 
}, { "id": "D", "text": "The airplane begins to deploy its landing gear." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" }, { "index": 1330, "media_type": "Video", "media_paths": "./data/4D_Object_Question_Answering/8aed3493e0784874ad700cc1cee9c0bd", "description": "Object Visual description", "task_type": "Vision-Question-Answer", "question": [ "What is the tiger doing?" ], "question_type": "multi-choice", "annotations": {}, "options": [ { "id": "A", "text": "The tiger is jumping." }, { "id": "B", "text": "The tiger is sleeping." }, { "id": "C", "text": "The tiger is sitting." }, { "id": "D", "text": "The tiger is shouting." } ], "answer": [], "source": "4D-Bench", "domain": "Embodied_ai" } ]