diff --git a/.gitattributes b/.gitattributes index 34832b75648fde4a03b5ee7b1db774ffe3495a9a..79afb3879bae59b049d33cbdeaf4bd779e83fc19 100644 --- a/.gitattributes +++ b/.gitattributes @@ -52,3 +52,195 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 10samples/sample_0007/overlays/overlay_accepted.png filter=lfs diff=lfs merge=lfs -text 10samples/sample_0007/overlays/overlay_intended.png filter=lfs diff=lfs merge=lfs -text 10samples/sample_0007/overlays/overlay_measured.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_red_illuminated_structure.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_red_illuminated_structure.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_red_illuminated_structure.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_bright_blue_panel.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_dark_door.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_door_frame.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_floor.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_surface.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_forming_heart.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_holding_red_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_manipulating_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_rectangular_blue_screen.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background_drapery.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_surface.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_table_cloth.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_chair.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_structure.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_red_illuminated_book.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_shadowy_doorway.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_red_illuminated_book.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_shadowy_doorway.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_person_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_red_illuminated_book.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_shadowy_doorway.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_draped_tarp.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_painter_in_cap.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_stacked_storage_boxes.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_draped_tarp.png filter=lfs diff=lfs 
merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_blue_light_source.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_draped_tarp.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_paint_roller.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_cutlery_set.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_in_jacket.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_with_pan.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_pizza_prep_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_wall_cabinet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_cutlery_set.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_in_jacket.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_with_pan.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_pizza_prep_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_wall_cabinet.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_cutlery_set.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_in_jacket.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_with_pan.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_pizza_prep_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_salt_box.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_wall_cabinet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_large_black_pot.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_person_at_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wooden_door.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_large_black_pot.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_person_at_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_sink_faucet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wooden_door.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_gas_stove.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_large_black_pot.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_person_at_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_silver_spoon.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_sink_faucet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_water_pitcher.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wooden_door.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wrapped_cucumber.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_backpack.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_cable.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_dark_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_grid_patterned_floor.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_shadowy_shape.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_fabric_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_surface.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_typist.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_backpack.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_lines.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_cable.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_dark_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_grid_patterned_floor.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_shadowy_shape.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_fabric_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_surface.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_typist.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_backpack.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light_source.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_lines.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_dark_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_electronic_device.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_grid_patterned_floor.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_shadowy_shape.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_small_blue_rectangle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_fabric_area.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_surface.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_thin_curved_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_typist.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_holding_part.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_small_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_helmeted_figure.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_person_by_screens.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_silhouette_with_phone.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_small_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_smoldering_tool.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_helmeted_figure.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_person_by_screens.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_silhouette_with_phone.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_blue_light_module.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_holding_part.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_passing_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_small_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_smoldering_tool.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_helmeted_figure.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_person_by_screens.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_reaching_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_red_device.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_silhouette_with_phone.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_blue_cleaner_bottle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_person_washing_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_red_sink_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_silver_faucet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_yellow_dish_gloves.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_blue_cleaner_bottle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_person_washing_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_red_sink_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_silver_faucet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_yellow_dish_gloves.png filter=lfs 
diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_blue_cleaner_bottle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_person_washing_sink.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_red_sink_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_silver_faucet.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_yellow_dish_gloves.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_gesturing.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_face_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_holding_blue_flashlight.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_reading_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_standing_background.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_triangular_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_holding_blue_flashlight.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_lying_reading_blue_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_reading_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_standing_background.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_gesturing.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_small_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_triangular_object.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_face_in_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_holding_blue_flashlight.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_lying_reading_blue_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_reading_red_light.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_standing_background.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_plastic_water_bottle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_granite_counter.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_person_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_textured_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wooden_cabinets.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_granite_counter.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_person_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_textured_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wall_shelf.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wooden_cabinets.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/main_image.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_box_grater.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_dirty_plate.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_glass_bottle.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_granite_counter.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_metal_tool.png filter=lfs diff=lfs merge=lfs -text 
+samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_person_hands.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_striped_towel.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_textured_mat.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wall_shelf.png filter=lfs diff=lfs merge=lfs -text +samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wooden_cabinets.png filter=lfs diff=lfs merge=lfs -text diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/README.md b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ee52237eee8be96154aa14203a65ea21eb38b926 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/README.md @@ -0,0 +1,16 @@ +# samples_v8 + +Generated with `data_recipe_v8.md`: vocabulary-first planning, adaptive canvas selection, structured JSON compose prompts, no identity verification, no gate, SAM white-background reference postprocessing, and strict reference-completeness verification with regenerate-until-pass behavior. 
+ +- chat model: `gcp/google/gemini-3.1-pro-preview` +- image model: `gcp/google/gemini-3-pro-image-preview` +- people references: `white_bg_full_body_front` +- non-person references: `white_bg_encyclopedia_photo` +- SAM postprocess: every generated reference is segmented with `sam_vit_b` and pasted onto pure `#ffffff` background +- reference verify max attempts per subject: `10` +- allowed canvases: `[{"aspect_ratio": "1:1", "size": [1024, 1024], "style": "photorealistic"}, {"aspect_ratio": "4:3", "size": [1152, 864], "style": "photorealistic"}, {"aspect_ratio": "3:4", "size": [864, 1152], "style": "photorealistic"}, {"aspect_ratio": "3:2", "size": [1248, 832], "style": "photorealistic"}, {"aspect_ratio": "2:3", "size": [832, 1248], "style": "photorealistic"}, {"aspect_ratio": "16:9", "size": [1280, 720], "style": "photorealistic"}, {"aspect_ratio": "9:16", "size": [720, 1280], "style": "photorealistic"}]` +- scenario mode: `general` +- pools: `vocab_task_pool`, `plan_pool`, `scene_pool`, `detection_pool`, `reference_pool` +- bbox overlay: `bbox_overlay.png` draws every planned subject bbox; a sample is rejected and regenerated if any planned subject is still missing after VLM detection retries +- detection max attempts per subject: `3` +- launch args: `{"compose_workers": 3, "detect_max_attempts": 3, "detect_workers": 3, "emit_workers": 4, "idle_sleep": 1.0, "image_inflight": 32, "image_interval": 0.05, "image_max_retries": 8, "max_retries": 3, "no_topup": false, "plan_workers": 6, "ref_verify_max_attempts": 10, "reference_workers": 6, "requeue_in_progress": true, "seed": 1781927993, "status_interval": 30.0, "subject_detect_workers": 24, "target_samples": 10}` diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.json new file mode 100644 index 0000000000000000000000000000000000000000..d37ec3e830e59312f57174657a2228b4ee3a649b --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.json @@ -0,0 +1,3220 @@ +[ + { + "sample_id": "sample_000001", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object.", + "sub_caption": "person: A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.. 
Scene role: Operating the small device in the foreground", + "measured_bbox": [ + 0.301, + 0.039, + 0.714, + 0.98 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 324.0, + 26.0, + 699.0, + 1013.0 + ], + "mask_score": 3.392402, + "mask_area_ratio": 0.165197, + "elapsed_seconds": 53.7174 + } + }, + { + "name": "red_illuminated_structure", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "source_description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights.", + "sub_caption": "red illuminated structure: A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.. 
Scene role: Forms the textured, atmospheric background behind the person", + "measured_bbox": [ + 0.6096, + 0.0, + 0.9961, + 0.991 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_structure.png", + "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_structure.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", + "output": "references/ref_red_illuminated_structure.png", + "mask": "references/sam_mask_red_illuminated_structure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 0.0, + 1023.0, + 1023.0 + ], + "mask_score": 1.966617, + "mask_area_ratio": 0.597257, + "elapsed_seconds": 10.2671 + } + }, + { + "name": "green_indicator_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "source_description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible.", + "sub_caption": "green light: A small, bright green glowing LED indicator light piercing through the darkness.. 
Scene role: Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting", + "measured_bbox": [ + 0.1227, + 0.438, + 0.1605, + 0.4975 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_green_indicator_light.png", + "raw_ref_image": "references/raw_ref_green_indicator_light_attempt_01.png", + "reference_verify": "references/reference_verify_green_indicator_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", + "output": "references/ref_green_indicator_light.png", + "mask": "references/sam_mask_green_indicator_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 335.0, + 676.0, + 688.0 + ], + "mask_score": 3.439631, + "mask_area_ratio": 0.059673, + "elapsed_seconds": 10.1365 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000003", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": 
"people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "source_description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light.", + "sub_caption": "person: Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.. Scene role: Interacting with the open book, fingers lightly resting on the visible pages.", + "measured_bbox": [ + 0.0, + 0.0, + 0.5655, + 1.0 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 351.0, + 54.0, + 667.0, + 1015.0 + ], + "mask_score": 3.471897, + "mask_area_ratio": 0.134706, + "elapsed_seconds": 8.8737 + } + }, + { + "name": "red_illuminated_book", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + 
"source_name": "book", + "source_description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness.", + "sub_caption": "book: A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.. Scene role: Lying open flat on the surface directly beneath the person's hands.", + "measured_bbox": [ + 0.2245, + 0.3971, + 0.6708, + 0.6724 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_book.png", + "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_book.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", + "output": "references/ref_red_illuminated_book.png", + "mask": "references/sam_mask_red_illuminated_book.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 180.0, + 1002.0, + 858.0 + ], + "mask_score": 3.463648, + "mask_area_ratio": 0.502075, + "elapsed_seconds": 7.3171 + } + }, + { + "name": "shadowy_doorway", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "source_description": "A darker, arched or rectangular shape 
on the wall, suggesting an opening to another area. Source dataset: Ego4D. Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures.", + "sub_caption": "doorway or opening: A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.. Scene role: Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area.", + "measured_bbox": [ + 0.5153, + 0.0267, + 0.8188, + 0.8345 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_doorway.png", + "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_doorway.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", + "output": "references/ref_shadowy_doorway.png", + "mask": "references/sam_mask_shadowy_doorway.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 141.0, + 11.0, + 899.0, + 969.0 + ], + "mask_score": 3.471354, + "mask_area_ratio": 0.542183, + "elapsed_seconds": 7.2941 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000004", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 864, + 1152 + ], + "canvas_aspect_ratio": "3:4", + 
"main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 5, + "n_detected": 5, + "n_subjects": 3, + "subjects": [ + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side.", + "sub_caption": "blue light source: A small, blurry blue light emitting a faint, cool glow against the darkness.. Scene role: Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting.", + "measured_bbox": [ + 0.5155, + 0.3724, + 0.553, + 0.4239 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": 
[ + 132.0, + 159.0, + 891.0, + 864.0 + ], + "mask_score": 3.485053, + "mask_area_ratio": 0.292151, + "elapsed_seconds": 9.6106 + } + }, + { + "name": "paint_roller", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "source_description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting.", + "sub_caption": "paint roller: A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.. Scene role: Resting on a nearby surface in the foreground, catching subtle highlights from the red light.", + "measured_bbox": [ + 0.2917, + 0.532, + 0.4405, + 0.7522 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_paint_roller.png", + "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", + "reference_verify": "references/reference_verify_paint_roller.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", + "output": "references/ref_paint_roller.png", + "mask": "references/sam_mask_paint_roller.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 136.0, + 75.0, + 866.0, + 950.0 + ], + "mask_score": 3.459168, + "mask_area_ratio": 0.116944, + "elapsed_seconds": 7.1624 + } + }, + { + "name": "draped_tarp", + 
"is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "source_description": "A draped, light-colored or beige material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp.", + "sub_caption": "light-colored material: A draped, light-colored tarp or drop cloth covering large, indistinct shapes.. Scene role: Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting.", + "measured_bbox": [ + 0.392, + 0.4207, + 0.8243, + 0.8862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_draped_tarp.png", + "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", + "reference_verify": "references/reference_verify_draped_tarp.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", + "output": "references/ref_draped_tarp.png", + "mask": "references/sam_mask_draped_tarp.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 189.0, + 1023.0, + 888.0 + ], + "mask_score": 3.482188, + "mask_area_ratio": 0.485075, + "elapsed_seconds": 7.4131 + } + } + ], + "not_emitted": [ + { + "name": "worker_in_cap", + "reason": "not_detected" + }, + { + "name": 
"stacked_boxes", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000005", + "target_total": 6, + "target_people": 3, + "target_objects": 3, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 6, + "n_detected": 6, + "n_subjects": 6, + "subjects": [ + { + "name": "pizza_prep_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "source_description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes.", + "sub_caption": "person: Hands and lower arms visible, preparing a pizza on a wooden surface.. 
Scene role: In the foreground, actively making a pizza.", + "measured_bbox": [ + 0.532, + 0.2245, + 1.0, + 0.7871 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_pizza_prep_hands.png", + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", + "reference_verify": "references/reference_verify_pizza_prep_hands.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", + "output": "references/ref_pizza_prep_hands.png", + "mask": "references/sam_mask_pizza_prep_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 349.0, + 14.0, + 677.0, + 1016.0 + ], + "mask_score": 3.451951, + "mask_area_ratio": 0.150558, + "elapsed_seconds": 10.2196 + } + }, + { + "name": "person_in_jacket", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "source_name": "person", + "source_description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting.", + "sub_caption": "person: Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.. 
Scene role: Standing in the midground, facing away toward the kitchen cabinets.", + "measured_bbox": [ + 0.0, + 0.0, + 0.361, + 0.675 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_jacket.png", + "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_02.png", + "reference_verify": "references/reference_verify_person_in_jacket.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", + "output": "references/ref_person_in_jacket.png", + "mask": "references/sam_mask_person_in_jacket.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 15.0, + 684.0, + 1023.0 + ], + "mask_score": 3.414528, + "mask_area_ratio": 0.160464, + "elapsed_seconds": 10.2925 + } + }, + { + "name": "person_with_pan", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: Person seen from the torso down, holding a dark round object.. 
Scene role: Standing in the background near a blue wall.", + "measured_bbox": [ + 0.1195, + 0.0021, + 0.4483, + 0.6302 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_with_pan.png", + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", + "reference_verify": "references/reference_verify_person_with_pan.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", + "output": "references/ref_person_with_pan.png", + "mask": "references/sam_mask_person_with_pan.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 18.0, + 690.0, + 1023.0 + ], + "mask_score": 3.408831, + "mask_area_ratio": 0.163625, + "elapsed_seconds": 10.3028 + } + }, + { + "name": "salt_box", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "source_description": "A small cardboard box with blue and black text, sitting on the counter near the stove. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils.", + "sub_caption": "box of salt: A small cardboard box with blue and black graphical patterns, resembling a salt container.. 
Scene role: Resting on the wooden counter next to the pizza prep area.", + "measured_bbox": [ + 0.5558, + 0.4006, + 0.6966, + 0.4937 + ], + "detection_confidence": 0.5, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_salt_box.png", + "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", + "reference_verify": "references/reference_verify_salt_box.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", + "output": "references/ref_salt_box.png", + "mask": "references/sam_mask_salt_box.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 73.0, + 268.0, + 952.0, + 762.0 + ], + "mask_score": 3.471932, + "mask_area_ratio": 0.353847, + "elapsed_seconds": 10.266 + } + }, + { + "name": "cutlery_set", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "source_description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "cutlery: A silver fork and a knife resting on a yellow plate.. 
Scene role: Placed on the counter in the foreground left.", + "measured_bbox": [ + 0.1913, + 0.8329, + 0.6116, + 0.9636 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cutlery_set.png", + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", + "reference_verify": "references/reference_verify_cutlery_set.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", + "output": "references/ref_cutlery_set.png", + "mask": "references/sam_mask_cutlery_set.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 28.0, + 122.0, + 1013.0, + 887.0 + ], + "mask_score": 3.237995, + "mask_area_ratio": 0.452152, + "elapsed_seconds": 10.821 + } + }, + { + "name": "wall_cabinet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "source_description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid.", + "sub_caption": "cabinet: A wooden cabinet positioned above a kitchen counter.. 
Scene role: Mounted on the wall in the midground, directly above the counter.", + "measured_bbox": [ + 0.5897, + 0.0017, + 0.9469, + 0.1735 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_cabinet.png", + "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", + "reference_verify": "references/reference_verify_wall_cabinet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", + "output": "references/ref_wall_cabinet.png", + "mask": "references/sam_mask_wall_cabinet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 15.0, + 31.0, + 1008.0, + 1013.0 + ], + "mask_score": 3.398914, + "mask_area_ratio": 0.735002, + "elapsed_seconds": 10.923 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000006", + "target_total": 8, + "target_people": 1, + "target_objects": 7, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 8, + "n_detected": 8, + "n_subjects": 6, + "subjects": [ + { + "name": "person_at_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + 
"source_description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "person: A person's left hand and lower body wearing dark trousers and black shoes with white soles.. Scene role: Standing in the foreground, reaching their left hand towards the kitchen faucet.", + "measured_bbox": [ + 0.0, + 0.3583, + 0.2441, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_at_sink.png", + "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_at_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", + "output": "references/ref_person_at_sink.png", + "mask": "references/sam_mask_person_at_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 320.0, + 24.0, + 689.0, + 1002.0 + ], + "mask_score": 3.450755, + "mask_area_ratio": 0.163542, + "elapsed_seconds": 31.0984 + } + }, + { + "name": "wooden_door", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "source_description": "A light brown wooden door, closed, with a metallic door handle. 
Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left.", + "sub_caption": "door: A light brown wooden door, closed, with a metallic handle.. Scene role: Visible in the background on the kitchen wall.", + "measured_bbox": [ + 0.2139, + 0.0, + 0.3994, + 0.412 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_door.png", + "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_door.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", + "output": "references/ref_wooden_door.png", + "mask": "references/sam_mask_wooden_door.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 256.0, + 26.0, + 767.0, + 1015.0 + ], + "mask_score": 3.446321, + "mask_area_ratio": 0.388947, + "elapsed_seconds": 9.29 + } + }, + { + "name": "wrapped_cucumber", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": "cucumber 2", + "source_description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. 
Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background.", + "sub_caption": "cucumber 2: A long, green cucumber wrapped in clear plastic.. Scene role: Resting horizontally on the dark countertop next to the sink.", + "measured_bbox": [ + 0.2215, + 0.4465, + 0.4029, + 0.5104 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wrapped_cucumber.png", + "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", + "reference_verify": "references/reference_verify_wrapped_cucumber.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", + "output": "references/ref_wrapped_cucumber.png", + "mask": "references/sam_mask_wrapped_cucumber.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 397.0, + 1011.0, + 624.0 + ], + "mask_score": 3.430612, + "mask_area_ratio": 0.118574, + "elapsed_seconds": 7.2551 + } + }, + { + "name": "gas_stove", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "source_description": "A gas stove with black grates on the left side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets.", + "sub_caption": "stove: A kitchen gas stove with black grates.. 
Scene role: Positioned along the counter in the background.", + "measured_bbox": [ + 0.03, + 0.345, + 0.318, + 0.444 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_gas_stove.png", + "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", + "reference_verify": "references/reference_verify_gas_stove.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", + "output": "references/ref_gas_stove.png", + "mask": "references/sam_mask_gas_stove.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 58.0, + 180.0, + 965.0, + 842.0 + ], + "mask_score": 3.470988, + "mask_area_ratio": 0.477615, + "elapsed_seconds": 7.3908 + } + }, + { + "name": "water_pitcher", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "source_description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor.", + "sub_caption": "water pitcher: A clear plastic water pitcher with a white handle and lid.. 
Scene role: Sitting upright on the counter.", + "measured_bbox": [ + 0.3327, + 0.2732, + 0.4536, + 0.4573 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_water_pitcher.png", + "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", + "reference_verify": "references/reference_verify_water_pitcher.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", + "output": "references/ref_water_pitcher.png", + "mask": "references/sam_mask_water_pitcher.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 242.0, + 115.0, + 856.0, + 972.0 + ], + "mask_score": 3.323768, + "mask_area_ratio": 0.450877, + "elapsed_seconds": 7.2849 + } + }, + { + "name": "silver_spoon", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "source_description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around.", + "sub_caption": "Silver Spoon: A small silver metal spoon.. 
Scene role: Laying flat on the messy countertop near the cucumber.", + "measured_bbox": [ + 0.3001, + 0.4801, + 0.3988, + 0.539 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_spoon.png", + "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", + "reference_verify": "references/reference_verify_silver_spoon.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", + "output": "references/ref_silver_spoon.png", + "mask": "references/sam_mask_silver_spoon.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 77.0, + 916.0, + 950.0 + ], + "mask_score": 3.449565, + "mask_area_ratio": 0.092279, + "elapsed_seconds": 7.0866 + } + } + ], + "not_emitted": [ + { + "name": "black_pot", + "reason": "not_detected" + }, + { + "name": "kitchen_faucet", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000007", + "target_total": 14, + "target_people": 1, + "target_objects": 13, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 14, + "n_detected": 14, + "n_subjects": 14, + "subjects": [ + { + "name": "typist", + "is_person": true, + "subject_type": "person", + 
"source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "source_description": "Visible as a dark silhouette with one hand interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room.", + "sub_caption": "typist: Visible as a dark silhouette with one hand interacting with an illuminated keyboard.. Scene role: Typing at the desk in the center of the frame, serving as the main subject of the scene.", + "measured_bbox": [ + 0.5473, + 0.0, + 0.9968, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_typist.png", + "raw_ref_image": "references/raw_ref_typist_attempt_01.png", + "reference_verify": "references/reference_verify_typist.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_typist.png", + "mask": "references/sam_mask_typist.png" + } + }, + { + "name": "textured_fabric_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "source_description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light.", + "sub_caption": "textured fabric area: A faint blueish, textured area that resembles rough fabric.. 
Scene role: Draped loosely over the back of the typist's chair, catching some of the ambient blue light.", + "measured_bbox": [ + 0.5434, + 0.5078, + 0.7285, + 0.8985 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_fabric_area.png", + "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", + "reference_verify": "references/reference_verify_textured_fabric_area.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_fabric_area.png", + "mask": "references/sam_mask_textured_fabric_area.png" + } + }, + { + "name": "textured_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment.", + "sub_caption": "textured surface: A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.. 
Scene role: Acting as a large desk mat or mousepad underneath the glowing keyboard.", + "measured_bbox": [ + 0.3547, + 0.1364, + 0.8811, + 0.479 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_surface.png", + "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", + "reference_verify": "references/reference_verify_textured_surface.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_surface.png", + "mask": "references/sam_mask_textured_surface.png" + } + }, + { + "name": "blue_lines", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "source_description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right.", + "sub_caption": "blue lines: Faint, indistinct blue lines glowing in the dark.. 
Scene role: An LED light strip illuminating the edge of the desk.", + "measured_bbox": [ + 0.3714, + 0.3069, + 0.5828, + 0.4931 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_lines.png", + "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", + "reference_verify": "references/reference_verify_blue_lines.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_lines.png", + "mask": "references/sam_mask_blue_lines.png" + } + }, + { + "name": "grid_patterned_floor", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "source_description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall.", + "sub_caption": "floor: A dark, grid-patterned floor surface, likely made of tiles.. 
Scene role: Visible in the lower portion of the room beneath the desk area.", + "measured_bbox": [ + 0.0039, + 0.5389, + 0.6911, + 0.9893 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_grid_patterned_floor.png", + "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", + "reference_verify": "references/reference_verify_grid_patterned_floor.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_grid_patterned_floor.png", + "mask": "references/sam_mask_grid_patterned_floor.png" + } + }, + { + "name": "shadowy_shape", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "source_description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light.", + "sub_caption": "shadowy shape: A large, unidentifiable shadowy shape.. 
Scene role: Looming in the background, suggesting stacked boxes or furniture in the darkness.", + "measured_bbox": [ + 0.0709, + 0.0492, + 0.3072, + 0.2887 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_shape.png", + "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_shape.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_shadowy_shape.png", + "mask": "references/sam_mask_shadowy_shape.png" + } + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side.", + "sub_caption": "blue light source: A small, hazy blue glowing area, appearing slightly irregular in shape.. 
Scene role: A glowing component or hub device resting on the desk.", + "measured_bbox": [ + 0.3661, + 0.1534, + 0.448, + 0.2196 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png" + } + }, + { + "name": "thin_curved_object", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "source_description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting.", + "sub_caption": "thin curved object: A thin, metallic or reflective curved object.. 
Scene role: Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light.", + "measured_bbox": [ + 0.5384, + 0.1284, + 0.674, + 0.181 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_thin_curved_object.png", + "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", + "reference_verify": "references/reference_verify_thin_curved_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_thin_curved_object.png", + "mask": "references/sam_mask_thin_curved_object.png" + } + }, + { + "name": "electronic_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: A faint red rectangular shape with some texture, resembling a phone.. 
Scene role: Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights.", + "measured_bbox": [ + 0.4103, + 0.2767, + 0.5015, + 0.3797 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_electronic_device.png", + "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", + "reference_verify": "references/reference_verify_electronic_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_electronic_device.png", + "mask": "references/sam_mask_electronic_device.png" + } + }, + { + "name": "blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "source_description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light.", + "sub_caption": "blue light: A small blue light source.. Scene role: A standby light on a computer tower sitting on the floor.", + "measured_bbox": [ + 0.523, + 0.747, + 0.539, + 0.788 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light.png", + "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light.png", + "mask": "references/sam_mask_blue_light.png" + } + }, + { + "name": "cable", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "source_description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. 
Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window.", + "sub_caption": "cable: A light-colored cable trailing down from the counter area towards the floor.. Scene role: Hanging down from the edge of the desk toward the floor, connecting devices.", + "measured_bbox": [ + 0.3307, + 0.1951, + 0.482, + 0.9804 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cable.png", + "raw_ref_image": "references/raw_ref_cable_attempt_01.png", + "reference_verify": "references/reference_verify_cable.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_cable.png", + "mask": "references/sam_mask_cable.png" + } + }, + { + "name": "backpack", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "source_description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle.", + "sub_caption": "backpack: A blue and black backpack.. 
Scene role: Resting quietly on the grid-patterned floor near the trailing cable.", + "measured_bbox": [ + 0.2708, + 0.6305, + 0.4662, + 0.9996 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_backpack.png", + "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", + "reference_verify": "references/reference_verify_backpack.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_backpack.png", + "mask": "references/sam_mask_backpack.png" + } + }, + { + "name": "small_blue_rectangle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "source_description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right.", + "sub_caption": "small blue rectangle: A tiny, bright blue rectangular glow.. 
Scene role: A small secondary display or digital clock sitting on the corner of the desk.", + "measured_bbox": [ + 0.7651, + 0.0635, + 0.8412, + 0.1295 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_small_blue_rectangle.png", + "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", + "reference_verify": "references/reference_verify_small_blue_rectangle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_small_blue_rectangle.png", + "mask": "references/sam_mask_small_blue_rectangle.png" + } + }, + { + "name": "dark_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "source_description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet.", + "sub_caption": "dark area: A deeply shadowed region dominating the space.. 
Scene role: Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup.", + "measured_bbox": [ + 0.003, + 0.0, + 0.3044, + 0.5863 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dark_area.png", + "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", + "reference_verify": "references/reference_verify_dark_area.json", + "reference_verify_passed": true, + "reference_attempts": 9, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", + "output": "references/ref_dark_area.png", + "mask": "references/sam_mask_dark_area.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 52.0, + 897.0, + 902.0 + ], + "mask_score": 3.468953, + "mask_area_ratio": 0.45142, + "elapsed_seconds": 7.1708 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000008", + "target_total": 10, + "target_people": 8, + "target_objects": 2, + "canvas_size": [ + 1280, + 720 + ], + "canvas_aspect_ratio": "16:9", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_by_screens", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + 
"source_name": "person", + "source_description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground.", + "sub_caption": "person: a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels. Scene role: standing in the background, monitoring the glowing screens", + "measured_bbox": [ + 0.1313, + 0.1321, + 0.2804, + 0.5589 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_by_screens.png", + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", + "reference_verify": "references/reference_verify_person_by_screens.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", + "output": "references/ref_person_by_screens.png", + "mask": "references/sam_mask_person_by_screens.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 55.0, + 687.0, + 1009.0 + ], + "mask_score": 3.450623, + "mask_area_ratio": 0.151286, + "elapsed_seconds": 10.4397 + } + }, + { + "name": "reaching_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + 
"source_name": "person", + "source_description": "Visible hands reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands.", + "sub_caption": "person: a pair of hands reaching forward, catching sharp reflections of red and blue ambient light. Scene role: reaching across the workbench to grab a tool", + "measured_bbox": [ + 0.4313, + 0.3821, + 0.5678, + 0.5521 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_reaching_hands.png", + "raw_ref_image": "references/raw_ref_reaching_hands_attempt_01.png", + "reference_verify": "references/reference_verify_reaching_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", + "output": "references/ref_reaching_hands.png", + "mask": "references/sam_mask_reaching_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 388.0, + 183.0, + 637.0, + 867.0 + ], + "mask_score": 3.461161, + "mask_area_ratio": 0.070283, + "elapsed_seconds": 10.0646 + } + }, + { + "name": "hands_holding_part", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. 
Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object.", + "sub_caption": "person: hands bathed in deep red light, carefully gripping a mechanical object. Scene role: holding a component steady on the workbench", + "measured_bbox": [ + 0.2769, + 0.4337, + 0.3938, + 0.5372 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_part.png", + "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_part.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", + "output": "references/ref_hands_holding_part.png", + "mask": "references/sam_mask_hands_holding_part.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 345.0, + 90.0, + 675.0, + 1015.0 + ], + "mask_score": 3.449955, + "mask_area_ratio": 0.13985, + "elapsed_seconds": 9.9678 + } + }, + { + "name": "hands_with_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. 
Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object.", + "sub_caption": "person: hands illuminated by a red glow, with one hand open and the other pinching a tiny object. Scene role: inspecting a small microchip", + "measured_bbox": [ + 0.3697, + 0.5225, + 0.5251, + 0.7126 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_small_object.png", + "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_with_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", + "output": "references/ref_hands_with_small_object.png", + "mask": "references/sam_mask_hands_with_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 60.0, + 684.0, + 992.0 + ], + "mask_score": 3.441398, + "mask_area_ratio": 0.128698, + "elapsed_seconds": 10.4408 + } + }, + { + "name": "hands_with_smoldering_tool", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. 
Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment.", + "sub_caption": "person: hands lit by red light, grasping a smoking device or tool. Scene role: soldering a wire, emitting a trail of smoke", + "measured_bbox": [ + 0.7136, + 0.5556, + 0.8779, + 0.7618 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_smoldering_tool.png", + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "reference_verify": "references/reference_verify_hands_with_smoldering_tool.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "output": "references/ref_hands_with_smoldering_tool.png", + "mask": "references/sam_mask_hands_with_smoldering_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 43.0, + 685.0, + 1000.0 + ], + "mask_score": 3.455653, + "mask_area_ratio": 0.157475, + "elapsed_seconds": 10.0065 + } + }, + { + "name": "helmeted_figure", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "source_description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. 
Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible.", + "sub_caption": "person: a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light. Scene role: leaning closely over the workbench to inspect the soldering work", + "measured_bbox": [ + 0.717, + 0.081, + 1.0, + 0.862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_helmeted_figure.png", + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", + "reference_verify": "references/reference_verify_helmeted_figure.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", + "output": "references/ref_helmeted_figure.png", + "mask": "references/sam_mask_helmeted_figure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 333.0, + 17.0, + 696.0, + 1018.0 + ], + "mask_score": 3.313146, + "mask_area_ratio": 0.166679, + "elapsed_seconds": 10.3423 + } + }, + { + "name": "silhouette_with_phone", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "source_description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. 
Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone.", + "sub_caption": "person: a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device. Scene role: standing on the side, illuminating the workspace with a phone flashlight", + "measured_bbox": [ + 0.5717, + 0.0262, + 0.857, + 0.6433 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_silhouette_with_phone.png", + "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", + "reference_verify": "references/reference_verify_silhouette_with_phone.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", + "output": "references/ref_silhouette_with_phone.png", + "mask": "references/sam_mask_silhouette_with_phone.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 16.0, + 694.0, + 1018.0 + ], + "mask_score": 3.400937, + "mask_area_ratio": 0.174557, + "elapsed_seconds": 10.3827 + } + }, + { + "name": "hands_passing_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections.", + "sub_caption": "person: hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object. Scene role: passing a heavy piece of hardware to another worker", + "measured_bbox": [ + 0.6035, + 0.4687, + 0.7144, + 0.5946 + ], + "detection_confidence": "high", + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_passing_object.png", + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", + "reference_verify": "references/reference_verify_hands_passing_object.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", + "output": "references/ref_hands_passing_object.png", + "mask": "references/sam_mask_hands_passing_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 17.0, + 690.0, + 1014.0 + ], + "mask_score": 3.418609, + "mask_area_ratio": 0.166451, + "elapsed_seconds": 11.3527 + } + }, + { + "name": "blue_light_module", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "source_description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. 
Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right.", + "sub_caption": "blue light: a small, intensely bright blue rectangular light glowing through the shadows. Scene role: sitting on the edge of the workbench, casting a blue beam across the tools", + "measured_bbox": [ + 0.2793, + 0.8032, + 0.3903, + 0.9054 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_module.png", + "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_module.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", + "output": "references/ref_blue_light_module.png", + "mask": "references/sam_mask_blue_light_module.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 4.0, + 250.0, + 978.0, + 796.0 + ], + "mask_score": 3.46793, + "mask_area_ratio": 0.374003, + "elapsed_seconds": 10.496 + } + }, + { + "name": "red_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. 
Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: a faintly glowing red rectangular electronic device with a textured surface. Scene role: lying flat on the workbench next to the busy hands, functioning as a diagnostic remote", + "measured_bbox": [ + 0.7228, + 0.7939, + 0.902, + 0.8912 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_device.png", + "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", + "reference_verify": "references/reference_verify_red_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", + "output": "references/ref_red_device.png", + "mask": "references/sam_mask_red_device.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 76.0, + 282.0, + 989.0, + 762.0 + ], + "mask_score": 3.430848, + "mask_area_ratio": 0.198863, + "elapsed_seconds": 11.2516 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000009", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + 
"n_planned": 5, + "n_detected": 5, + "n_subjects": 5, + "subjects": [ + { + "name": "person_washing_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "source_description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in low light conditions.", + "sub_caption": "person: A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.. Scene role: Actively gripping the blue cleaner bottle over the kitchen sink.", + "measured_bbox": [ + 0.0921, + 0.3527, + 1.0, + 1.0 + ], + "detection_confidence": 950, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_washing_sink.png", + "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_washing_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", + "output": "references/ref_person_washing_sink.png", + "mask": "references/sam_mask_person_washing_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 14.0, + 674.0, + 1023.0 + ], + "mask_score": 3.431432, + "mask_area_ratio": 0.156425, + "elapsed_seconds": 10.3393 + } + }, + { + "name": "blue_cleaner_bottle", + "is_person": false, + "subject_type": 
"object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "source_description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers.", + "sub_caption": "cleaner bottle: A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.. Scene role: Held in the person's hands, positioned just above the sink basin.", + "measured_bbox": [ + 0.4107, + 0.2797, + 0.632, + 0.8174 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_cleaner_bottle.png", + "raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_blue_cleaner_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "output": "references/ref_blue_cleaner_bottle.png", + "mask": "references/sam_mask_blue_cleaner_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 55.0, + 696.0, + 968.0 + ], + "mask_score": 3.452606, + "mask_area_ratio": 0.185988, + "elapsed_seconds": 10.6007 + } + }, + { + "name": "red_sink_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + 
"source_description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter.", + "sub_caption": "red mat: A vibrant red, textured rubber mat featuring a grid or perforated pattern.. Scene role: Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands.", + "measured_bbox": [ + 0.3049, + 0.5104, + 0.8328, + 0.9302 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_sink_mat.png", + "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", + "reference_verify": "references/reference_verify_red_sink_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", + "output": "references/ref_red_sink_mat.png", + "mask": "references/sam_mask_red_sink_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 21.0, + 148.0, + 1001.0, + 881.0 + ], + "mask_score": 3.479561, + "mask_area_ratio": 0.582802, + "elapsed_seconds": 10.3866 + } + }, + { + "name": "silver_faucet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "source_description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. 
Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies.", + "sub_caption": "faucet: A polished, curved silver metal kitchen faucet with a standard spout.. Scene role: Rising from the back of the sink counter, partially occluded by the person's hands and bottle.", + "measured_bbox": [ + 0.5344, + 0.0136, + 0.718, + 0.3772 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_faucet.png", + "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", + "reference_verify": "references/reference_verify_silver_faucet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", + "output": "references/ref_silver_faucet.png", + "mask": "references/sam_mask_silver_faucet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 115.0, + 62.0, + 895.0, + 985.0 + ], + "mask_score": 3.425959, + "mask_area_ratio": 0.163316, + "elapsed_seconds": 10.2359 + } + }, + { + "name": "yellow_dish_gloves", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "source_description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. 
Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor.", + "sub_caption": "yellow dish gloves: A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.. Scene role: Laying flat on the countertop directly next to the sink rim, ready to be worn.", + "measured_bbox": [ + 0.0, + 0.3471, + 0.2191, + 0.8264 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_yellow_dish_gloves.png", + "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", + "reference_verify": "references/reference_verify_yellow_dish_gloves.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", + "output": "references/ref_yellow_dish_gloves.png", + "mask": "references/sam_mask_yellow_dish_gloves.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 138.0, + 66.0, + 850.0, + 952.0 + ], + "mask_score": 3.462321, + "mask_area_ratio": 0.269736, + "elapsed_seconds": 10.2407 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000010", + "target_total": 9, + "target_people": 8, + "target_objects": 1, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": 
"detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 9, + "n_detected": 9, + "n_subjects": 9, + "subjects": [ + { + "name": "person_reading_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "source_description": "A person is reading a book, partially visible in the red light, with only part of their arm and hand shown holding the book. Source dataset: Ego4D. Scene context: A person is reading a book illuminated by a red light in an otherwise dark room.", + "sub_caption": "person: A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.. Scene role: Sitting on the left side of the room, focused intently on reading their book in the red glow.", + "measured_bbox": [ + 0.0127, + 0.1501, + 0.4525, + 0.6235 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_reading_red_light.png", + "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_reading_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", + "output": "references/ref_person_reading_red_light.png", + "mask": "references/sam_mask_person_reading_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 16.0, + 680.0, + 1023.0 + ], + "mask_score": 
3.324489, + "mask_area_ratio": 0.157365, + "elapsed_seconds": 7.1471 + } + }, + { + "name": "person_lying_reading_blue_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "source_description": "A person lying on a bed. Source dataset: Ego4D. Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light.", + "sub_caption": "person: A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.. Scene role: Lying on a bed in the back right corner, quietly reading separate from the main group.", + "measured_bbox": [ + 0.6944, + 0.2802, + 0.9406, + 0.4237 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_lying_reading_blue_light.png", + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "reference_verify": "references/reference_verify_person_lying_reading_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "output": "references/ref_person_lying_reading_blue_light.png", + "mask": "references/sam_mask_person_lying_reading_blue_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 360.0, + 75.0, + 665.0, + 980.0 + ], + "mask_score": 3.501601, + "mask_area_ratio": 0.137436, + "elapsed_seconds": 7.1743 + } + }, + { + "name": 
"person_holding_blue_flashlight", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "source_description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. Scene context: A person is holding a lit blue flashlight in a dark environment.", + "sub_caption": "person: An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.. Scene role: Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination.", + "measured_bbox": [ + 0.222, + 0.574, + 0.493, + 0.981 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_holding_blue_flashlight.png", + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "reference_verify": "references/reference_verify_person_holding_blue_flashlight.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "output": "references/ref_person_holding_blue_flashlight.png", + "mask": "references/sam_mask_person_holding_blue_flashlight.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 704.0, + 1017.0 + ], + "mask_score": 3.354952, + "mask_area_ratio": 0.150028, + "elapsed_seconds": 7.2854 + } + }, + { + 
"name": "person_standing_background", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.. Scene role: Standing quietly in the background, observing the room while holding a round object.", + "measured_bbox": [ + 0.4462, + 0.0267, + 0.5613, + 0.5921 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_standing_background.png", + "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", + "reference_verify": "references/reference_verify_person_standing_background.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", + "output": "references/ref_person_standing_background.png", + "mask": "references/sam_mask_person_standing_background.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 337.0, + 25.0, + 678.0, + 1014.0 + ], + "mask_score": 3.31662, + "mask_area_ratio": 0.134835, + "elapsed_seconds": 7.1397 + } + }, + { + "name": "person_face_in_red_light", + "is_person": true, + "subject_type": 
"person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "source_description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner.", + "sub_caption": "person: A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.. Scene role: Sitting near the center of the gathering, looking toward the others while bathed in red ambient light.", + "measured_bbox": [ + 0.5873, + 0.345, + 0.6506, + 0.4705 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_face_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_face_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", + "output": "references/ref_person_face_in_red_light.png", + "mask": "references/sam_mask_person_face_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 50.0, + 662.0, + 997.0 + ], + "mask_score": 3.468133, + "mask_area_ratio": 0.138924, + 
"elapsed_seconds": 7.09 + } + }, + { + "name": "hands_holding_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "source_description": "Visible hands, illuminated in reddish light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background.", + "sub_caption": "hands: A pair of hands illuminated in a reddish light, holding a small object against the dark environment.. Scene role: Sitting within the group, holding a small item in the pool of red light.", + "measured_bbox": [ + 0.5539, + 0.5673, + 0.6564, + 0.6516 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_small_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", + "output": "references/ref_hands_holding_small_object.png", + "mask": "references/sam_mask_hands_holding_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 66.0, + 678.0, + 1002.0 + ], + "mask_score": 3.446404, + "mask_area_ratio": 0.148472, + "elapsed_seconds": 7.2113 + } + }, + { + "name": "hands_holding_triangular_object", + "is_person": true, + 
"subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "source_description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns.", + "sub_caption": "person: Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.. Scene role: Showing the patterned triangular object to the group nearby.", + "measured_bbox": [ + 0.6869, + 0.6745, + 0.898, + 0.9021 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_triangular_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "reference_verify": "references/reference_verify_hands_holding_triangular_object.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "output": "references/ref_hands_holding_triangular_object.png", + "mask": "references/sam_mask_hands_holding_triangular_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 371.0, + 176.0, + 647.0, + 904.0 + ], + "mask_score": 3.472167, + "mask_area_ratio": 0.075788, + "elapsed_seconds": 7.6612 + } + }, + { + "name": "hands_gesturing", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", 
+ "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark room illuminated by stage lights.", + "sub_caption": "person: A person's hands catching the red stage-like lighting, caught mid-gesture.. Scene role: Actively gesturing and conversing with the central group.", + "measured_bbox": [ + 0.233, + 0.5315, + 0.3583, + 0.6782 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_gesturing.png", + "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", + "reference_verify": "references/reference_verify_hands_gesturing.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", + "output": "references/ref_hands_gesturing.png", + "mask": "references/sam_mask_hands_gesturing.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 64.0, + 677.0, + 979.0 + ], + "mask_score": 3.460945, + "mask_area_ratio": 0.132824, + "elapsed_seconds": 7.2309 + } + }, + { + "name": "plastic_water_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "source_description": "A clear plastic bottle, likely containing water, with a blue and white label, standing on the counter to the right. 
Source dataset: EPIC-Kitchens. Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces.", + "sub_caption": "water bottle: A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.. Scene role: Resting upright on the floor in the center of the group, reflecting the dramatic lighting.", + "measured_bbox": [ + 0.5155, + 0.7075, + 0.5653, + 0.9622 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_plastic_water_bottle.png", + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", + "reference_verify": "references/reference_verify_plastic_water_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", + "output": "references/ref_plastic_water_bottle.png", + "mask": "references/sam_mask_plastic_water_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 72.0, + 666.0, + 982.0 + ], + "mask_score": 3.471713, + "mask_area_ratio": 0.17085, + "elapsed_seconds": 9.3022 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + }, + { + "sample_id": "sample_000011", + "target_total": 10, + "target_people": 1, + "target_objects": 9, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + 
"bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated by a red light, and they appear to be holding a small object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light.", + "sub_caption": "person: The person's hands are visible, illuminated by a strong red light, holding a small object or tool.. Scene role: Working intently, holding the metal tool over the granite countertop.", + "measured_bbox": [ + 0.1787, + 0.2332, + 0.4617, + 0.5792 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_hands.png", + "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", + "reference_verify": "references/reference_verify_person_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", + "output": "references/ref_person_hands.png", + "mask": "references/sam_mask_person_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 36.0, + 704.0, + 1013.0 + ], + "mask_score": 3.415329, + "mask_area_ratio": 
0.148867, + "elapsed_seconds": 7.2976 + } + }, + { + "name": "textured_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment.", + "sub_caption": "textured surface: A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.. Scene role: Lying flat on the granite countertop under the person's hands.", + "measured_bbox": [ + 0.1138, + 0.5866, + 0.5947, + 0.7728 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_mat.png", + "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", + "reference_verify": "references/reference_verify_textured_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", + "output": "references/ref_textured_mat.png", + "mask": "references/sam_mask_textured_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 29.0, + 292.0, + 994.0, + 732.0 + ], + "mask_score": 3.388931, + "mask_area_ratio": 0.280238, + "elapsed_seconds": 7.3108 + } + }, + { + "name": "wall_shelf", + "is_person": false, + "subject_type": "object", + "source_set": 
"obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "source_description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap.", + "sub_caption": "shelf: A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.. Scene role: Attached to the wall in the shadowy background above the counter.", + "measured_bbox": [ + 0.5206, + 0.0681, + 0.6781, + 0.3789 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_shelf.png", + "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", + "reference_verify": "references/reference_verify_wall_shelf.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", + "output": "references/ref_wall_shelf.png", + "mask": "references/sam_mask_wall_shelf.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 176.0, + 42.0, + 861.0, + 940.0 + ], + "mask_score": 3.354082, + "mask_area_ratio": 0.233056, + "elapsed_seconds": 7.1907 + } + }, + { + "name": "striped_towel", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "source_description": "A black and white striped towel hanging from the oven 
door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring.", + "sub_caption": "towel: A black and white striped towel.. Scene role: Hanging down from a lower cabinet handle just below the edge of the countertop.", + "measured_bbox": [ + 0.6082, + 0.7266, + 0.7242, + 0.9973 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_striped_towel.png", + "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", + "reference_verify": "references/reference_verify_striped_towel.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", + "output": "references/ref_striped_towel.png", + "mask": "references/sam_mask_striped_towel.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 125.0, + 53.0, + 897.0, + 971.0 + ], + "mask_score": 3.50075, + "mask_area_ratio": 0.517391, + "elapsed_seconds": 7.4653 + } + }, + { + "name": "glass_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "source_description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. 
Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator.", + "sub_caption": "bottle: A clear glass bottle filled with amber liquid.. Scene role: Standing on the granite countertop to the side, catching dim reflections.", + "measured_bbox": [ + 0.2049, + 0.4778, + 0.2764, + 0.6209 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_glass_bottle.png", + "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_glass_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", + "output": "references/ref_glass_bottle.png", + "mask": "references/sam_mask_glass_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 198.0, + 0.0, + 820.0, + 1008.0 + ], + "mask_score": 3.251198, + "mask_area_ratio": 0.458004, + "elapsed_seconds": 7.2584 + } + }, + { + "name": "dirty_plate", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "source_description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. 
Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby.", + "sub_caption": "plate: A partially visible plate containing food remnants.. Scene role: Sitting on the counter near the bottle in the dim light.", + "measured_bbox": [ + 0.003, + 0.5981, + 0.2, + 0.6793 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dirty_plate.png", + "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", + "reference_verify": "references/reference_verify_dirty_plate.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", + "output": "references/ref_dirty_plate.png", + "mask": "references/sam_mask_dirty_plate.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 334.0, + 963.0, + 685.0 + ], + "mask_score": 3.317592, + "mask_area_ratio": 0.170906, + "elapsed_seconds": 7.0879 + } + }, + { + "name": "metal_tool", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "source_description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. 
Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers.", + "sub_caption": "dark object: A dark, silhouetted tool with a thin, elongated structure resembling pliers.. Scene role: Held firmly by the person's hands under the red light.", + "measured_bbox": [ + 0.203, + 0.348, + 0.476, + 0.468 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_metal_tool.png", + "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", + "reference_verify": "references/reference_verify_metal_tool.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", + "output": "references/ref_metal_tool.png", + "mask": "references/sam_mask_metal_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 51.0, + 66.0, + 993.0, + 937.0 + ], + "mask_score": 0.918062, + "mask_area_ratio": 0.928193, + "elapsed_seconds": 7.1989 + } + }, + { + "name": "box_grater", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "source_description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items.", + "sub_caption": "grater: A metal box grater.. 
Scene role: Resting on the counter towards the background left.", + "measured_bbox": [ + 0.4842, + 0.4749, + 0.5853, + 0.5496 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_box_grater.png", + "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", + "reference_verify": "references/reference_verify_box_grater.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", + "output": "references/ref_box_grater.png", + "mask": "references/sam_mask_box_grater.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 236.0, + 16.0, + 787.0, + 1015.0 + ], + "mask_score": 3.453517, + "mask_area_ratio": 0.346949, + "elapsed_seconds": 7.1571 + } + }, + { + "name": "wooden_cabinets", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "source_description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides.", + "sub_caption": "kitchen cabinets (right): A row of light brown wooden cabinets with metal handles.. 
Scene role: Lining the right side of the kitchen in the background shadows.", + "measured_bbox": [ + 0.7319, + 0.5034, + 0.9945, + 0.9951 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_cabinets.png", + "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_cabinets.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", + "output": "references/ref_wooden_cabinets.png", + "mask": "references/sam_mask_wooden_cabinets.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 253.0, + 986.0, + 809.0 + ], + "mask_score": 3.454364, + "mask_area_ratio": 0.365833, + "elapsed_seconds": 7.226 + } + }, + { + "name": "granite_counter", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "source_description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right.", + "sub_caption": "granite countertop: A grey, white, and black speckled stone countertop.. 
Scene role: The main horizontal surface spanning the lower half of the frame, holding the scattered items.", + "measured_bbox": [ + 0.003, + 0.5275, + 0.6738, + 0.883 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_granite_counter.png", + "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", + "reference_verify": "references/reference_verify_granite_counter.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", + "output": "references/ref_granite_counter.png", + "mask": "references/sam_mask_granite_counter.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 71.0, + 1023.0, + 940.0 + ], + "mask_score": 3.480751, + "mask_area_ratio": 0.58655, + "elapsed_seconds": 7.1983 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.jsonl b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d3e83c76d3bb6104030dad13258537e77051f272 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/dataset.jsonl @@ -0,0 +1,10 @@ +{"sample_id": "sample_000001", "target_total": 3, "target_people": 1, "target_objects": 2, "canvas_size": [1248, 832], "canvas_aspect_ratio": "3:2", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": 
"plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 3, "n_detected": 3, "n_subjects": 3, "subjects": [{"name": "person_in_red_light", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", "source_name": "person", "source_description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object.", "sub_caption": "person: A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.. Scene role: Operating the small device in the foreground", "measured_bbox": [0.301, 0.039, 0.714, 0.98], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_in_red_light.png", "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", "reference_verify": "references/reference_verify_person_in_red_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", "output": "references/ref_person_in_red_light.png", "mask": "references/sam_mask_person_in_red_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [324.0, 26.0, 699.0, 1013.0], "mask_score": 3.392402, "mask_area_ratio": 0.165197, "elapsed_seconds": 53.7174}}, {"name": "red_illuminated_structure", 
"is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", "source_name": "red illuminated structure", "source_description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights.", "sub_caption": "red illuminated structure: A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.. Scene role: Forms the textured, atmospheric background behind the person", "measured_bbox": [0.6096, 0.0, 0.9961, 0.991], "detection_confidence": "high", "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_red_illuminated_structure.png", "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", "reference_verify": "references/reference_verify_red_illuminated_structure.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", "output": "references/ref_red_illuminated_structure.png", "mask": "references/sam_mask_red_illuminated_structure.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [0.0, 0.0, 1023.0, 1023.0], "mask_score": 1.966617, "mask_area_ratio": 0.597257, "elapsed_seconds": 10.2671}}, {"name": "green_indicator_light", "is_person": false, "subject_type": "object", "source_set": "obj_set", 
"source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", "source_name": "green light", "source_description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible.", "sub_caption": "green light: A small, bright green glowing LED indicator light piercing through the darkness.. Scene role: Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting", "measured_bbox": [0.1227, 0.438, 0.1605, 0.4975], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_green_indicator_light.png", "raw_ref_image": "references/raw_ref_green_indicator_light_attempt_01.png", "reference_verify": "references/reference_verify_green_indicator_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", "output": "references/ref_green_indicator_light.png", "mask": "references/sam_mask_green_indicator_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [347.0, 335.0, 676.0, 688.0], "mask_score": 3.439631, "mask_area_ratio": 0.059673, "elapsed_seconds": 10.1365}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000003", "target_total": 3, "target_people": 1, "target_objects": 2, "canvas_size": [1152, 864], "canvas_aspect_ratio": "4:3", 
"main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 3, "n_detected": 3, "n_subjects": 3, "subjects": [{"name": "person_in_red_light", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", "source_name": "person", "source_description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light.", "sub_caption": "person: Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.. Scene role: Interacting with the open book, fingers lightly resting on the visible pages.", "measured_bbox": [0.0, 0.0, 0.5655, 1.0], "detection_confidence": 0.9, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_in_red_light.png", "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", "reference_verify": "references/reference_verify_person_in_red_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", "output": "references/ref_person_in_red_light.png", "mask": "references/sam_mask_person_in_red_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [351.0, 54.0, 667.0, 1015.0], "mask_score": 3.471897, "mask_area_ratio": 0.134706, 
"elapsed_seconds": 8.8737}}, {"name": "red_illuminated_book", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", "source_name": "book", "source_description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness.", "sub_caption": "book: A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.. Scene role: Lying open flat on the surface directly beneath the person's hands.", "measured_bbox": [0.2245, 0.3971, 0.6708, 0.6724], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_red_illuminated_book.png", "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", "reference_verify": "references/reference_verify_red_illuminated_book.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", "output": "references/ref_red_illuminated_book.png", "mask": "references/sam_mask_red_illuminated_book.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [22.0, 180.0, 1002.0, 858.0], "mask_score": 3.463648, "mask_area_ratio": 0.502075, "elapsed_seconds": 7.3171}}, {"name": "shadowy_doorway", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", 
"source_name": "doorway or opening", "source_description": "A darker, arched or rectangular shape on the wall, suggesting an opening to another area. Source dataset: Ego4D. Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures.", "sub_caption": "doorway or opening: A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.. Scene role: Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area.", "measured_bbox": [0.5153, 0.0267, 0.8188, 0.8345], "detection_confidence": 0.9, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_shadowy_doorway.png", "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", "reference_verify": "references/reference_verify_shadowy_doorway.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", "output": "references/ref_shadowy_doorway.png", "mask": "references/sam_mask_shadowy_doorway.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [141.0, 11.0, 899.0, 969.0], "mask_score": 3.471354, "mask_area_ratio": 0.542183, "elapsed_seconds": 7.2941}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000004", "target_total": 5, "target_people": 1, "target_objects": 4, "canvas_size": [864, 1152], "canvas_aspect_ratio": "3:4", "main_image": 
"main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 5, "n_detected": 5, "n_subjects": 3, "subjects": [{"name": "blue_light_source", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", "source_name": "blue light source", "source_description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side.", "sub_caption": "blue light source: A small, blurry blue light emitting a faint, cool glow against the darkness.. Scene role: Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting.", "measured_bbox": [0.5155, 0.3724, 0.553, 0.4239], "detection_confidence": "high", "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_light_source.png", "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", "reference_verify": "references/reference_verify_blue_light_source.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", "output": "references/ref_blue_light_source.png", "mask": "references/sam_mask_blue_light_source.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [132.0, 159.0, 891.0, 864.0], "mask_score": 3.485053, "mask_area_ratio": 0.292151, "elapsed_seconds": 
9.6106}}, {"name": "paint_roller", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", "source_name": "paint roller", "source_description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting.", "sub_caption": "paint roller: A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.. Scene role: Resting on a nearby surface in the foreground, catching subtle highlights from the red light.", "measured_bbox": [0.2917, 0.532, 0.4405, 0.7522], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_paint_roller.png", "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", "reference_verify": "references/reference_verify_paint_roller.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", "output": "references/ref_paint_roller.png", "mask": "references/sam_mask_paint_roller.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [136.0, 75.0, 866.0, 950.0], "mask_score": 3.459168, "mask_area_ratio": 0.116944, "elapsed_seconds": 7.1624}}, {"name": "draped_tarp", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", "source_name": "light-colored material", "source_description": "A draped, light-colored or beige 
material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp.", "sub_caption": "light-colored material: A draped, light-colored tarp or drop cloth covering large, indistinct shapes.. Scene role: Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting.", "measured_bbox": [0.392, 0.4207, 0.8243, 0.8862], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_draped_tarp.png", "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", "reference_verify": "references/reference_verify_draped_tarp.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", "output": "references/ref_draped_tarp.png", "mask": "references/sam_mask_draped_tarp.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [0.0, 189.0, 1023.0, 888.0], "mask_score": 3.482188, "mask_area_ratio": 0.485075, "elapsed_seconds": 7.4131}}], "not_emitted": [{"name": "worker_in_cap", "reason": "not_detected"}, {"name": "stacked_boxes", "reason": "not_detected"}], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000005", "target_total": 6, "target_people": 3, "target_objects": 3, "canvas_size": [1152, 864], "canvas_aspect_ratio": "4:3", "main_image": 
"main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 6, "n_detected": 6, "n_subjects": 6, "subjects": [{"name": "pizza_prep_hands", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "EPIC-Kitchens:P11_106:person:0", "source_name": "person", "source_description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes.", "sub_caption": "person: Hands and lower arms visible, preparing a pizza on a wooden surface.. Scene role: In the foreground, actively making a pizza.", "measured_bbox": [0.532, 0.2245, 1.0, 0.7871], "detection_confidence": 0.9, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_pizza_prep_hands.png", "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", "reference_verify": "references/reference_verify_pizza_prep_hands.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", "output": "references/ref_pizza_prep_hands.png", "mask": "references/sam_mask_pizza_prep_hands.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [349.0, 14.0, 677.0, 1016.0], "mask_score": 3.451951, "mask_area_ratio": 0.150558, "elapsed_seconds": 10.2196}}, {"name": "person_in_jacket", "is_person": 
true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", "source_name": "person", "source_description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting.", "sub_caption": "person: Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.. Scene role: Standing in the midground, facing away toward the kitchen cabinets.", "measured_bbox": [0.0, 0.0, 0.361, 0.675], "detection_confidence": 1.0, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_in_jacket.png", "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_02.png", "reference_verify": "references/reference_verify_person_in_jacket.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", "output": "references/ref_person_in_jacket.png", "mask": "references/sam_mask_person_in_jacket.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [334.0, 15.0, 684.0, 1023.0], "mask_score": 3.414528, "mask_area_ratio": 0.160464, "elapsed_seconds": 10.2925}}, {"name": "person_with_pan", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", "source_name": "person", 
"source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", "sub_caption": "person: Person seen from the torso down, holding a dark round object.. Scene role: Standing in the background near a blue wall.", "measured_bbox": [0.1195, 0.0021, 0.4483, 0.6302], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_with_pan.png", "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", "reference_verify": "references/reference_verify_person_with_pan.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", "output": "references/ref_person_with_pan.png", "mask": "references/sam_mask_person_with_pan.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [334.0, 18.0, 690.0, 1023.0], "mask_score": 3.408831, "mask_area_ratio": 0.163625, "elapsed_seconds": 10.3028}}, {"name": "salt_box", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P35_102:object:1", "source_name": "box of salt", "source_description": "A small cardboard box with blue and black text, sitting on the counter near the stove. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils.", "sub_caption": "box of salt: A small cardboard box with blue and black graphical patterns, resembling a salt container.. Scene role: Resting on the wooden counter next to the pizza prep area.", "measured_bbox": [0.5558, 0.4006, 0.6966, 0.4937], "detection_confidence": 0.5, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_salt_box.png", "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", "reference_verify": "references/reference_verify_salt_box.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", "output": "references/ref_salt_box.png", "mask": "references/sam_mask_salt_box.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [73.0, 268.0, 952.0, 762.0], "mask_score": 3.471932, "mask_area_ratio": 0.353847, "elapsed_seconds": 10.266}}, {"name": "cutlery_set", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P26_103:object:4", "source_name": "cutlery", "source_description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", "sub_caption": "cutlery: A silver fork and a knife resting on a yellow plate.. 
Scene role: Placed on the counter in the foreground left.", "measured_bbox": [0.1913, 0.8329, 0.6116, 0.9636], "detection_confidence": "high", "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_cutlery_set.png", "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", "reference_verify": "references/reference_verify_cutlery_set.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", "output": "references/ref_cutlery_set.png", "mask": "references/sam_mask_cutlery_set.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [28.0, 122.0, 1013.0, 887.0], "mask_score": 3.237995, "mask_area_ratio": 0.452152, "elapsed_seconds": 10.821}}, {"name": "wall_cabinet", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P26_121:object:12", "source_name": "cabinet", "source_description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid.", "sub_caption": "cabinet: A wooden cabinet positioned above a kitchen counter.. 
Scene role: Mounted on the wall in the midground, directly above the counter.", "measured_bbox": [0.5897, 0.0017, 0.9469, 0.1735], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_wall_cabinet.png", "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", "reference_verify": "references/reference_verify_wall_cabinet.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", "output": "references/ref_wall_cabinet.png", "mask": "references/sam_mask_wall_cabinet.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [15.0, 31.0, 1008.0, 1013.0], "mask_score": 3.398914, "mask_area_ratio": 0.735002, "elapsed_seconds": 10.923}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000006", "target_total": 8, "target_people": 1, "target_objects": 7, "canvas_size": [1152, 864], "canvas_aspect_ratio": "4:3", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 8, "n_detected": 8, "n_subjects": 6, "subjects": [{"name": "person_at_sink", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "EPIC-Kitchens:P26_103:person:0", "source_name": "person", "source_description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards 
the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", "sub_caption": "person: A person's left hand and lower body wearing dark trousers and black shoes with white soles.. Scene role: Standing in the foreground, reaching their left hand towards the kitchen faucet.", "measured_bbox": [0.0, 0.3583, 0.2441, 1.0], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_at_sink.png", "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", "reference_verify": "references/reference_verify_person_at_sink.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", "output": "references/ref_person_at_sink.png", "mask": "references/sam_mask_person_at_sink.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [320.0, 24.0, 689.0, 1002.0], "mask_score": 3.450755, "mask_area_ratio": 0.163542, "elapsed_seconds": 31.0984}}, {"name": "wooden_door", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P12_104:object:0", "source_name": "door", "source_description": "A light brown wooden door, closed, with a metallic door handle. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left.", "sub_caption": "door: A light brown wooden door, closed, with a metallic handle.. 
Scene role: Visible in the background on the kitchen wall.", "measured_bbox": [0.2139, 0.0, 0.3994, 0.412], "detection_confidence": 1.0, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_wooden_door.png", "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", "reference_verify": "references/reference_verify_wooden_door.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", "output": "references/ref_wooden_door.png", "mask": "references/sam_mask_wooden_door.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [256.0, 26.0, 767.0, 1015.0], "mask_score": 3.446321, "mask_area_ratio": 0.388947, "elapsed_seconds": 9.29}}, {"name": "wrapped_cucumber", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P37_101:object:1", "source_name": "cucumber 2", "source_description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background.", "sub_caption": "cucumber 2: A long, green cucumber wrapped in clear plastic.. 
Scene role: Resting horizontally on the dark countertop next to the sink.", "measured_bbox": [0.2215, 0.4465, 0.4029, 0.5104], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_wrapped_cucumber.png", "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", "reference_verify": "references/reference_verify_wrapped_cucumber.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", "output": "references/ref_wrapped_cucumber.png", "mask": "references/sam_mask_wrapped_cucumber.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [33.0, 397.0, 1011.0, 624.0], "mask_score": 3.430612, "mask_area_ratio": 0.118574, "elapsed_seconds": 7.2551}}, {"name": "gas_stove", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P22_105:object:12", "source_name": "stove", "source_description": "A gas stove with black grates on the left side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets.", "sub_caption": "stove: A kitchen gas stove with black grates.. 
Scene role: Positioned along the counter in the background.", "measured_bbox": [0.03, 0.345, 0.318, 0.444], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_gas_stove.png", "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", "reference_verify": "references/reference_verify_gas_stove.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", "output": "references/ref_gas_stove.png", "mask": "references/sam_mask_gas_stove.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [58.0, 180.0, 965.0, 842.0], "mask_score": 3.470988, "mask_area_ratio": 0.477615, "elapsed_seconds": 7.3908}}, {"name": "water_pitcher", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P07_107:object:13", "source_name": "water pitcher", "source_description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor.", "sub_caption": "water pitcher: A clear plastic water pitcher with a white handle and lid.. 
Scene role: Sitting upright on the counter.", "measured_bbox": [0.3327, 0.2732, 0.4536, 0.4573], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_water_pitcher.png", "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", "reference_verify": "references/reference_verify_water_pitcher.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", "output": "references/ref_water_pitcher.png", "mask": "references/sam_mask_water_pitcher.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [242.0, 115.0, 856.0, 972.0], "mask_score": 3.323768, "mask_area_ratio": 0.450877, "elapsed_seconds": 7.2849}}, {"name": "silver_spoon", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P04_103:object:9", "source_name": "Silver Spoon", "source_description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around.", "sub_caption": "Silver Spoon: A small silver metal spoon.. 
Scene role: Laying flat on the messy countertop near the cucumber.", "measured_bbox": [0.3001, 0.4801, 0.3988, 0.539], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_silver_spoon.png", "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", "reference_verify": "references/reference_verify_silver_spoon.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", "output": "references/ref_silver_spoon.png", "mask": "references/sam_mask_silver_spoon.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [88.0, 77.0, 916.0, 950.0], "mask_score": 3.449565, "mask_area_ratio": 0.092279, "elapsed_seconds": 7.0866}}], "not_emitted": [{"name": "black_pot", "reason": "not_detected"}, {"name": "kitchen_faucet", "reason": "not_detected"}], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000007", "target_total": 14, "target_people": 1, "target_objects": 13, "canvas_size": [1248, 832], "canvas_aspect_ratio": "3:2", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 14, "n_detected": 14, "n_subjects": 14, "subjects": [{"name": "typist", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", "source_name": "typist", "source_description": "Visible as a dark silhouette with one hand 
interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room.", "sub_caption": "typist: Visible as a dark silhouette with one hand interacting with an illuminated keyboard.. Scene role: Typing at the desk in the center of the frame, serving as the main subject of the scene.", "measured_bbox": [0.5473, 0.0, 0.9968, 1.0], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_typist.png", "raw_ref_image": "references/raw_ref_typist_attempt_01.png", "reference_verify": "references/reference_verify_typist.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_typist.png", "mask": "references/sam_mask_typist.png"}}, {"name": "textured_fabric_area", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", "source_name": "textured fabric area", "source_description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light.", "sub_caption": "textured fabric area: A faint blueish, textured area that resembles rough fabric.. 
Scene role: Draped loosely over the back of the typist's chair, catching some of the ambient blue light.", "measured_bbox": [0.5434, 0.5078, 0.7285, 0.8985], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_textured_fabric_area.png", "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", "reference_verify": "references/reference_verify_textured_fabric_area.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_textured_fabric_area.png", "mask": "references/sam_mask_textured_fabric_area.png"}}, {"name": "textured_surface", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", "source_name": "textured surface", "source_description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment.", "sub_caption": "textured surface: A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.. 
Scene role: Acting as a large desk mat or mousepad underneath the glowing keyboard.", "measured_bbox": [0.3547, 0.1364, 0.8811, 0.479], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_textured_surface.png", "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", "reference_verify": "references/reference_verify_textured_surface.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_textured_surface.png", "mask": "references/sam_mask_textured_surface.png"}}, {"name": "blue_lines", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", "source_name": "blue lines", "source_description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right.", "sub_caption": "blue lines: Faint, indistinct blue lines glowing in the dark.. Scene role: An LED light strip illuminating the edge of the desk.", "measured_bbox": [0.3714, 0.3069, 0.5828, 0.4931], "detection_confidence": 0.9, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_lines.png", "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", "reference_verify": "references/reference_verify_blue_lines.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_blue_lines.png", "mask": "references/sam_mask_blue_lines.png"}}, {"name": "grid_patterned_floor", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", "source_name": "floor", "source_description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. 
Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall.", "sub_caption": "floor: A dark, grid-patterned floor surface, likely made of tiles.. Scene role: Visible in the lower portion of the room beneath the desk area.", "measured_bbox": [0.0039, 0.5389, 0.6911, 0.9893], "detection_confidence": 0.9, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_grid_patterned_floor.png", "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", "reference_verify": "references/reference_verify_grid_patterned_floor.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_grid_patterned_floor.png", "mask": "references/sam_mask_grid_patterned_floor.png"}}, {"name": "shadowy_shape", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", "source_name": "shadowy shape", "source_description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light.", "sub_caption": "shadowy shape: A large, unidentifiable shadowy shape.. 
Scene role: Looming in the background, suggesting stacked boxes or furniture in the darkness.", "measured_bbox": [0.0709, 0.0492, 0.3072, 0.2887], "detection_confidence": 0.8, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_shadowy_shape.png", "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", "reference_verify": "references/reference_verify_shadowy_shape.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_shadowy_shape.png", "mask": "references/sam_mask_shadowy_shape.png"}}, {"name": "blue_light_source", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", "source_name": "blue light source", "source_description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side.", "sub_caption": "blue light source: A small, hazy blue glowing area, appearing slightly irregular in shape.. 
Scene role: A glowing component or hub device resting on the desk.", "measured_bbox": [0.3661, 0.1534, 0.448, 0.2196], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_light_source.png", "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", "reference_verify": "references/reference_verify_blue_light_source.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_blue_light_source.png", "mask": "references/sam_mask_blue_light_source.png"}}, {"name": "thin_curved_object", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", "source_name": "thin curved object", "source_description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting.", "sub_caption": "thin curved object: A thin, metallic or reflective curved object.. 
Scene role: Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light.", "measured_bbox": [0.5384, 0.1284, 0.674, 0.181], "detection_confidence": 0.9, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_thin_curved_object.png", "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", "reference_verify": "references/reference_verify_thin_curved_object.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_thin_curved_object.png", "mask": "references/sam_mask_thin_curved_object.png"}}, {"name": "electronic_device", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", "source_name": "electronic device", "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", "sub_caption": "electronic device: A faint red rectangular shape with some texture, resembling a phone.. 
Scene role: Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights.", "measured_bbox": [0.4103, 0.2767, 0.5015, 0.3797], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_electronic_device.png", "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", "reference_verify": "references/reference_verify_electronic_device.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_electronic_device.png", "mask": "references/sam_mask_electronic_device.png"}}, {"name": "blue_light", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", "source_name": "blue light", "source_description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light.", "sub_caption": "blue light: A small blue light source.. Scene role: A standby light on a computer tower sitting on the floor.", "measured_bbox": [0.523, 0.747, 0.539, 0.788], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_light.png", "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", "reference_verify": "references/reference_verify_blue_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_blue_light.png", "mask": "references/sam_mask_blue_light.png"}}, {"name": "cable", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P02_137:object:11", "source_name": "cable", "source_description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. Source dataset: EPIC-Kitchens. 
Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window.", "sub_caption": "cable: A light-colored cable trailing down from the counter area towards the floor.. Scene role: Hanging down from the edge of the desk toward the floor, connecting devices.", "measured_bbox": [0.3307, 0.1951, 0.482, 0.9804], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_cable.png", "raw_ref_image": "references/raw_ref_cable_attempt_01.png", "reference_verify": "references/reference_verify_cable.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_cable.png", "mask": "references/sam_mask_cable.png"}}, {"name": "backpack", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P07_104:object:15", "source_name": "backpack", "source_description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle.", "sub_caption": "backpack: A blue and black backpack.. 
Scene role: Resting quietly on the grid-patterned floor near the trailing cable.", "measured_bbox": [0.2708, 0.6305, 0.4662, 0.9996], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_backpack.png", "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", "reference_verify": "references/reference_verify_backpack.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_backpack.png", "mask": "references/sam_mask_backpack.png"}}, {"name": "small_blue_rectangle", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", "source_name": "small blue rectangle", "source_description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right.", "sub_caption": "small blue rectangle: A tiny, bright blue rectangular glow.. Scene role: A small secondary display or digital clock sitting on the corner of the desk.", "measured_bbox": [0.7651, 0.0635, 0.8412, 0.1295], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_small_blue_rectangle.png", "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", "reference_verify": "references/reference_verify_small_blue_rectangle.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"cached": true, "output": "references/ref_small_blue_rectangle.png", "mask": "references/sam_mask_small_blue_rectangle.png"}}, {"name": "dark_area", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", "source_name": "dark area", "source_description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. 
Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet.", "sub_caption": "dark area: A deeply shadowed region dominating the space.. Scene role: Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup.", "measured_bbox": [0.003, 0.0, 0.3044, 0.5863], "detection_confidence": 0.8, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_dark_area.png", "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", "reference_verify": "references/reference_verify_dark_area.json", "reference_verify_passed": true, "reference_attempts": 9, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", "output": "references/ref_dark_area.png", "mask": "references/sam_mask_dark_area.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [88.0, 52.0, 897.0, 902.0], "mask_score": 3.468953, "mask_area_ratio": 0.45142, "elapsed_seconds": 7.1708}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000008", "target_total": 10, "target_people": 8, "target_objects": 2, "canvas_size": [1280, 720], "canvas_aspect_ratio": "16:9", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 10, "n_detected": 10, "n_subjects": 10, "subjects": [{"name": "person_by_screens", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": 
"Ego4D:ego4d_video/EGO_165518.npy:person:0", "source_name": "person", "source_description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground.", "sub_caption": "person: a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels. Scene role: standing in the background, monitoring the glowing screens", "measured_bbox": [0.1313, 0.1321, 0.2804, 0.5589], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_by_screens.png", "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", "reference_verify": "references/reference_verify_person_by_screens.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", "output": "references/ref_person_by_screens.png", "mask": "references/sam_mask_person_by_screens.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [339.0, 55.0, 687.0, 1009.0], "mask_score": 3.450623, "mask_area_ratio": 0.151286, "elapsed_seconds": 10.4397}}, {"name": "reaching_hands", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", "source_name": "person", "source_description": "Visible hands 
reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands.", "sub_caption": "person: a pair of hands reaching forward, catching sharp reflections of red and blue ambient light. Scene role: reaching across the workbench to grab a tool", "measured_bbox": [0.4313, 0.3821, 0.5678, 0.5521], "detection_confidence": 0.9, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_reaching_hands.png", "raw_ref_image": "references/raw_ref_reaching_hands_attempt_01.png", "reference_verify": "references/reference_verify_reaching_hands.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", "output": "references/ref_reaching_hands.png", "mask": "references/sam_mask_reaching_hands.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [388.0, 183.0, 637.0, 867.0], "mask_score": 3.461161, "mask_area_ratio": 0.070283, "elapsed_seconds": 10.0646}}, {"name": "hands_holding_part", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", "source_name": "person", "source_description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object.", "sub_caption": "person: hands bathed in deep red light, carefully gripping a mechanical object. 
Scene role: holding a component steady on the workbench", "measured_bbox": [0.2769, 0.4337, 0.3938, 0.5372], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_holding_part.png", "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", "reference_verify": "references/reference_verify_hands_holding_part.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", "output": "references/ref_hands_holding_part.png", "mask": "references/sam_mask_hands_holding_part.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [345.0, 90.0, 675.0, 1015.0], "mask_score": 3.449955, "mask_area_ratio": 0.13985, "elapsed_seconds": 9.9678}}, {"name": "hands_with_small_object", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", "source_name": "person", "source_description": "The person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object.", "sub_caption": "person: hands illuminated by a red glow, with one hand open and the other pinching a tiny object. 
Scene role: inspecting a small microchip", "measured_bbox": [0.3697, 0.5225, 0.5251, 0.7126], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_with_small_object.png", "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", "reference_verify": "references/reference_verify_hands_with_small_object.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", "output": "references/ref_hands_with_small_object.png", "mask": "references/sam_mask_hands_with_small_object.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [317.0, 60.0, 684.0, 992.0], "mask_score": 3.441398, "mask_area_ratio": 0.128698, "elapsed_seconds": 10.4408}}, {"name": "hands_with_smoldering_tool", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", "source_name": "person", "source_description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment.", "sub_caption": "person: hands lit by red light, grasping a smoking device or tool. 
Scene role: soldering a wire, emitting a trail of smoke", "measured_bbox": [0.7136, 0.5556, 0.8779, 0.7618], "detection_confidence": 0.98, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_with_smoldering_tool.png", "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", "reference_verify": "references/reference_verify_hands_with_smoldering_tool.json", "reference_verify_passed": true, "reference_attempts": 3, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", "output": "references/ref_hands_with_smoldering_tool.png", "mask": "references/sam_mask_hands_with_smoldering_tool.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [344.0, 43.0, 685.0, 1000.0], "mask_score": 3.455653, "mask_area_ratio": 0.157475, "elapsed_seconds": 10.0065}}, {"name": "helmeted_figure", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", "source_name": "person", "source_description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible.", "sub_caption": "person: a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light. 
Scene role: leaning closely over the workbench to inspect the soldering work", "measured_bbox": [0.717, 0.081, 1.0, 0.862], "detection_confidence": 0.98, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_helmeted_figure.png", "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", "reference_verify": "references/reference_verify_helmeted_figure.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", "output": "references/ref_helmeted_figure.png", "mask": "references/sam_mask_helmeted_figure.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [333.0, 17.0, 696.0, 1018.0], "mask_score": 3.313146, "mask_area_ratio": 0.166679, "elapsed_seconds": 10.3423}}, {"name": "silhouette_with_phone", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", "source_name": "person", "source_description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone.", "sub_caption": "person: a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device. 
Scene role: standing on the side, illuminating the workspace with a phone flashlight", "measured_bbox": [0.5717, 0.0262, 0.857, 0.6433], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_silhouette_with_phone.png", "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", "reference_verify": "references/reference_verify_silhouette_with_phone.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", "output": "references/ref_silhouette_with_phone.png", "mask": "references/sam_mask_silhouette_with_phone.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [317.0, 16.0, 694.0, 1018.0], "mask_score": 3.400937, "mask_area_ratio": 0.174557, "elapsed_seconds": 10.3827}}, {"name": "hands_passing_object", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", "source_name": "person", "source_description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections.", "sub_caption": "person: hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object. 
Scene role: passing a heavy piece of hardware to another worker", "measured_bbox": [0.6035, 0.4687, 0.7144, 0.5946], "detection_confidence": "high", "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_passing_object.png", "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", "reference_verify": "references/reference_verify_hands_passing_object.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", "output": "references/ref_hands_passing_object.png", "mask": "references/sam_mask_hands_passing_object.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [338.0, 17.0, 690.0, 1014.0], "mask_score": 3.418609, "mask_area_ratio": 0.166451, "elapsed_seconds": 11.3527}}, {"name": "blue_light_module", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", "source_name": "blue light", "source_description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right.", "sub_caption": "blue light: a small, intensely bright blue rectangular light glowing through the shadows. 
Scene role: sitting on the edge of the workbench, casting a blue beam across the tools", "measured_bbox": [0.2793, 0.8032, 0.3903, 0.9054], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_light_module.png", "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", "reference_verify": "references/reference_verify_blue_light_module.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", "output": "references/ref_blue_light_module.png", "mask": "references/sam_mask_blue_light_module.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [4.0, 250.0, 978.0, 796.0], "mask_score": 3.46793, "mask_area_ratio": 0.374003, "elapsed_seconds": 10.496}}, {"name": "red_device", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", "source_name": "electronic device", "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", "sub_caption": "electronic device: a faintly glowing red rectangular electronic device with a textured surface. 
Scene role: lying flat on the workbench next to the busy hands, functioning as a diagnostic remote", "measured_bbox": [0.7228, 0.7939, 0.902, 0.8912], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_red_device.png", "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", "reference_verify": "references/reference_verify_red_device.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", "output": "references/ref_red_device.png", "mask": "references/sam_mask_red_device.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [76.0, 282.0, 989.0, 762.0], "mask_score": 3.430848, "mask_area_ratio": 0.198863, "elapsed_seconds": 11.2516}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000009", "target_total": 5, "target_people": 1, "target_objects": 4, "canvas_size": [1152, 864], "canvas_aspect_ratio": "4:3", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 5, "n_detected": 5, "n_subjects": 5, "subjects": [{"name": "person_washing_sink", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", "source_name": "person", "source_description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding an object in low light conditions.", "sub_caption": "person: A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.. Scene role: Actively gripping the blue cleaner bottle over the kitchen sink.", "measured_bbox": [0.0921, 0.3527, 1.0, 1.0], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_washing_sink.png", "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", "reference_verify": "references/reference_verify_person_washing_sink.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", "output": "references/ref_person_washing_sink.png", "mask": "references/sam_mask_person_washing_sink.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [350.0, 14.0, 674.0, 1023.0], "mask_score": 3.431432, "mask_area_ratio": 0.156425, "elapsed_seconds": 10.3393}}, {"name": "blue_cleaner_bottle", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P30_102:object:6", "source_name": "cleaner bottle", "source_description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers.", "sub_caption": "cleaner bottle: A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.. Scene role: Held in the person's hands, positioned just above the sink basin.", "measured_bbox": [0.4107, 0.2797, 0.632, 0.8174], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_blue_cleaner_bottle.png", "raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", "reference_verify": "references/reference_verify_blue_cleaner_bottle.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", "output": "references/ref_blue_cleaner_bottle.png", "mask": "references/sam_mask_blue_cleaner_bottle.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [327.0, 55.0, 696.0, 968.0], "mask_score": 3.452606, "mask_area_ratio": 0.185988, "elapsed_seconds": 10.6007}}, {"name": "red_sink_mat", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P27_104:object:6", "source_name": "red mat", "source_description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter.", "sub_caption": "red mat: A vibrant red, textured rubber mat featuring a grid or perforated pattern.. 
Scene role: Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands.", "measured_bbox": [0.3049, 0.5104, 0.8328, 0.9302], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_red_sink_mat.png", "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", "reference_verify": "references/reference_verify_red_sink_mat.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", "output": "references/ref_red_sink_mat.png", "mask": "references/sam_mask_red_sink_mat.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [21.0, 148.0, 1001.0, 881.0], "mask_score": 3.479561, "mask_area_ratio": 0.582802, "elapsed_seconds": 10.3866}}, {"name": "silver_faucet", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P37_103:object:3", "source_name": "faucet", "source_description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies.", "sub_caption": "faucet: A polished, curved silver metal kitchen faucet with a standard spout.. 
Scene role: Rising from the back of the sink counter, partially occluded by the person's hands and bottle.", "measured_bbox": [0.5344, 0.0136, 0.718, 0.3772], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_silver_faucet.png", "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", "reference_verify": "references/reference_verify_silver_faucet.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", "output": "references/ref_silver_faucet.png", "mask": "references/sam_mask_silver_faucet.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [115.0, 62.0, 895.0, 985.0], "mask_score": 3.425959, "mask_area_ratio": 0.163316, "elapsed_seconds": 10.2359}}, {"name": "yellow_dish_gloves", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P02_128:object:7", "source_name": "yellow dish gloves", "source_description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor.", "sub_caption": "yellow dish gloves: A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.. 
Scene role: Laying flat on the countertop directly next to the sink rim, ready to be worn.", "measured_bbox": [0.0, 0.3471, 0.2191, 0.8264], "detection_confidence": 1.0, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_yellow_dish_gloves.png", "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", "reference_verify": "references/reference_verify_yellow_dish_gloves.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", "output": "references/ref_yellow_dish_gloves.png", "mask": "references/sam_mask_yellow_dish_gloves.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [138.0, 66.0, 850.0, 952.0], "mask_score": 3.462321, "mask_area_ratio": 0.269736, "elapsed_seconds": 10.2407}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000010", "target_total": 9, "target_people": 8, "target_objects": 1, "canvas_size": [1248, 832], "canvas_aspect_ratio": "3:2", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 9, "n_detected": 9, "n_subjects": 9, "subjects": [{"name": "person_reading_red_light", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", "source_name": "person", "source_description": "A person is reading a book, partially visible in the red light, with only part 
of their arm and hand shown holding the book. Source dataset: Ego4D. Scene context: A person is reading a book illuminated by a red light in an otherwise dark room.", "sub_caption": "person: A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.. Scene role: Sitting on the left side of the room, focused intently on reading their book in the red glow.", "measured_bbox": [0.0127, 0.1501, 0.4525, 0.6235], "detection_confidence": 0.9, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_reading_red_light.png", "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", "reference_verify": "references/reference_verify_person_reading_red_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", "output": "references/ref_person_reading_red_light.png", "mask": "references/sam_mask_person_reading_red_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [339.0, 16.0, 680.0, 1023.0], "mask_score": 3.324489, "mask_area_ratio": 0.157365, "elapsed_seconds": 7.1471}}, {"name": "person_lying_reading_blue_light", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", "source_name": "person", "source_description": "A person lying on a bed. Source dataset: Ego4D. 
Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light.", "sub_caption": "person: A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.. Scene role: Lying on a bed in the back right corner, quietly reading separate from the main group.", "measured_bbox": [0.6944, 0.2802, 0.9406, 0.4237], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_lying_reading_blue_light.png", "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", "reference_verify": "references/reference_verify_person_lying_reading_blue_light.json", "reference_verify_passed": true, "reference_attempts": 2, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", "output": "references/ref_person_lying_reading_blue_light.png", "mask": "references/sam_mask_person_lying_reading_blue_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [360.0, 75.0, 665.0, 980.0], "mask_score": 3.501601, "mask_area_ratio": 0.137436, "elapsed_seconds": 7.1743}}, {"name": "person_holding_blue_flashlight", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", "source_name": "person", "source_description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. 
Scene context: A person is holding a lit blue flashlight in a dark environment.", "sub_caption": "person: An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.. Scene role: Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination.", "measured_bbox": [0.222, 0.574, 0.493, 0.981], "detection_confidence": 0.9, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_holding_blue_flashlight.png", "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", "reference_verify": "references/reference_verify_person_holding_blue_flashlight.json", "reference_verify_passed": true, "reference_attempts": 5, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", "output": "references/ref_person_holding_blue_flashlight.png", "mask": "references/sam_mask_person_holding_blue_flashlight.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [331.0, 17.0, 704.0, 1017.0], "mask_score": 3.354952, "mask_area_ratio": 0.150028, "elapsed_seconds": 7.2854}}, {"name": "person_standing_background", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", "source_name": "person", "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. 
Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", "sub_caption": "person: A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.. Scene role: Standing quietly in the background, observing the room while holding a round object.", "measured_bbox": [0.4462, 0.0267, 0.5613, 0.5921], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_standing_background.png", "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", "reference_verify": "references/reference_verify_person_standing_background.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", "output": "references/ref_person_standing_background.png", "mask": "references/sam_mask_person_standing_background.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [337.0, 25.0, 678.0, 1014.0], "mask_score": 3.31662, "mask_area_ratio": 0.134835, "elapsed_seconds": 7.1397}}, {"name": "person_face_in_red_light", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", "source_name": "person", "source_description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. 
Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner.", "sub_caption": "person: A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.. Scene role: Sitting near the center of the gathering, looking toward the others while bathed in red ambient light.", "measured_bbox": [0.5873, 0.345, 0.6506, 0.4705], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_face_in_red_light.png", "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", "reference_verify": "references/reference_verify_person_face_in_red_light.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", "output": "references/ref_person_face_in_red_light.png", "mask": "references/sam_mask_person_face_in_red_light.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [357.0, 50.0, 662.0, 997.0], "mask_score": 3.468133, "mask_area_ratio": 0.138924, "elapsed_seconds": 7.09}}, {"name": "hands_holding_small_object", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", "source_name": "hands", "source_description": "Visible hands, illuminated in reddish light, holding an object. Source dataset: Ego4D. 
Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background.", "sub_caption": "hands: A pair of hands illuminated in a reddish light, holding a small object against the dark environment.. Scene role: Sitting within the group, holding a small item in the pool of red light.", "measured_bbox": [0.5539, 0.5673, 0.6564, 0.6516], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_holding_small_object.png", "raw_ref_image": "references/raw_ref_hands_holding_small_object_attempt_01.png", "reference_verify": "references/reference_verify_hands_holding_small_object.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", "output": "references/ref_hands_holding_small_object.png", "mask": "references/sam_mask_hands_holding_small_object.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [347.0, 66.0, 678.0, 1002.0], "mask_score": 3.446404, "mask_area_ratio": 0.148472, "elapsed_seconds": 7.2113}}, {"name": "hands_holding_triangular_object", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", "source_name": "person", "source_description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. 
Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns.", "sub_caption": "person: Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.. Scene role: Showing the patterned triangular object to the group nearby.", "measured_bbox": [0.6869, 0.6745, 0.898, 0.9021], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_holding_triangular_object.png", "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", "reference_verify": "references/reference_verify_hands_holding_triangular_object.json", "reference_verify_passed": true, "reference_attempts": 5, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", "output": "references/ref_hands_holding_triangular_object.png", "mask": "references/sam_mask_hands_holding_triangular_object.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [371.0, 176.0, 647.0, 904.0], "mask_score": 3.472167, "mask_area_ratio": 0.075788, "elapsed_seconds": 7.6612}}, {"name": "hands_gesturing", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", "source_name": "person", "source_description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. 
Scene context: A person's hands are visible in a dark room illuminated by stage lights.", "sub_caption": "person: A person's hands catching the red stage-like lighting, caught mid-gesture.. Scene role: Actively gesturing and conversing with the central group.", "measured_bbox": [0.233, 0.5315, 0.3583, 0.6782], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_hands_gesturing.png", "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", "reference_verify": "references/reference_verify_hands_gesturing.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", "output": "references/ref_hands_gesturing.png", "mask": "references/sam_mask_hands_gesturing.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [350.0, 64.0, 677.0, 979.0], "mask_score": 3.460945, "mask_area_ratio": 0.132824, "elapsed_seconds": 7.2309}}, {"name": "plastic_water_bottle", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P26_124:object:6", "source_name": "water bottle", "source_description": "A clear plastic bottle, likely containing water, with a blue and white label, standing on the counter to the right. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces.", "sub_caption": "water bottle: A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.. 
Scene role: Resting upright on the floor in the center of the group, reflecting the dramatic lighting.", "measured_bbox": [0.5155, 0.7075, 0.5653, 0.9622], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_plastic_water_bottle.png", "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", "reference_verify": "references/reference_verify_plastic_water_bottle.json", "reference_verify_passed": true, "reference_attempts": 3, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", "output": "references/ref_plastic_water_bottle.png", "mask": "references/sam_mask_plastic_water_bottle.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [356.0, 72.0, 666.0, 982.0], "mask_score": 3.471713, "mask_area_ratio": 0.17085, "elapsed_seconds": 9.3022}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} +{"sample_id": "sample_000011", "target_total": 10, "target_people": 1, "target_objects": 9, "canvas_size": [1248, 832], "canvas_aspect_ratio": "3:2", "main_image": "main_image.png", "bbox_overlay": "bbox_overlay.png", "plan": "plan.json", "detections": "detections.json", "vocab_task": "vocab_task.json", "n_planned": 10, "n_detected": 10, "n_subjects": 10, "subjects": [{"name": "person_hands", "is_person": true, "subject_type": "person", "source_set": "people_set", "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", "source_name": "person", "source_description": "The person's hands are visible, illuminated by a red light, 
and they appear to be holding a small object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light.", "sub_caption": "person: The person's hands are visible, illuminated by a strong red light, holding a small object or tool.. Scene role: Working intently, holding the metal tool over the granite countertop.", "measured_bbox": [0.1787, 0.2332, 0.4617, 0.5792], "detection_confidence": 0.95, "ref_style": "white_bg_full_body_front", "ref_image": "references/ref_person_hands.png", "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", "reference_verify": "references/reference_verify_person_hands.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", "output": "references/ref_person_hands.png", "mask": "references/sam_mask_person_hands.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [338.0, 36.0, 704.0, 1013.0], "mask_score": 3.415329, "mask_area_ratio": 0.148867, "elapsed_seconds": 7.2976}}, {"name": "textured_mat", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", "source_name": "textured surface", "source_description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. 
Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment.", "sub_caption": "textured surface: A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.. Scene role: Lying flat on the granite countertop under the person's hands.", "measured_bbox": [0.1138, 0.5866, 0.5947, 0.7728], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_textured_mat.png", "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", "reference_verify": "references/reference_verify_textured_mat.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", "output": "references/ref_textured_mat.png", "mask": "references/sam_mask_textured_mat.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [29.0, 292.0, 994.0, 732.0], "mask_score": 3.388931, "mask_area_ratio": 0.280238, "elapsed_seconds": 7.3108}}, {"name": "wall_shelf", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", "source_name": "shelf", "source_description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap.", "sub_caption": "shelf: A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.. 
Scene role: Attached to the wall in the shadowy background above the counter.", "measured_bbox": [0.5206, 0.0681, 0.6781, 0.3789], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_wall_shelf.png", "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", "reference_verify": "references/reference_verify_wall_shelf.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", "output": "references/ref_wall_shelf.png", "mask": "references/sam_mask_wall_shelf.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [176.0, 42.0, 861.0, 940.0], "mask_score": 3.354082, "mask_area_ratio": 0.233056, "elapsed_seconds": 7.1907}}, {"name": "striped_towel", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P03_118:object:22", "source_name": "towel", "source_description": "A black and white striped towel hanging from the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring.", "sub_caption": "towel: A black and white striped towel.. 
Scene role: Hanging down from a lower cabinet handle just below the edge of the countertop.", "measured_bbox": [0.6082, 0.7266, 0.7242, 0.9973], "detection_confidence": 0.98, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_striped_towel.png", "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", "reference_verify": "references/reference_verify_striped_towel.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", "output": "references/ref_striped_towel.png", "mask": "references/sam_mask_striped_towel.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [125.0, 53.0, 897.0, 971.0], "mask_score": 3.50075, "mask_area_ratio": 0.517391, "elapsed_seconds": 7.4653}}, {"name": "glass_bottle", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P30_113:object:5", "source_name": "bottle", "source_description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator.", "sub_caption": "bottle: A clear glass bottle filled with amber liquid.. 
Scene role: Standing on the granite countertop to the side, catching dim reflections.", "measured_bbox": [0.2049, 0.4778, 0.2764, 0.6209], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_glass_bottle.png", "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", "reference_verify": "references/reference_verify_glass_bottle.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", "output": "references/ref_glass_bottle.png", "mask": "references/sam_mask_glass_bottle.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [198.0, 0.0, 820.0, 1008.0], "mask_score": 3.251198, "mask_area_ratio": 0.458004, "elapsed_seconds": 7.2584}}, {"name": "dirty_plate", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P26_102:object:5", "source_name": "plate", "source_description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby.", "sub_caption": "plate: A partially visible plate containing food remnants.. 
Scene role: Sitting on the counter near the bottle in the dim light.", "measured_bbox": [0.003, 0.5981, 0.2, 0.6793], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_dirty_plate.png", "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", "reference_verify": "references/reference_verify_dirty_plate.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", "output": "references/ref_dirty_plate.png", "mask": "references/sam_mask_dirty_plate.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [33.0, 334.0, 963.0, 685.0], "mask_score": 3.317592, "mask_area_ratio": 0.170906, "elapsed_seconds": 7.0879}}, {"name": "metal_tool", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", "source_name": "dark object", "source_description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers.", "sub_caption": "dark object: A dark, silhouetted tool with a thin, elongated structure resembling pliers.. 
Scene role: Held firmly by the person's hands under the red light.", "measured_bbox": [0.203, 0.348, 0.476, 0.468], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_metal_tool.png", "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", "reference_verify": "references/reference_verify_metal_tool.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", "output": "references/ref_metal_tool.png", "mask": "references/sam_mask_metal_tool.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [51.0, 66.0, 993.0, 937.0], "mask_score": 0.918062, "mask_area_ratio": 0.928193, "elapsed_seconds": 7.1989}}, {"name": "box_grater", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P34_111:object:34", "source_name": "grater", "source_description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items.", "sub_caption": "grater: A metal box grater.. 
Scene role: Resting on the counter towards the background left.", "measured_bbox": [0.4842, 0.4749, 0.5853, 0.5496], "detection_confidence": 0.99, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_box_grater.png", "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", "reference_verify": "references/reference_verify_box_grater.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", "output": "references/ref_box_grater.png", "mask": "references/sam_mask_box_grater.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [236.0, 16.0, 787.0, 1015.0], "mask_score": 3.453517, "mask_area_ratio": 0.346949, "elapsed_seconds": 7.1571}}, {"name": "wooden_cabinets", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P28_106:object:8", "source_name": "kitchen cabinets (right)", "source_description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides.", "sub_caption": "kitchen cabinets (right): A row of light brown wooden cabinets with metal handles.. 
Scene role: Lining the right side of the kitchen in the background shadows.", "measured_bbox": [0.7319, 0.5034, 0.9945, 0.9951], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_wooden_cabinets.png", "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", "reference_verify": "references/reference_verify_wooden_cabinets.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", "output": "references/ref_wooden_cabinets.png", "mask": "references/sam_mask_wooden_cabinets.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [36.0, 253.0, 986.0, 809.0], "mask_score": 3.454364, "mask_area_ratio": 0.365833, "elapsed_seconds": 7.226}}, {"name": "granite_counter", "is_person": false, "subject_type": "object", "source_set": "obj_set", "source_image_id": "EPIC-Kitchens:P04_108:object:19", "source_name": "granite countertop", "source_description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right.", "sub_caption": "granite countertop: A grey, white, and black speckled stone countertop.. 
Scene role: The main horizontal surface spanning the lower half of the frame, holding the scattered items.", "measured_bbox": [0.003, 0.5275, 0.6738, 0.883], "detection_confidence": 0.95, "ref_style": "white_bg_encyclopedia_photo", "ref_image": "references/ref_granite_counter.png", "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", "reference_verify": "references/reference_verify_granite_counter.json", "reference_verify_passed": true, "reference_attempts": 1, "sam_white_bg": {"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", "output": "references/ref_granite_counter.png", "mask": "references/sam_mask_granite_counter.png", "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", "sam_model_type": "vit_b", "sam_device": "auto", "sam_working_size": [640, 640], "sam_max_side": 640, "sam_downscale": 0.625, "prompt_box_xyxy": [0.0, 71.0, 1023.0, 940.0], "mask_score": 3.480751, "mask_area_ratio": 0.58655, "elapsed_seconds": 7.1983}}], "not_emitted": [], "model_ids": {"chat_model": "gcp/google/gemini-3.1-pro-preview", "image_model": "gcp/google/gemini-3-pro-image-preview"}} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..6a016e7cbfb2a7ba266347923a204bdc2fb285eb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000001.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000001", + "plan_path": "sample_000001/plan.json", + "task_path": "sample_000001/vocab_task.json", + "main_image": "sample_000001/main_image.png", + "detections": "sample_000001/detections.json", + "n_detected": 3, + "model_ids": { 
+ "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000001", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..b25aedb689727ef7284d2ca905e87ba2906b74cf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000003.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000003", + "plan_path": "sample_000003/plan.json", + "task_path": "sample_000003/vocab_task.json", + "main_image": "sample_000003/main_image.png", + "detections": "sample_000003/detections.json", + "n_detected": 3, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000003", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..f40f336f128af5e7e15f22921c27ec7aa9c61d24 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000004.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000004", + "plan_path": "sample_000004/plan.json", + "task_path": "sample_000004/vocab_task.json", + "main_image": "sample_000004/main_image.png", + "detections": "sample_000004/detections.json", + "n_detected": 5, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000004", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000005.json new file mode 100644 index 0000000000000000000000000000000000000000..03ed0f70c943bc5440cb95b8a1b1f89de8d110f6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000005.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000005", + "plan_path": "sample_000005/plan.json", + "task_path": "sample_000005/vocab_task.json", + "main_image": "sample_000005/main_image.png", + "detections": "sample_000005/detections.json", + "n_detected": 6, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000005", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..3ed9eef0b60cc3b94c5aa658234c2f3b5d3f343d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000006.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000006", + "plan_path": "sample_000006/plan.json", + "task_path": "sample_000006/vocab_task.json", + "main_image": "sample_000006/main_image.png", + "detections": "sample_000006/detections.json", + "n_detected": 8, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000006", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000007.json new file mode 100644 index 
0000000000000000000000000000000000000000..f00eefe3401327cafa2e0130658c57e988781ed4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000007.json @@ -0,0 +1,32 @@ +{ + "sample_id": "sample_000007", + "plan_path": "sample_000007/plan.json", + "task_path": "sample_000007/vocab_task.json", + "main_image": "sample_000007/main_image.png", + "detections": "sample_000007/detections.json", + "n_detected": 14, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000007", + "pool": "detection_pool", + "retry_count": 3, + "errors": [ + { + "time": 1782260885.2187717, + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark area' is entirely absent; Image shows a cardboard box instead of the requested subject", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 866, in generate_references\n references.append(helpers.diversify_subject(get_client(), sdir, main_image, subject, detection))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/v8_helpers.py\", line 693, in diversify_subject\n raise RuntimeError(\nRuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark area' is entirely absent; Image shows a cardboard box instead of the requested subject\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", 
line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1113, in handler\n references, reference_errors = generate_references(sample_id, plan, detections)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 871, in generate_references\n raise RuntimeError(f\"reference generation or verification failed for {name}: {errors[name]}\") from exc\nRuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark area' is entirely absent; Image shows a cardboard box instead of the requested subject\n" + }, + { + "time": 1782261302.7464893, + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject is absent; image shows a cardboard box instead of a dark shadowed area.", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 866, in generate_references\n references.append(helpers.diversify_subject(get_client(), sdir, main_image, subject, detection))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/v8_helpers.py\", line 693, in diversify_subject\n raise RuntimeError(\nRuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject is absent; image shows a cardboard box instead of a dark shadowed area.\n\nThe above exception was the direct cause of the following 
exception:\n\nTraceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1113, in handler\n references, reference_errors = generate_references(sample_id, plan, detections)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 871, in generate_references\n raise RuntimeError(f\"reference generation or verification failed for {name}: {errors[name]}\") from exc\nRuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject is absent; image shows a cardboard box instead of a dark shadowed area.\n" + }, + { + "time": 1782261717.330226, + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 866, in generate_references\n references.append(helpers.diversify_subject(get_client(), sdir, main_image, subject, detection))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/v8_helpers.py\", line 693, in diversify_subject\n raise RuntimeError(\nRuntimeError: reference verification failed for dark_area after 
10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1113, in handler\n references, reference_errors = generate_references(sample_id, plan, detections)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 871, in generate_references\n raise RuntimeError(f\"reference generation or verification failed for {name}: {errors[name]}\") from exc\nRuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..9b16fa3a8a181c2c4b2de45e0e083a4d038bb357 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000008.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000008", + "plan_path": "sample_000008/plan.json", + "task_path": "sample_000008/vocab_task.json", + "main_image": "sample_000008/main_image.png", + "detections": "sample_000008/detections.json", + "n_detected": 10, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": 
"gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000008", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..56298ed97dc892b6d0e7828308a3b4b27da78110 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000009.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000009", + "plan_path": "sample_000009/plan.json", + "task_path": "sample_000009/vocab_task.json", + "main_image": "sample_000009/main_image.png", + "detections": "sample_000009/detections.json", + "n_detected": 5, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000009", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..ed77a8c9807dd70bc03fecc518643c278d9b7b8c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000010.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000010", + "plan_path": "sample_000010/plan.json", + "task_path": "sample_000010/vocab_task.json", + "main_image": "sample_000010/main_image.png", + "detections": "sample_000010/detections.json", + "n_detected": 9, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000010", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000011.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..491bc1184efcc808d55f6c2bd1dc16f966999749 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/detection_pool/done/sample_000011.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000011", + "plan_path": "sample_000011/plan.json", + "task_path": "sample_000011/vocab_task.json", + "main_image": "sample_000011/main_image.png", + "detections": "sample_000011/detections.json", + "n_detected": 10, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000011", + "pool": "detection_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..64fd57efb906647af4a3e085e9a20b02e99016fa --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000001.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000001", + "row": "sample_000001/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..ff0968713613b2c92d71ff03fa795c586634d671 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000003.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000003", + "row": "sample_000003/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000004.json new file mode 
100644 index 0000000000000000000000000000000000000000..faae18874e83856e048ef243f5e9824b4e70a448 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000004.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000004", + "row": "sample_000004/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000005.json new file mode 100644 index 0000000000000000000000000000000000000000..f361f5514fe3ae9586e376112f859ad1c648e2f2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000005.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000005", + "row": "sample_000005/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..bba746622988c822c4538ecf7960fc469dc76dda --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000006.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000006", + "row": "sample_000006/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000007.json new file mode 100644 index 0000000000000000000000000000000000000000..7575e489749caca9f3d7affae09118ea4f9a9f31 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000007.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000007", + "row": "sample_000007/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..a5ab986f3299e71f9a73b25d79b2c1d8d18a9076 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000008.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000008", + "row": "sample_000008/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..fcb5d99e3592f42c93fa0307eec42780967578dc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000009.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000009", + "row": "sample_000009/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..cd4e0ba4748f5fc45cf2007f4067b22c15ad6ddf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000010.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000010", + "row": "sample_000010/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000011.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..e9a99bbc10226a952ae2914eb9d3c3f13a4b93cb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/emit_pool/done/sample_000011.json @@ -0,0 +1,4 @@ +{ + "sample_id": "sample_000011", + "row": "sample_000011/row.json" +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..e345c067407f748e75c36fd21f7464082af4e750 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000001.json @@ -0,0 +1,14 @@ +{ + "sample_id": 
"sample_000001", + "plan_path": "sample_000001/plan.json", + "task_path": "sample_000001/vocab_task.json", + "prompt_hash": "de111ef88cac721a573e1463080c7ef8f709ab3311c0be649576445c9637d6d5", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000001", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000002.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000002.json new file mode 100644 index 0000000000000000000000000000000000000000..a1b147bb42be14dbe311225dc98ab9cca9f05157 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000002.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000002", + "plan_path": "sample_000002/plan.json", + "task_path": "sample_000002/vocab_task.json", + "prompt_hash": "124ba3f0479caaeb5896d579dbace9891cca2fb6875690fdb3f66ee3524a7e9e", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000002", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..9f79d725db6dfc9ffbaa077c32ac7f5bf8b9667f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000003.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000003", + "plan_path": "sample_000003/plan.json", + "task_path": "sample_000003/vocab_task.json", + "prompt_hash": "63e951d22cadbf6e15b5fd08f5054b228df47fc857c4e48e47c23160da12a111", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + 
"item_id": "sample_000003", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..9e52f76096cc8522ae240ea8b3267e4969c22f84 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000004.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000004", + "plan_path": "sample_000004/plan.json", + "task_path": "sample_000004/vocab_task.json", + "prompt_hash": "3f5c74d4cad870d092def8f4466e975d281c99678ca962809503c5c456a06f49", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000004", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000005.json new file mode 100644 index 0000000000000000000000000000000000000000..483679e151c10da69ee52be839825322a06f8d24 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000005.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000005", + "plan_path": "sample_000005/plan.json", + "task_path": "sample_000005/vocab_task.json", + "prompt_hash": "946054fe68ce2e3f63d85b3eddd1a3e63af4d647ef6dc47764d65eb13cf867f4", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000005", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000006.json new file mode 100644 index 
0000000000000000000000000000000000000000..1a26f5ffc5f51fb8819456616c2ff390503caea3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000006.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000006", + "plan_path": "sample_000006/plan.json", + "task_path": "sample_000006/vocab_task.json", + "prompt_hash": "006a1741e37f4e2fda0c82cbfbd7c6119932755e84be0002d17055a7af25bb79", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000006", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000007.json new file mode 100644 index 0000000000000000000000000000000000000000..1aedcb39260fbb55daa4cb7565d014378fdde5fd --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000007.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000007", + "plan_path": "sample_000007/plan.json", + "task_path": "sample_000007/vocab_task.json", + "prompt_hash": "e999f398d25887be57aad833cb2b38dd0d1b370d4b6b6019fbde86bc6c809c69", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000007", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..3007f5f56febdfc9997b600c0582ceccf7fc8866 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000008.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000008", + "plan_path": "sample_000008/plan.json", + "task_path": "sample_000008/vocab_task.json", + 
"prompt_hash": "63aefc9a503e34719b1b91ec3397c9b92dc92ff7ce2c14e2c7bbe458bad64ba1", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000008", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..653a55cd087b5b48e0d919e13e26bcc78c66ab85 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000009.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000009", + "plan_path": "sample_000009/plan.json", + "task_path": "sample_000009/vocab_task.json", + "prompt_hash": "929afc2e2f3cfd582eb2c6b53c41f60141480cb35a1ce251ed1752a6e98ba9ea", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000009", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9cca268131443a1ab580398b7a8c24316e54b8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000010.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000010", + "plan_path": "sample_000010/plan.json", + "task_path": "sample_000010/vocab_task.json", + "prompt_hash": "74f4fe54dda8f633b152876c6b6433d4a90a0421304dd6db44bd84b9519110f8", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000010", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000011.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..541f6d3fae8f98ecaff4b6acff8777900ba10f1b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/plan_pool/done/sample_000011.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000011", + "plan_path": "sample_000011/plan.json", + "task_path": "sample_000011/vocab_task.json", + "prompt_hash": "e6c24d29d9d4a97828419fab43954232185614106bdd1c11f6a196e5398eabbc", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000011", + "pool": "plan_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..86ebed9b49e21a1f17a8f10ac99469542619da58 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000001.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000001", + "plan_path": "sample_000001/plan.json", + "task_path": "sample_000001/vocab_task.json", + "main_image": "sample_000001/main_image.png", + "detections": "sample_000001/detections.json", + "references": "sample_000001/references.json", + "n_references": 3, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000001", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000003.json new file mode 100644 index 
0000000000000000000000000000000000000000..08509d33a48cc140f2a135490d689a7bd640f1e4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000003.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000003", + "plan_path": "sample_000003/plan.json", + "task_path": "sample_000003/vocab_task.json", + "main_image": "sample_000003/main_image.png", + "detections": "sample_000003/detections.json", + "references": "sample_000003/references.json", + "n_references": 3, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000003", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..163afdc32f8b51f4e6f8c7a8417d3e1acf13b3d0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000004.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000004", + "plan_path": "sample_000004/plan.json", + "task_path": "sample_000004/vocab_task.json", + "main_image": "sample_000004/main_image.png", + "detections": "sample_000004/detections.json", + "references": "sample_000004/references.json", + "n_references": 3, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000004", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000005.json new file mode 100644 index 
0000000000000000000000000000000000000000..f87ac5a26cbbf7728590f11a74c891cb3df38c7f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000005.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000005", + "plan_path": "sample_000005/plan.json", + "task_path": "sample_000005/vocab_task.json", + "main_image": "sample_000005/main_image.png", + "detections": "sample_000005/detections.json", + "references": "sample_000005/references.json", + "n_references": 6, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000005", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..1754b27839a1e822553c88896224ad3752c047f2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000006.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000006", + "plan_path": "sample_000006/plan.json", + "task_path": "sample_000006/vocab_task.json", + "main_image": "sample_000006/main_image.png", + "detections": "sample_000006/detections.json", + "references": "sample_000006/references.json", + "n_references": 8, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000006", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000007.json new file mode 100644 index 
0000000000000000000000000000000000000000..703de663bd63e15991cdf6b3c3c8cb644c8569bc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000007.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000007", + "plan_path": "sample_000007/plan.json", + "task_path": "sample_000007/vocab_task.json", + "main_image": "sample_000007/main_image.png", + "detections": "sample_000007/detections.json", + "references": "sample_000007/references.json", + "n_references": 14, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000007", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e428ae07b9c48a4d817602609e5105c1ad485 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000008.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000008", + "plan_path": "sample_000008/plan.json", + "task_path": "sample_000008/vocab_task.json", + "main_image": "sample_000008/main_image.png", + "detections": "sample_000008/detections.json", + "references": "sample_000008/references.json", + "n_references": 10, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000008", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000009.json new file mode 100644 index 
0000000000000000000000000000000000000000..0b06f2f83d20135b5ffdb0bfc12da679cd5c02c2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000009.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000009", + "plan_path": "sample_000009/plan.json", + "task_path": "sample_000009/vocab_task.json", + "main_image": "sample_000009/main_image.png", + "detections": "sample_000009/detections.json", + "references": "sample_000009/references.json", + "n_references": 5, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000009", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..d9f3f8f8c770a8f4791feacad3852bbf8417fb12 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000010.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000010", + "plan_path": "sample_000010/plan.json", + "task_path": "sample_000010/vocab_task.json", + "main_image": "sample_000010/main_image.png", + "detections": "sample_000010/detections.json", + "references": "sample_000010/references.json", + "n_references": 9, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000010", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000011.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000011.json new file mode 100644 index 
0000000000000000000000000000000000000000..853fd1e05047a734e5439eecf213315bfd2cf570 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/reference_pool/done/sample_000011.json @@ -0,0 +1,18 @@ +{ + "sample_id": "sample_000011", + "plan_path": "sample_000011/plan.json", + "task_path": "sample_000011/vocab_task.json", + "main_image": "sample_000011/main_image.png", + "detections": "sample_000011/detections.json", + "references": "sample_000011/references.json", + "n_references": 10, + "reference_errors": {}, + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000011", + "pool": "reference_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..507a9b8097bb191f30f2679ded0b1f1b836a2820 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000001.json @@ -0,0 +1,164 @@ +{ + "sample_id": "sample_000001", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. 
Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object.", + "sub_caption": "person: A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.. Scene role: Operating the small device in the foreground", + "measured_bbox": [ + 0.301, + 0.039, + 0.714, + 0.98 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 324.0, + 26.0, + 699.0, + 1013.0 + ], + "mask_score": 3.392402, + "mask_area_ratio": 0.165197, + "elapsed_seconds": 53.7174 + } + }, + { + "name": "red_illuminated_structure", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "source_description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. 
Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights.", + "sub_caption": "red illuminated structure: A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.. Scene role: Forms the textured, atmospheric background behind the person", + "measured_bbox": [ + 0.6096, + 0.0, + 0.9961, + 0.991 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_structure.png", + "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_structure.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", + "output": "references/ref_red_illuminated_structure.png", + "mask": "references/sam_mask_red_illuminated_structure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 0.0, + 1023.0, + 1023.0 + ], + "mask_score": 1.966617, + "mask_area_ratio": 0.597257, + "elapsed_seconds": 10.2671 + } + }, + { + "name": "green_indicator_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "source_description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. 
Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible.", + "sub_caption": "green light: A small, bright green glowing LED indicator light piercing through the darkness.. Scene role: Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting", + "measured_bbox": [ + 0.1227, + 0.438, + 0.1605, + 0.4975 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_green_indicator_light.png", + "raw_ref_image": "references/raw_ref_green_indicator_light_attempt_01.png", + "reference_verify": "references/reference_verify_green_indicator_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", + "output": "references/ref_green_indicator_light.png", + "mask": "references/sam_mask_green_indicator_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 335.0, + 676.0, + 688.0 + ], + "mask_score": 3.439631, + "mask_area_ratio": 0.059673, + "elapsed_seconds": 10.1365 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..1857e365e6a8e81bea3f348ded63dace118c227a --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000003.json @@ -0,0 +1,164 @@ +{ + "sample_id": "sample_000003", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "source_description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light.", + "sub_caption": "person: Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.. 
Scene role: Interacting with the open book, fingers lightly resting on the visible pages.", + "measured_bbox": [ + 0.0, + 0.0, + 0.5655, + 1.0 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 351.0, + 54.0, + 667.0, + 1015.0 + ], + "mask_score": 3.471897, + "mask_area_ratio": 0.134706, + "elapsed_seconds": 8.8737 + } + }, + { + "name": "red_illuminated_book", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "source_name": "book", + "source_description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness.", + "sub_caption": "book: A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.. 
Scene role: Lying open flat on the surface directly beneath the person's hands.", + "measured_bbox": [ + 0.2245, + 0.3971, + 0.6708, + 0.6724 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_book.png", + "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_book.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", + "output": "references/ref_red_illuminated_book.png", + "mask": "references/sam_mask_red_illuminated_book.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 180.0, + 1002.0, + 858.0 + ], + "mask_score": 3.463648, + "mask_area_ratio": 0.502075, + "elapsed_seconds": 7.3171 + } + }, + { + "name": "shadowy_doorway", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "source_description": "A darker, arched or rectangular shape on the wall, suggesting an opening to another area. Source dataset: Ego4D. Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures.", + "sub_caption": "doorway or opening: A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.. 
Scene role: Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area.", + "measured_bbox": [ + 0.5153, + 0.0267, + 0.8188, + 0.8345 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_doorway.png", + "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_doorway.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", + "output": "references/ref_shadowy_doorway.png", + "mask": "references/sam_mask_shadowy_doorway.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 141.0, + 11.0, + 899.0, + 969.0 + ], + "mask_score": 3.471354, + "mask_area_ratio": 0.542183, + "elapsed_seconds": 7.2941 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..731a99cdc18ab8e6157ae95a80e8c68e20ece96d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000004.json @@ -0,0 +1,173 @@ +{ + "sample_id": "sample_000004", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 864, + 1152 + ], + "canvas_aspect_ratio": "3:4", + 
"main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 5, + "n_detected": 5, + "n_subjects": 3, + "subjects": [ + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side.", + "sub_caption": "blue light source: A small, blurry blue light emitting a faint, cool glow against the darkness.. Scene role: Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting.", + "measured_bbox": [ + 0.5155, + 0.3724, + 0.553, + 0.4239 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": 
[ + 132.0, + 159.0, + 891.0, + 864.0 + ], + "mask_score": 3.485053, + "mask_area_ratio": 0.292151, + "elapsed_seconds": 9.6106 + } + }, + { + "name": "paint_roller", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "source_description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting.", + "sub_caption": "paint roller: A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.. Scene role: Resting on a nearby surface in the foreground, catching subtle highlights from the red light.", + "measured_bbox": [ + 0.2917, + 0.532, + 0.4405, + 0.7522 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_paint_roller.png", + "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", + "reference_verify": "references/reference_verify_paint_roller.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", + "output": "references/ref_paint_roller.png", + "mask": "references/sam_mask_paint_roller.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 136.0, + 75.0, + 866.0, + 950.0 + ], + "mask_score": 3.459168, + "mask_area_ratio": 0.116944, + "elapsed_seconds": 7.1624 + } + }, + { + "name": "draped_tarp", + 
"is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "source_description": "A draped, light-colored or beige material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp.", + "sub_caption": "light-colored material: A draped, light-colored tarp or drop cloth covering large, indistinct shapes.. Scene role: Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting.", + "measured_bbox": [ + 0.392, + 0.4207, + 0.8243, + 0.8862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_draped_tarp.png", + "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", + "reference_verify": "references/reference_verify_draped_tarp.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", + "output": "references/ref_draped_tarp.png", + "mask": "references/sam_mask_draped_tarp.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 189.0, + 1023.0, + 888.0 + ], + "mask_score": 3.482188, + "mask_area_ratio": 0.485075, + "elapsed_seconds": 7.4131 + } + } + ], + "not_emitted": [ + { + "name": "worker_in_cap", + "reason": "not_detected" + }, + { + "name": 
"stacked_boxes", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000005.json new file mode 100644 index 0000000000000000000000000000000000000000..3fa46fac137429f70b533a51fc6cb221a17a584d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000005.json @@ -0,0 +1,302 @@ +{ + "sample_id": "sample_000005", + "target_total": 6, + "target_people": 3, + "target_objects": 3, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 6, + "n_detected": 6, + "n_subjects": 6, + "subjects": [ + { + "name": "pizza_prep_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "source_description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes.", + "sub_caption": "person: Hands and lower arms visible, preparing a pizza on a wooden surface.. 
Scene role: In the foreground, actively making a pizza.", + "measured_bbox": [ + 0.532, + 0.2245, + 1.0, + 0.7871 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_pizza_prep_hands.png", + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", + "reference_verify": "references/reference_verify_pizza_prep_hands.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", + "output": "references/ref_pizza_prep_hands.png", + "mask": "references/sam_mask_pizza_prep_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 349.0, + 14.0, + 677.0, + 1016.0 + ], + "mask_score": 3.451951, + "mask_area_ratio": 0.150558, + "elapsed_seconds": 10.2196 + } + }, + { + "name": "person_in_jacket", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "source_name": "person", + "source_description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting.", + "sub_caption": "person: Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.. 
Scene role: Standing in the midground, facing away toward the kitchen cabinets.", + "measured_bbox": [ + 0.0, + 0.0, + 0.361, + 0.675 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_jacket.png", + "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_02.png", + "reference_verify": "references/reference_verify_person_in_jacket.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", + "output": "references/ref_person_in_jacket.png", + "mask": "references/sam_mask_person_in_jacket.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 15.0, + 684.0, + 1023.0 + ], + "mask_score": 3.414528, + "mask_area_ratio": 0.160464, + "elapsed_seconds": 10.2925 + } + }, + { + "name": "person_with_pan", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: Person seen from the torso down, holding a dark round object.. 
Scene role: Standing in the background near a blue wall.", + "measured_bbox": [ + 0.1195, + 0.0021, + 0.4483, + 0.6302 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_with_pan.png", + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", + "reference_verify": "references/reference_verify_person_with_pan.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", + "output": "references/ref_person_with_pan.png", + "mask": "references/sam_mask_person_with_pan.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 18.0, + 690.0, + 1023.0 + ], + "mask_score": 3.408831, + "mask_area_ratio": 0.163625, + "elapsed_seconds": 10.3028 + } + }, + { + "name": "salt_box", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "source_description": "A small cardboard box with blue and black text, sitting on the counter near the stove. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils.", + "sub_caption": "box of salt: A small cardboard box with blue and black graphical patterns, resembling a salt container.. 
Scene role: Resting on the wooden counter next to the pizza prep area.", + "measured_bbox": [ + 0.5558, + 0.4006, + 0.6966, + 0.4937 + ], + "detection_confidence": 0.5, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_salt_box.png", + "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", + "reference_verify": "references/reference_verify_salt_box.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", + "output": "references/ref_salt_box.png", + "mask": "references/sam_mask_salt_box.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 73.0, + 268.0, + 952.0, + 762.0 + ], + "mask_score": 3.471932, + "mask_area_ratio": 0.353847, + "elapsed_seconds": 10.266 + } + }, + { + "name": "cutlery_set", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "source_description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "cutlery: A silver fork and a knife resting on a yellow plate.. 
Scene role: Placed on the counter in the foreground left.", + "measured_bbox": [ + 0.1913, + 0.8329, + 0.6116, + 0.9636 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cutlery_set.png", + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", + "reference_verify": "references/reference_verify_cutlery_set.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", + "output": "references/ref_cutlery_set.png", + "mask": "references/sam_mask_cutlery_set.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 28.0, + 122.0, + 1013.0, + 887.0 + ], + "mask_score": 3.237995, + "mask_area_ratio": 0.452152, + "elapsed_seconds": 10.821 + } + }, + { + "name": "wall_cabinet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "source_description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid.", + "sub_caption": "cabinet: A wooden cabinet positioned above a kitchen counter.. 
Scene role: Mounted on the wall in the midground, directly above the counter.", + "measured_bbox": [ + 0.5897, + 0.0017, + 0.9469, + 0.1735 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_cabinet.png", + "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", + "reference_verify": "references/reference_verify_wall_cabinet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", + "output": "references/ref_wall_cabinet.png", + "mask": "references/sam_mask_wall_cabinet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 15.0, + 31.0, + 1008.0, + 1013.0 + ], + "mask_score": 3.398914, + "mask_area_ratio": 0.735002, + "elapsed_seconds": 10.923 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..f1a23fe97b9b73fd13a72199ed0ac66fceb1c7b5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000006.json @@ -0,0 +1,311 @@ +{ + "sample_id": "sample_000006", + "target_total": 8, + "target_people": 1, + "target_objects": 7, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + 
"plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 8, + "n_detected": 8, + "n_subjects": 6, + "subjects": [ + { + "name": "person_at_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + "source_description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "person: A person's left hand and lower body wearing dark trousers and black shoes with white soles.. Scene role: Standing in the foreground, reaching their left hand towards the kitchen faucet.", + "measured_bbox": [ + 0.0, + 0.3583, + 0.2441, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_at_sink.png", + "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_at_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", + "output": "references/ref_person_at_sink.png", + "mask": "references/sam_mask_person_at_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 320.0, + 24.0, + 689.0, + 1002.0 + ], + 
"mask_score": 3.450755, + "mask_area_ratio": 0.163542, + "elapsed_seconds": 31.0984 + } + }, + { + "name": "wooden_door", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "source_description": "A light brown wooden door, closed, with a metallic door handle. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left.", + "sub_caption": "door: A light brown wooden door, closed, with a metallic handle.. Scene role: Visible in the background on the kitchen wall.", + "measured_bbox": [ + 0.2139, + 0.0, + 0.3994, + 0.412 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_door.png", + "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_door.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", + "output": "references/ref_wooden_door.png", + "mask": "references/sam_mask_wooden_door.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 256.0, + 26.0, + 767.0, + 1015.0 + ], + "mask_score": 3.446321, + "mask_area_ratio": 0.388947, + "elapsed_seconds": 9.29 + } + }, + { + "name": "wrapped_cucumber", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": 
"cucumber 2", + "source_description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background.", + "sub_caption": "cucumber 2: A long, green cucumber wrapped in clear plastic.. Scene role: Resting horizontally on the dark countertop next to the sink.", + "measured_bbox": [ + 0.2215, + 0.4465, + 0.4029, + 0.5104 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wrapped_cucumber.png", + "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", + "reference_verify": "references/reference_verify_wrapped_cucumber.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", + "output": "references/ref_wrapped_cucumber.png", + "mask": "references/sam_mask_wrapped_cucumber.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 397.0, + 1011.0, + 624.0 + ], + "mask_score": 3.430612, + "mask_area_ratio": 0.118574, + "elapsed_seconds": 7.2551 + } + }, + { + "name": "gas_stove", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "source_description": "A gas stove with 
black grates on the left side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets.", + "sub_caption": "stove: A kitchen gas stove with black grates.. Scene role: Positioned along the counter in the background.", + "measured_bbox": [ + 0.03, + 0.345, + 0.318, + 0.444 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_gas_stove.png", + "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", + "reference_verify": "references/reference_verify_gas_stove.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", + "output": "references/ref_gas_stove.png", + "mask": "references/sam_mask_gas_stove.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 58.0, + 180.0, + 965.0, + 842.0 + ], + "mask_score": 3.470988, + "mask_area_ratio": 0.477615, + "elapsed_seconds": 7.3908 + } + }, + { + "name": "water_pitcher", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "source_description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. 
Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor.", + "sub_caption": "water pitcher: A clear plastic water pitcher with a white handle and lid.. Scene role: Sitting upright on the counter.", + "measured_bbox": [ + 0.3327, + 0.2732, + 0.4536, + 0.4573 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_water_pitcher.png", + "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", + "reference_verify": "references/reference_verify_water_pitcher.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", + "output": "references/ref_water_pitcher.png", + "mask": "references/sam_mask_water_pitcher.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 242.0, + 115.0, + 856.0, + 972.0 + ], + "mask_score": 3.323768, + "mask_area_ratio": 0.450877, + "elapsed_seconds": 7.2849 + } + }, + { + "name": "silver_spoon", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "source_description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around.", + "sub_caption": "Silver Spoon: A small silver metal spoon.. 
Scene role: Laying flat on the messy countertop near the cucumber.", + "measured_bbox": [ + 0.3001, + 0.4801, + 0.3988, + 0.539 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_spoon.png", + "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", + "reference_verify": "references/reference_verify_silver_spoon.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", + "output": "references/ref_silver_spoon.png", + "mask": "references/sam_mask_silver_spoon.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 77.0, + 916.0, + 950.0 + ], + "mask_score": 3.449565, + "mask_area_ratio": 0.092279, + "elapsed_seconds": 7.0866 + } + } + ], + "not_emitted": [ + { + "name": "black_pot", + "reason": "not_detected" + }, + { + "name": "kitchen_faucet", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000007.json new file mode 100644 index 0000000000000000000000000000000000000000..85b89a99d8beed815342f6394094e7c928fbc4f8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000007.json @@ -0,0 +1,436 @@ +{ + "sample_id": "sample_000007", + "target_total": 14, + "target_people": 1, + "target_objects": 13, + "canvas_size": [ + 1248, + 
832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 14, + "n_detected": 14, + "n_subjects": 14, + "subjects": [ + { + "name": "typist", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "source_description": "Visible as a dark silhouette with one hand interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room.", + "sub_caption": "typist: Visible as a dark silhouette with one hand interacting with an illuminated keyboard.. Scene role: Typing at the desk in the center of the frame, serving as the main subject of the scene.", + "measured_bbox": [ + 0.5473, + 0.0, + 0.9968, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_typist.png", + "raw_ref_image": "references/raw_ref_typist_attempt_01.png", + "reference_verify": "references/reference_verify_typist.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_typist.png", + "mask": "references/sam_mask_typist.png" + } + }, + { + "name": "textured_fabric_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "source_description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. 
Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light.", + "sub_caption": "textured fabric area: A faint blueish, textured area that resembles rough fabric.. Scene role: Draped loosely over the back of the typist's chair, catching some of the ambient blue light.", + "measured_bbox": [ + 0.5434, + 0.5078, + 0.7285, + 0.8985 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_fabric_area.png", + "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", + "reference_verify": "references/reference_verify_textured_fabric_area.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_fabric_area.png", + "mask": "references/sam_mask_textured_fabric_area.png" + } + }, + { + "name": "textured_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment.", + "sub_caption": "textured surface: A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.. 
Scene role: Acting as a large desk mat or mousepad underneath the glowing keyboard.", + "measured_bbox": [ + 0.3547, + 0.1364, + 0.8811, + 0.479 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_surface.png", + "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", + "reference_verify": "references/reference_verify_textured_surface.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_surface.png", + "mask": "references/sam_mask_textured_surface.png" + } + }, + { + "name": "blue_lines", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "source_description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right.", + "sub_caption": "blue lines: Faint, indistinct blue lines glowing in the dark.. 
Scene role: An LED light strip illuminating the edge of the desk.", + "measured_bbox": [ + 0.3714, + 0.3069, + 0.5828, + 0.4931 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_lines.png", + "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", + "reference_verify": "references/reference_verify_blue_lines.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_lines.png", + "mask": "references/sam_mask_blue_lines.png" + } + }, + { + "name": "grid_patterned_floor", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "source_description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall.", + "sub_caption": "floor: A dark, grid-patterned floor surface, likely made of tiles.. 
Scene role: Visible in the lower portion of the room beneath the desk area.", + "measured_bbox": [ + 0.0039, + 0.5389, + 0.6911, + 0.9893 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_grid_patterned_floor.png", + "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", + "reference_verify": "references/reference_verify_grid_patterned_floor.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_grid_patterned_floor.png", + "mask": "references/sam_mask_grid_patterned_floor.png" + } + }, + { + "name": "shadowy_shape", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "source_description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light.", + "sub_caption": "shadowy shape: A large, unidentifiable shadowy shape.. 
Scene role: Looming in the background, suggesting stacked boxes or furniture in the darkness.", + "measured_bbox": [ + 0.0709, + 0.0492, + 0.3072, + 0.2887 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_shape.png", + "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_shape.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_shadowy_shape.png", + "mask": "references/sam_mask_shadowy_shape.png" + } + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side.", + "sub_caption": "blue light source: A small, hazy blue glowing area, appearing slightly irregular in shape.. 
Scene role: A glowing component or hub device resting on the desk.", + "measured_bbox": [ + 0.3661, + 0.1534, + 0.448, + 0.2196 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png" + } + }, + { + "name": "thin_curved_object", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "source_description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting.", + "sub_caption": "thin curved object: A thin, metallic or reflective curved object.. 
Scene role: Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light.", + "measured_bbox": [ + 0.5384, + 0.1284, + 0.674, + 0.181 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_thin_curved_object.png", + "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", + "reference_verify": "references/reference_verify_thin_curved_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_thin_curved_object.png", + "mask": "references/sam_mask_thin_curved_object.png" + } + }, + { + "name": "electronic_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: A faint red rectangular shape with some texture, resembling a phone.. 
Scene role: Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights.", + "measured_bbox": [ + 0.4103, + 0.2767, + 0.5015, + 0.3797 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_electronic_device.png", + "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", + "reference_verify": "references/reference_verify_electronic_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_electronic_device.png", + "mask": "references/sam_mask_electronic_device.png" + } + }, + { + "name": "blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "source_description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light.", + "sub_caption": "blue light: A small blue light source.. Scene role: A standby light on a computer tower sitting on the floor.", + "measured_bbox": [ + 0.523, + 0.747, + 0.539, + 0.788 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light.png", + "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light.png", + "mask": "references/sam_mask_blue_light.png" + } + }, + { + "name": "cable", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "source_description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. 
Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window.", + "sub_caption": "cable: A light-colored cable trailing down from the counter area towards the floor.. Scene role: Hanging down from the edge of the desk toward the floor, connecting devices.", + "measured_bbox": [ + 0.3307, + 0.1951, + 0.482, + 0.9804 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cable.png", + "raw_ref_image": "references/raw_ref_cable_attempt_01.png", + "reference_verify": "references/reference_verify_cable.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_cable.png", + "mask": "references/sam_mask_cable.png" + } + }, + { + "name": "backpack", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "source_description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle.", + "sub_caption": "backpack: A blue and black backpack.. 
Scene role: Resting quietly on the grid-patterned floor near the trailing cable.", + "measured_bbox": [ + 0.2708, + 0.6305, + 0.4662, + 0.9996 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_backpack.png", + "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", + "reference_verify": "references/reference_verify_backpack.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_backpack.png", + "mask": "references/sam_mask_backpack.png" + } + }, + { + "name": "small_blue_rectangle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "source_description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right.", + "sub_caption": "small blue rectangle: A tiny, bright blue rectangular glow.. 
Scene role: A small secondary display or digital clock sitting on the corner of the desk.", + "measured_bbox": [ + 0.7651, + 0.0635, + 0.8412, + 0.1295 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_small_blue_rectangle.png", + "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", + "reference_verify": "references/reference_verify_small_blue_rectangle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_small_blue_rectangle.png", + "mask": "references/sam_mask_small_blue_rectangle.png" + } + }, + { + "name": "dark_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "source_description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet.", + "sub_caption": "dark area: A deeply shadowed region dominating the space.. 
Scene role: Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup.", + "measured_bbox": [ + 0.003, + 0.0, + 0.3044, + 0.5863 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dark_area.png", + "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", + "reference_verify": "references/reference_verify_dark_area.json", + "reference_verify_passed": true, + "reference_attempts": 9, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", + "output": "references/ref_dark_area.png", + "mask": "references/sam_mask_dark_area.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 52.0, + 897.0, + 902.0 + ], + "mask_score": 3.468953, + "mask_area_ratio": 0.45142, + "elapsed_seconds": 7.1708 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..a36382d0446bca5edf1f904ad1b1097f389ca6c0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000008.json @@ -0,0 +1,486 @@ +{ + "sample_id": "sample_000008", + "target_total": 10, + "target_people": 8, + "target_objects": 2, + "canvas_size": [ + 1280, + 720 + ], + "canvas_aspect_ratio": "16:9", + "main_image": "main_image.png", + "bbox_overlay": 
"bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_by_screens", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "source_name": "person", + "source_description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground.", + "sub_caption": "person: a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels. Scene role: standing in the background, monitoring the glowing screens", + "measured_bbox": [ + 0.1313, + 0.1321, + 0.2804, + 0.5589 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_by_screens.png", + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", + "reference_verify": "references/reference_verify_person_by_screens.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", + "output": "references/ref_person_by_screens.png", + "mask": "references/sam_mask_person_by_screens.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + 
"sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 55.0, + 687.0, + 1009.0 + ], + "mask_score": 3.450623, + "mask_area_ratio": 0.151286, + "elapsed_seconds": 10.4397 + } + }, + { + "name": "reaching_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "source_name": "person", + "source_description": "Visible hands reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands.", + "sub_caption": "person: a pair of hands reaching forward, catching sharp reflections of red and blue ambient light. Scene role: reaching across the workbench to grab a tool", + "measured_bbox": [ + 0.4313, + 0.3821, + 0.5678, + 0.5521 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_reaching_hands.png", + "raw_ref_image": "references/raw_ref_reaching_hands_attempt_01.png", + "reference_verify": "references/reference_verify_reaching_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", + "output": "references/ref_reaching_hands.png", + "mask": "references/sam_mask_reaching_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 388.0, + 183.0, + 637.0, + 867.0 + ], + "mask_score": 3.461161, + "mask_area_ratio": 0.070283, + "elapsed_seconds": 10.0646 + } + }, + { + "name": "hands_holding_part", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", 
+ "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object.", + "sub_caption": "person: hands bathed in deep red light, carefully gripping a mechanical object. Scene role: holding a component steady on the workbench", + "measured_bbox": [ + 0.2769, + 0.4337, + 0.3938, + 0.5372 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_part.png", + "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_part.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", + "output": "references/ref_hands_holding_part.png", + "mask": "references/sam_mask_hands_holding_part.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 345.0, + 90.0, + 675.0, + 1015.0 + ], + "mask_score": 3.449955, + "mask_area_ratio": 0.13985, + "elapsed_seconds": 9.9678 + } + }, + { + "name": "hands_with_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "source_description": "The 
person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object.", + "sub_caption": "person: hands illuminated by a red glow, with one hand open and the other pinching a tiny object. Scene role: inspecting a small microchip", + "measured_bbox": [ + 0.3697, + 0.5225, + 0.5251, + 0.7126 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_small_object.png", + "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_with_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", + "output": "references/ref_hands_with_small_object.png", + "mask": "references/sam_mask_hands_with_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 60.0, + 684.0, + 992.0 + ], + "mask_score": 3.441398, + "mask_area_ratio": 0.128698, + "elapsed_seconds": 10.4408 + } + }, + { + "name": "hands_with_smoldering_tool", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. 
Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment.", + "sub_caption": "person: hands lit by red light, grasping a smoking device or tool. Scene role: soldering a wire, emitting a trail of smoke", + "measured_bbox": [ + 0.7136, + 0.5556, + 0.8779, + 0.7618 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_smoldering_tool.png", + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "reference_verify": "references/reference_verify_hands_with_smoldering_tool.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "output": "references/ref_hands_with_smoldering_tool.png", + "mask": "references/sam_mask_hands_with_smoldering_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 43.0, + 685.0, + 1000.0 + ], + "mask_score": 3.455653, + "mask_area_ratio": 0.157475, + "elapsed_seconds": 10.0065 + } + }, + { + "name": "helmeted_figure", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "source_description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. 
Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible.", + "sub_caption": "person: a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light. Scene role: leaning closely over the workbench to inspect the soldering work", + "measured_bbox": [ + 0.717, + 0.081, + 1.0, + 0.862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_helmeted_figure.png", + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", + "reference_verify": "references/reference_verify_helmeted_figure.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", + "output": "references/ref_helmeted_figure.png", + "mask": "references/sam_mask_helmeted_figure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 333.0, + 17.0, + 696.0, + 1018.0 + ], + "mask_score": 3.313146, + "mask_area_ratio": 0.166679, + "elapsed_seconds": 10.3423 + } + }, + { + "name": "silhouette_with_phone", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "source_description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. 
Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone.", + "sub_caption": "person: a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device. Scene role: standing on the side, illuminating the workspace with a phone flashlight", + "measured_bbox": [ + 0.5717, + 0.0262, + 0.857, + 0.6433 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_silhouette_with_phone.png", + "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", + "reference_verify": "references/reference_verify_silhouette_with_phone.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", + "output": "references/ref_silhouette_with_phone.png", + "mask": "references/sam_mask_silhouette_with_phone.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 16.0, + 694.0, + 1018.0 + ], + "mask_score": 3.400937, + "mask_area_ratio": 0.174557, + "elapsed_seconds": 10.3827 + } + }, + { + "name": "hands_passing_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections.", + "sub_caption": "person: hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object. Scene role: passing a heavy piece of hardware to another worker", + "measured_bbox": [ + 0.6035, + 0.4687, + 0.7144, + 0.5946 + ], + "detection_confidence": "high", + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_passing_object.png", + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", + "reference_verify": "references/reference_verify_hands_passing_object.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", + "output": "references/ref_hands_passing_object.png", + "mask": "references/sam_mask_hands_passing_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 17.0, + 690.0, + 1014.0 + ], + "mask_score": 3.418609, + "mask_area_ratio": 0.166451, + "elapsed_seconds": 11.3527 + } + }, + { + "name": "blue_light_module", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "source_description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. 
Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right.", + "sub_caption": "blue light: a small, intensely bright blue rectangular light glowing through the shadows. Scene role: sitting on the edge of the workbench, casting a blue beam across the tools", + "measured_bbox": [ + 0.2793, + 0.8032, + 0.3903, + 0.9054 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_module.png", + "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_module.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", + "output": "references/ref_blue_light_module.png", + "mask": "references/sam_mask_blue_light_module.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 4.0, + 250.0, + 978.0, + 796.0 + ], + "mask_score": 3.46793, + "mask_area_ratio": 0.374003, + "elapsed_seconds": 10.496 + } + }, + { + "name": "red_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. 
Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: a faintly glowing red rectangular electronic device with a textured surface. Scene role: lying flat on the workbench next to the busy hands, functioning as a diagnostic remote", + "measured_bbox": [ + 0.7228, + 0.7939, + 0.902, + 0.8912 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_device.png", + "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", + "reference_verify": "references/reference_verify_red_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", + "output": "references/ref_red_device.png", + "mask": "references/sam_mask_red_device.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 76.0, + 282.0, + 989.0, + 762.0 + ], + "mask_score": 3.430848, + "mask_area_ratio": 0.198863, + "elapsed_seconds": 11.2516 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..0012353ba55c36174ae02fe659bcb4fb2d00c8ae --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000009.json @@ -0,0 +1,256 @@ +{ + "sample_id": "sample_000009", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 5, + "n_detected": 5, + "n_subjects": 5, + "subjects": [ + { + "name": "person_washing_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "source_description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in low light conditions.", + "sub_caption": "person: A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.. 
Scene role: Actively gripping the blue cleaner bottle over the kitchen sink.", + "measured_bbox": [ + 0.0921, + 0.3527, + 1.0, + 1.0 + ], + "detection_confidence": 950, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_washing_sink.png", + "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_washing_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", + "output": "references/ref_person_washing_sink.png", + "mask": "references/sam_mask_person_washing_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 14.0, + 674.0, + 1023.0 + ], + "mask_score": 3.431432, + "mask_area_ratio": 0.156425, + "elapsed_seconds": 10.3393 + } + }, + { + "name": "blue_cleaner_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "source_description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers.", + "sub_caption": "cleaner bottle: A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.. 
Scene role: Held in the person's hands, positioned just above the sink basin.", + "measured_bbox": [ + 0.4107, + 0.2797, + 0.632, + 0.8174 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_cleaner_bottle.png", + "raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_blue_cleaner_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "output": "references/ref_blue_cleaner_bottle.png", + "mask": "references/sam_mask_blue_cleaner_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 55.0, + 696.0, + 968.0 + ], + "mask_score": 3.452606, + "mask_area_ratio": 0.185988, + "elapsed_seconds": 10.6007 + } + }, + { + "name": "red_sink_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + "source_description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter.", + "sub_caption": "red mat: A vibrant red, textured rubber mat featuring a grid or perforated pattern.. 
Scene role: Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands.", + "measured_bbox": [ + 0.3049, + 0.5104, + 0.8328, + 0.9302 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_sink_mat.png", + "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", + "reference_verify": "references/reference_verify_red_sink_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", + "output": "references/ref_red_sink_mat.png", + "mask": "references/sam_mask_red_sink_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 21.0, + 148.0, + 1001.0, + 881.0 + ], + "mask_score": 3.479561, + "mask_area_ratio": 0.582802, + "elapsed_seconds": 10.3866 + } + }, + { + "name": "silver_faucet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "source_description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies.", + "sub_caption": "faucet: A polished, curved silver metal kitchen faucet with a standard spout.. 
Scene role: Rising from the back of the sink counter, partially occluded by the person's hands and bottle.", + "measured_bbox": [ + 0.5344, + 0.0136, + 0.718, + 0.3772 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_faucet.png", + "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", + "reference_verify": "references/reference_verify_silver_faucet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", + "output": "references/ref_silver_faucet.png", + "mask": "references/sam_mask_silver_faucet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 115.0, + 62.0, + 895.0, + 985.0 + ], + "mask_score": 3.425959, + "mask_area_ratio": 0.163316, + "elapsed_seconds": 10.2359 + } + }, + { + "name": "yellow_dish_gloves", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "source_description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor.", + "sub_caption": "yellow dish gloves: A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.. 
Scene role: Laying flat on the countertop directly next to the sink rim, ready to be worn.", + "measured_bbox": [ + 0.0, + 0.3471, + 0.2191, + 0.8264 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_yellow_dish_gloves.png", + "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", + "reference_verify": "references/reference_verify_yellow_dish_gloves.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", + "output": "references/ref_yellow_dish_gloves.png", + "mask": "references/sam_mask_yellow_dish_gloves.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 138.0, + 66.0, + 850.0, + 952.0 + ], + "mask_score": 3.462321, + "mask_area_ratio": 0.269736, + "elapsed_seconds": 10.2407 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..48e4b687b9101f7903bf4844dd791cd1b0554089 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000010.json @@ -0,0 +1,440 @@ +{ + "sample_id": "sample_000010", + "target_total": 9, + "target_people": 8, + "target_objects": 1, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": 
"main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 9, + "n_detected": 9, + "n_subjects": 9, + "subjects": [ + { + "name": "person_reading_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "source_description": "A person is reading a book, partially visible in the red light, with only part of their arm and hand shown holding the book. Source dataset: Ego4D. Scene context: A person is reading a book illuminated by a red light in an otherwise dark room.", + "sub_caption": "person: A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.. Scene role: Sitting on the left side of the room, focused intently on reading their book in the red glow.", + "measured_bbox": [ + 0.0127, + 0.1501, + 0.4525, + 0.6235 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_reading_red_light.png", + "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_reading_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", + "output": "references/ref_person_reading_red_light.png", + "mask": "references/sam_mask_person_reading_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + 
"sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 16.0, + 680.0, + 1023.0 + ], + "mask_score": 3.324489, + "mask_area_ratio": 0.157365, + "elapsed_seconds": 7.1471 + } + }, + { + "name": "person_lying_reading_blue_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "source_description": "A person lying on a bed. Source dataset: Ego4D. Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light.", + "sub_caption": "person: A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.. Scene role: Lying on a bed in the back right corner, quietly reading separate from the main group.", + "measured_bbox": [ + 0.6944, + 0.2802, + 0.9406, + 0.4237 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_lying_reading_blue_light.png", + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "reference_verify": "references/reference_verify_person_lying_reading_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "output": "references/ref_person_lying_reading_blue_light.png", + "mask": "references/sam_mask_person_lying_reading_blue_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 360.0, + 75.0, + 665.0, + 980.0 + ], + 
"mask_score": 3.501601, + "mask_area_ratio": 0.137436, + "elapsed_seconds": 7.1743 + } + }, + { + "name": "person_holding_blue_flashlight", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "source_description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. Scene context: A person is holding a lit blue flashlight in a dark environment.", + "sub_caption": "person: An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.. Scene role: Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination.", + "measured_bbox": [ + 0.222, + 0.574, + 0.493, + 0.981 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_holding_blue_flashlight.png", + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "reference_verify": "references/reference_verify_person_holding_blue_flashlight.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "output": "references/ref_person_holding_blue_flashlight.png", + "mask": "references/sam_mask_person_holding_blue_flashlight.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 704.0, + 1017.0 + 
], + "mask_score": 3.354952, + "mask_area_ratio": 0.150028, + "elapsed_seconds": 7.2854 + } + }, + { + "name": "person_standing_background", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.. Scene role: Standing quietly in the background, observing the room while holding a round object.", + "measured_bbox": [ + 0.4462, + 0.0267, + 0.5613, + 0.5921 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_standing_background.png", + "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", + "reference_verify": "references/reference_verify_person_standing_background.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", + "output": "references/ref_person_standing_background.png", + "mask": "references/sam_mask_person_standing_background.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 337.0, + 25.0, + 678.0, + 1014.0 + ], + "mask_score": 3.31662, + "mask_area_ratio": 0.134835, + "elapsed_seconds": 
7.1397 + } + }, + { + "name": "person_face_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "source_description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner.", + "sub_caption": "person: A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.. Scene role: Sitting near the center of the gathering, looking toward the others while bathed in red ambient light.", + "measured_bbox": [ + 0.5873, + 0.345, + 0.6506, + 0.4705 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_face_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_face_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", + "output": "references/ref_person_face_in_red_light.png", + "mask": "references/sam_mask_person_face_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 
50.0, + 662.0, + 997.0 + ], + "mask_score": 3.468133, + "mask_area_ratio": 0.138924, + "elapsed_seconds": 7.09 + } + }, + { + "name": "hands_holding_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "source_description": "Visible hands, illuminated in reddish light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background.", + "sub_caption": "hands: A pair of hands illuminated in a reddish light, holding a small object against the dark environment.. Scene role: Sitting within the group, holding a small item in the pool of red light.", + "measured_bbox": [ + 0.5539, + 0.5673, + 0.6564, + 0.6516 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_small_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", + "output": "references/ref_hands_holding_small_object.png", + "mask": "references/sam_mask_hands_holding_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 66.0, + 678.0, + 1002.0 + ], + "mask_score": 3.446404, + "mask_area_ratio": 0.148472, + "elapsed_seconds": 
7.2113 + } + }, + { + "name": "hands_holding_triangular_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "source_description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns.", + "sub_caption": "person: Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.. Scene role: Showing the patterned triangular object to the group nearby.", + "measured_bbox": [ + 0.6869, + 0.6745, + 0.898, + 0.9021 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_triangular_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "reference_verify": "references/reference_verify_hands_holding_triangular_object.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "output": "references/ref_hands_holding_triangular_object.png", + "mask": "references/sam_mask_hands_holding_triangular_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 371.0, + 176.0, + 647.0, + 904.0 + ], + "mask_score": 3.472167, + "mask_area_ratio": 0.075788, + "elapsed_seconds": 7.6612 + } + }, + { + "name": 
"hands_gesturing", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark room illuminated by stage lights.", + "sub_caption": "person: A person's hands catching the red stage-like lighting, caught mid-gesture.. Scene role: Actively gesturing and conversing with the central group.", + "measured_bbox": [ + 0.233, + 0.5315, + 0.3583, + 0.6782 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_gesturing.png", + "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", + "reference_verify": "references/reference_verify_hands_gesturing.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", + "output": "references/ref_hands_gesturing.png", + "mask": "references/sam_mask_hands_gesturing.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 64.0, + 677.0, + 979.0 + ], + "mask_score": 3.460945, + "mask_area_ratio": 0.132824, + "elapsed_seconds": 7.2309 + } + }, + { + "name": "plastic_water_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "source_description": "A clear plastic bottle, 
likely containing water, with a blue and white label, standing on the counter to the right. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces.", + "sub_caption": "water bottle: A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.. Scene role: Resting upright on the floor in the center of the group, reflecting the dramatic lighting.", + "measured_bbox": [ + 0.5155, + 0.7075, + 0.5653, + 0.9622 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_plastic_water_bottle.png", + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", + "reference_verify": "references/reference_verify_plastic_water_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", + "output": "references/ref_plastic_water_bottle.png", + "mask": "references/sam_mask_plastic_water_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 72.0, + 666.0, + 982.0 + ], + "mask_score": 3.471713, + "mask_area_ratio": 0.17085, + "elapsed_seconds": 9.3022 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000011.json 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..80791d9550c21a5b75d9cff8607c4b17e8eccd78 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/rows/sample_000011.json @@ -0,0 +1,486 @@ +{ + "sample_id": "sample_000011", + "target_total": 10, + "target_people": 1, + "target_objects": 9, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated by a red light, and they appear to be holding a small object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light.", + "sub_caption": "person: The person's hands are visible, illuminated by a strong red light, holding a small object or tool.. 
Scene role: Working intently, holding the metal tool over the granite countertop.", + "measured_bbox": [ + 0.1787, + 0.2332, + 0.4617, + 0.5792 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_hands.png", + "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", + "reference_verify": "references/reference_verify_person_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", + "output": "references/ref_person_hands.png", + "mask": "references/sam_mask_person_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 36.0, + 704.0, + 1013.0 + ], + "mask_score": 3.415329, + "mask_area_ratio": 0.148867, + "elapsed_seconds": 7.2976 + } + }, + { + "name": "textured_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment.", + "sub_caption": "textured surface: A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.. 
Scene role: Lying flat on the granite countertop under the person's hands.", + "measured_bbox": [ + 0.1138, + 0.5866, + 0.5947, + 0.7728 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_mat.png", + "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", + "reference_verify": "references/reference_verify_textured_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", + "output": "references/ref_textured_mat.png", + "mask": "references/sam_mask_textured_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 29.0, + 292.0, + 994.0, + 732.0 + ], + "mask_score": 3.388931, + "mask_area_ratio": 0.280238, + "elapsed_seconds": 7.3108 + } + }, + { + "name": "wall_shelf", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "source_description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap.", + "sub_caption": "shelf: A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.. 
Scene role: Attached to the wall in the shadowy background above the counter.", + "measured_bbox": [ + 0.5206, + 0.0681, + 0.6781, + 0.3789 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_shelf.png", + "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", + "reference_verify": "references/reference_verify_wall_shelf.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", + "output": "references/ref_wall_shelf.png", + "mask": "references/sam_mask_wall_shelf.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 176.0, + 42.0, + 861.0, + 940.0 + ], + "mask_score": 3.354082, + "mask_area_ratio": 0.233056, + "elapsed_seconds": 7.1907 + } + }, + { + "name": "striped_towel", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "source_description": "A black and white striped towel hanging from the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring.", + "sub_caption": "towel: A black and white striped towel.. 
Scene role: Hanging down from a lower cabinet handle just below the edge of the countertop.", + "measured_bbox": [ + 0.6082, + 0.7266, + 0.7242, + 0.9973 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_striped_towel.png", + "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", + "reference_verify": "references/reference_verify_striped_towel.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", + "output": "references/ref_striped_towel.png", + "mask": "references/sam_mask_striped_towel.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 125.0, + 53.0, + 897.0, + 971.0 + ], + "mask_score": 3.50075, + "mask_area_ratio": 0.517391, + "elapsed_seconds": 7.4653 + } + }, + { + "name": "glass_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "source_description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator.", + "sub_caption": "bottle: A clear glass bottle filled with amber liquid.. 
Scene role: Standing on the granite countertop to the side, catching dim reflections.", + "measured_bbox": [ + 0.2049, + 0.4778, + 0.2764, + 0.6209 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_glass_bottle.png", + "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_glass_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", + "output": "references/ref_glass_bottle.png", + "mask": "references/sam_mask_glass_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 198.0, + 0.0, + 820.0, + 1008.0 + ], + "mask_score": 3.251198, + "mask_area_ratio": 0.458004, + "elapsed_seconds": 7.2584 + } + }, + { + "name": "dirty_plate", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "source_description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby.", + "sub_caption": "plate: A partially visible plate containing food remnants.. 
Scene role: Sitting on the counter near the bottle in the dim light.", + "measured_bbox": [ + 0.003, + 0.5981, + 0.2, + 0.6793 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dirty_plate.png", + "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", + "reference_verify": "references/reference_verify_dirty_plate.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", + "output": "references/ref_dirty_plate.png", + "mask": "references/sam_mask_dirty_plate.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 334.0, + 963.0, + 685.0 + ], + "mask_score": 3.317592, + "mask_area_ratio": 0.170906, + "elapsed_seconds": 7.0879 + } + }, + { + "name": "metal_tool", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "source_description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers.", + "sub_caption": "dark object: A dark, silhouetted tool with a thin, elongated structure resembling pliers.. 
Scene role: Held firmly by the person's hands under the red light.", + "measured_bbox": [ + 0.203, + 0.348, + 0.476, + 0.468 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_metal_tool.png", + "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", + "reference_verify": "references/reference_verify_metal_tool.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", + "output": "references/ref_metal_tool.png", + "mask": "references/sam_mask_metal_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 51.0, + 66.0, + 993.0, + 937.0 + ], + "mask_score": 0.918062, + "mask_area_ratio": 0.928193, + "elapsed_seconds": 7.1989 + } + }, + { + "name": "box_grater", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "source_description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items.", + "sub_caption": "grater: A metal box grater.. 
Scene role: Resting on the counter towards the background left.", + "measured_bbox": [ + 0.4842, + 0.4749, + 0.5853, + 0.5496 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_box_grater.png", + "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", + "reference_verify": "references/reference_verify_box_grater.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", + "output": "references/ref_box_grater.png", + "mask": "references/sam_mask_box_grater.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 236.0, + 16.0, + 787.0, + 1015.0 + ], + "mask_score": 3.453517, + "mask_area_ratio": 0.346949, + "elapsed_seconds": 7.1571 + } + }, + { + "name": "wooden_cabinets", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "source_description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides.", + "sub_caption": "kitchen cabinets (right): A row of light brown wooden cabinets with metal handles.. 
Scene role: Lining the right side of the kitchen in the background shadows.", + "measured_bbox": [ + 0.7319, + 0.5034, + 0.9945, + 0.9951 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_cabinets.png", + "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_cabinets.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", + "output": "references/ref_wooden_cabinets.png", + "mask": "references/sam_mask_wooden_cabinets.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 253.0, + 986.0, + 809.0 + ], + "mask_score": 3.454364, + "mask_area_ratio": 0.365833, + "elapsed_seconds": 7.226 + } + }, + { + "name": "granite_counter", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "source_description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right.", + "sub_caption": "granite countertop: A grey, white, and black speckled stone countertop.. 
Scene role: The main horizontal surface spanning the lower half of the frame, holding the scattered items.", + "measured_bbox": [ + 0.003, + 0.5275, + 0.6738, + 0.883 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_granite_counter.png", + "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", + "reference_verify": "references/reference_verify_granite_counter.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", + "output": "references/ref_granite_counter.png", + "mask": "references/sam_mask_granite_counter.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 71.0, + 1023.0, + 940.0 + ], + "mask_score": 3.480751, + "mask_area_ratio": 0.58655, + "elapsed_seconds": 7.1983 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..95e3df957795cc818e13ccef3459f11ba791b5d8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e031c3871a7015bfd37e74c113128c5b07d1af03276fa1a34043a30bb4f30f0 +size 1352721 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..bed0096ac597cb30a18533d54b5449673163493b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/compose_prompt.txt @@ -0,0 +1,59 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. +JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, atmospheric industrial interior dominated by strong artificial red and green lights.", + "activity": "A person is carefully adjusting a small dark object near an electronic panel, while a nearby indicator light glows.", + "composition": "Medium close-up shot, shallow depth of field. The red grid structure forms the background on the right. The person is centrally framed, hands prominent in the foreground and strongly lit by the ambient red light. 
The dark device with the green LED is positioned on the left side of the frame.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_in_red_light", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "description": "A person wearing a dark jacket, partially visible with hands bathed in intense bright red light, holding a small dark object.", + "role_in_scene": "Interacting with the small dark object in the center of the scene." + } + ], + "objects": [ + { + "name": "red_illuminated_structure", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "description": "A wall or barricade strongly illuminated with intense red light, featuring a grid-like, industrial textured pattern.", + "role_in_scene": "Serving as the glowing backdrop and main light source on the right side of the scene." + }, + { + "name": "green_indicator_light", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "description": "A small, bright green glowing indicator LED located on a shadowed, dark structural panel.", + "role_in_scene": "Glowing prominently from a dark device on the left, providing a stark color contrast to the red lighting." 
+ } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_green_indicator_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_green_indicator_light.png new file mode 100644 index 0000000000000000000000000000000000000000..5390275adae36065540c4bb5b5f3f954623adb6f Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_green_indicator_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..bbf1c13eb0c8b378b610a7889ee870631efabb2d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_person_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d21b464e354bcd5533f138e83308c8bba274d03e98869c3af51dccd9ca37249 +size 669915 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_red_illuminated_structure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_red_illuminated_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..20f2960383c54140affe3f03cf3533173d568db7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/detect_refine_red_illuminated_structure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15b6d126539cface1b085bad45315d8e01f8dd61b0ca59eb158090e787e15c9 +size 678819 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_green_indicator_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_green_indicator_light.png new file mode 100644 index 
0000000000000000000000000000000000000000..114f65e023b354518f2667f9d61f7a42ee73c257 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_green_indicator_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..51a4e899b579d4fc0db4abee1a501a5fa9813e6b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_person_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5357848555dac489c791c7b4bc71baf049323ff7a693c72ee423615d64be01 +size 993993 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_red_illuminated_structure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_red_illuminated_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..41894e0c1f9e2f591c80adf3b000993c73b5d87c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/crops/diversify_input_red_illuminated_structure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f83cb488c5df95e5c0093f7b8629e891ae587003d4a09ffee9558c19be39782 +size 720393 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..63b825640c261773bb92db64c47a57951b0675d6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/detections.json @@ -0,0 +1,59 @@ +[ + { + "name": "person_in_red_light", + "present": true, + "bbox": [ + 0.301, + 0.039, + 0.714, + 0.98 + ], + "confidence": 0.95, + "notes": 
"refine failed; using coarse bbox", + "coarse_bbox": [ + 0.301, + 0.039, + 0.714, + 0.98 + ], + "refine_crop": "crops/detect_refine_person_in_red_light.png" + }, + { + "name": "red_illuminated_structure", + "present": true, + "bbox": [ + 0.6096, + 0.0, + 0.9961, + 0.991 + ], + "confidence": "high", + "notes": "Tight bounding box capturing the red illuminated grid structure that forms the background of the scene.", + "coarse_bbox": [ + 0.609, + 0.0, + 0.996, + 1.0 + ], + "refine_crop": "crops/detect_refine_red_illuminated_structure.png" + }, + { + "name": "green_indicator_light", + "present": true, + "bbox": [ + 0.1227, + 0.438, + 0.1605, + 0.4975 + ], + "confidence": 0.95, + "notes": "The primary focus is the prominent green glowing indicator light.", + "coarse_bbox": [ + 0.119, + 0.438, + 0.166, + 0.493 + ], + "refine_crop": "crops/detect_refine_green_indicator_light.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f64408d496304179a549e2810e99606082b026 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4893d7fc65f46508f11f7472746aee45cdb0a1a458624fb6b6950b4552a94b +size 1401637 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..9bfd27b523d746b05b5adc69334a365e8009e8f6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/plan.json @@ -0,0 +1,103 @@ +{ + "sample_id": "sample_000001", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + 
"aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, moody futuristic room or industrial corridor illuminated primarily by intense red artificial light", + "activity": "A person is interacting with a small dark device near a grid-like wall structure, while a contrasting green indicator light glows in the darkness", + "composition": "Cinematic mid-shot with a shallow depth of field. The person's hands holding the device are in the foreground, deeply bathed in red light. The red illuminated grid structure forms the atmospheric background. The small green indicator light is placed on a dark console to the side, providing a sharp color contrast.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_in_red_light", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "description": "A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.", + "role_in_scene": "Operating the small device in the foreground" + } + ], + "objects": [ + { + "name": "red_illuminated_structure", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "description": "A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.", + "role_in_scene": "Forms the 
textured, atmospheric background behind the person" + }, + { + "name": "green_indicator_light", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "description": "A small, bright green glowing LED indicator light piercing through the darkness.", + "role_in_scene": "Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting" + } + ] + }, + "expected_subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object.", + "sub_caption": "person: A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.. Scene role: Operating the small device in the foreground", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "red_illuminated_structure", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "source_description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights.", + "sub_caption": "red illuminated structure: A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.. 
Scene role: Forms the textured, atmospheric background behind the person", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "green_indicator_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "source_description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible.", + "sub_caption": "green light: A small, bright green glowing LED indicator light piercing through the darkness.. Scene role: Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000001/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references.json new file mode 100644 index 0000000000000000000000000000000000000000..fbf9ac6a7198d3420ce82a6247e0155cca8161a0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references.json @@ -0,0 +1,101 @@ +{ + "references": [ + { + "name": "person_in_red_light", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "diversify_input": "crops/diversify_input_person_in_red_light.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + 
"mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 324.0, + 26.0, + 699.0, + 1013.0 + ], + "mask_score": 3.392402, + "mask_area_ratio": 0.165197, + "elapsed_seconds": 53.7174 + }, + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "red_illuminated_structure", + "ref_image": "references/ref_red_illuminated_structure.png", + "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", + "diversify_input": "crops/diversify_input_red_illuminated_structure.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", + "output": "references/ref_red_illuminated_structure.png", + "mask": "references/sam_mask_red_illuminated_structure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 0.0, + 1023.0, + 1023.0 + ], + "mask_score": 1.966617, + "mask_area_ratio": 0.597257, + "elapsed_seconds": 10.2671 + }, + "reference_verify": "references/reference_verify_red_illuminated_structure.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "green_indicator_light", + "ref_image": "references/ref_green_indicator_light.png", + "raw_ref_image": 
"references/raw_ref_green_indicator_light_attempt_01.png", + "diversify_input": "crops/diversify_input_green_indicator_light.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", + "output": "references/ref_green_indicator_light.png", + "mask": "references/sam_mask_green_indicator_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 335.0, + 676.0, + 688.0 + ], + "mask_score": 3.439631, + "mask_area_ratio": 0.059673, + "elapsed_seconds": 10.1365 + }, + "reference_verify": "references/reference_verify_green_indicator_light.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_green_indicator_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_green_indicator_light.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cbe6dc168e3b9851ac5fa0f3da9cd6bf8eaf9d Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_green_indicator_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b9b95a2f1f24a6ee9ade29095702edc276b44a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_person_in_red_light.png @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:9104a0d402752b86828137bd482a3285758d48c93ef580f25400c54bd5adb8e5 +size 300840 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_red_illuminated_structure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_red_illuminated_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..94dbc6d9f5b1b5fb6c343f2dd0e87481e900345e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/ref_red_illuminated_structure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595c3867e0df5721aec2fd74b772fa94d92f2d925396ceb2a659aa61f487cff3 +size 1142992 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_green_indicator_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_green_indicator_light.json new file mode 100644 index 0000000000000000000000000000000000000000..9a04e4a05b22d9abfb38a46f51af1eb0f015371d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_green_indicator_light.json @@ -0,0 +1,46 @@ +{ + "name": "green_indicator_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_green_indicator_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_green_indicator_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_green_indicator_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_ref_green_indicator_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_sam_mask_green_indicator_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 335.0, + 676.0, + 688.0 + ], + "mask_score": 3.439631, + "mask_area_ratio": 0.059673, + "elapsed_seconds": 10.1365 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image perfectly shows an isolated green indicator light against a white background." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_person_in_red_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_person_in_red_light.json new file mode 100644 index 0000000000000000000000000000000000000000..4f0c88058ba2b68ce9c57af29ee042f020609830 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_person_in_red_light.json @@ -0,0 +1,46 @@ +{ + "name": "person_in_red_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_in_red_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_in_red_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_ref_person_in_red_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_sam_mask_person_in_red_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 324.0, + 26.0, + 699.0, + 1013.0 + ], + "mask_score": 3.392402, + "mask_area_ratio": 0.165197, + "elapsed_seconds": 53.7174 + }, + "verify": { + "passed": true, + "subject_visible": true, + 
"complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body person on white background, perfectly isolated and not cropped." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_red_illuminated_structure.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_red_illuminated_structure.json new file mode 100644 index 0000000000000000000000000000000000000000..c889d9283f2c75565ebdfa42aa4cabbbbc98f972 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/reference_verify_red_illuminated_structure.json @@ -0,0 +1,46 @@ +{ + "name": "red_illuminated_structure", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_red_illuminated_structure_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_red_illuminated_structure_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_ref_red_illuminated_structure_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/candidate_sam_mask_red_illuminated_structure_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + 
"sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 0.0, + 1023.0, + 1023.0 + ], + "mask_score": 1.966617, + "mask_area_ratio": 0.597257, + "elapsed_seconds": 10.2671 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a representative crop of the red grid-like pattern. Since the subject is a pattern/surface structure, a crop is acceptable." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_green_indicator_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_green_indicator_light.png new file mode 100644 index 0000000000000000000000000000000000000000..e7909c7196e19ad3f3c2c2af71f39ed6eb7c2002 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_green_indicator_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..dc2d4c9d8ec5e26fe586ad0ff65898ed32422e42 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_person_in_red_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_red_illuminated_structure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_red_illuminated_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..18fc41e8e3028d1ee4189e5740ea53104f10a419 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/references/sam_mask_red_illuminated_structure.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/row.json new file mode 100644 index 0000000000000000000000000000000000000000..507a9b8097bb191f30f2679ded0b1f1b836a2820 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/row.json @@ -0,0 +1,164 @@ +{ + "sample_id": "sample_000001", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object.", + "sub_caption": "person: A person, prominently visible by their hands and forearms which are bathed in bright red light, holding and interacting with a small dark electronic object.. 
Scene role: Operating the small device in the foreground", + "measured_bbox": [ + 0.301, + 0.039, + 0.714, + 0.98 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 324.0, + 26.0, + 699.0, + 1013.0 + ], + "mask_score": 3.392402, + "mask_area_ratio": 0.165197, + "elapsed_seconds": 53.7174 + } + }, + { + "name": "red_illuminated_structure", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "source_name": "red illuminated structure", + "source_description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights.", + "sub_caption": "red illuminated structure: A sturdy wall or barricade-like structure featuring a textured, grid-like pattern, strongly illuminated by deep red ambient light.. 
Scene role: Forms the textured, atmospheric background behind the person", + "measured_bbox": [ + 0.6096, + 0.0, + 0.9961, + 0.991 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_structure.png", + "raw_ref_image": "references/raw_ref_red_illuminated_structure_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_structure.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_red_illuminated_structure_attempt_01.png", + "output": "references/ref_red_illuminated_structure.png", + "mask": "references/sam_mask_red_illuminated_structure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 0.0, + 1023.0, + 1023.0 + ], + "mask_score": 1.966617, + "mask_area_ratio": 0.597257, + "elapsed_seconds": 10.2671 + } + }, + { + "name": "green_indicator_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "source_name": "green light", + "source_description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible.", + "sub_caption": "green light: A small, bright green glowing LED indicator light piercing through the darkness.. 
Scene role: Glowing on a dark piece of equipment beside the person, contrasting sharply with the dominant red lighting", + "measured_bbox": [ + 0.1227, + 0.438, + 0.1605, + 0.4975 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_green_indicator_light.png", + "raw_ref_image": "references/raw_ref_green_indicator_light_attempt_01.png", + "reference_verify": "references/reference_verify_green_indicator_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000001/references/raw_ref_green_indicator_light_attempt_01.png", + "output": "references/ref_green_indicator_light.png", + "mask": "references/sam_mask_green_indicator_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 335.0, + 676.0, + 688.0 + ], + "mask_score": 3.439631, + "mask_area_ratio": 0.059673, + "elapsed_seconds": 10.1365 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..01fd749207df080356e3ed32523f0a19b046fc40 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000001/vocab_task.json @@ -0,0 +1,56 @@ +{ + "task_id": "sample_000001", + "sample_id": "sample_000001", + "sample_index": 1, + "target_total": 3, + "target_people": 1, + 
"target_objects": 2, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 109, + "image_id": "Ego4D:ego4d_video/EGO_150078.npy:person:0", + "name": "person", + "description": "A person is partially visible, holding a long, dark object in front of them. Source dataset: Ego4D. Scene context: A close-up view of a person holding a long object, possibly a tool or weapon, against a bright, overexposed background." + }, + { + "candidate_index": 1, + "source_offset": 1009, + "image_id": "Ego4D:ego4d_video/EGO_76251.npy:person:0", + "name": "person", + "description": "Visible by their hands, which are illuminated in bright red light, holding a small object. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark setting, illuminated by a red light, holding and interacting with a small dark object." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 5811, + "image_id": "Ego4D:ego4d_video/EGO_2029.npy:object:1", + "name": "red illuminated structure", + "description": "A structure on the right side of the image, possibly a wall or barricade, strongly illuminated with red light, featuring a grid-like or textured pattern. Source dataset: Ego4D. Scene context: A dark, possibly outdoor or poorly lit indoor area illuminated by strong blue and red artificial lights." + }, + { + "candidate_index": 1, + "source_offset": 5237, + "image_id": "Ego4D:ego4d_video/EGO_172706.npy:object:2", + "name": "stove top", + "description": "A dark stove top surface beneath the pan. Source dataset: Ego4D. Scene context: A close-up view of food being cooked in a pan on a stove." + }, + { + "candidate_index": 2, + "source_offset": 5410, + "image_id": "Ego4D:ego4d_video/EGO_192247.npy:object:1", + "name": "scissors", + "description": "A pair of metallic scissors with black handles lying closed on the white surface. Source dataset: Ego4D. 
Scene context: A top-down view of hands working with fabric on a white surface, surrounded by scissors and other materials." + }, + { + "candidate_index": 3, + "source_offset": 7900, + "image_id": "Ego4D:ego4d_video/EGO_294886.npy:object:1", + "name": "green light", + "description": "Small, bright green glowing indicator light, possibly an LED, located on the dark structure. Source dataset: Ego4D. Scene context: An extremely dark scene with only a faint silhouette of an object and a small green indicator light visible." + } + ], + "rng_seed": 1782032722, + "created_at": 1782259667.5924346 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3aa7fa4c3b7a5e7d89ca42756cf4cfc33e6aeb2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/compose_prompt.txt @@ -0,0 +1,155 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dimly lit, moody interior room, resembling a gaming setup or a neon-lit bedroom, strongly illuminated by contrasting blue and red light sources against dark shadows.", + "activity": "Two people are hanging out in the dark; one person is forming a heart shape with their hands under a warm red light, while the other is manipulating a small object nearby under a blue glow.", + "composition": "Mid-shot to close-up focused on the glowing hands and textured surfaces in the foreground. Shallow depth of field blurs the background door, wooden structures, and scattered LED indicators. 3:2 landscape framing to capture both sets of hands side by side while showing the ambient room lighting.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_forming_heart", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_282814.npy:person:0", + "source_name": "person forming heart", + "description": "Two hands forming a heart shape, beautifully illuminated by a faint reddish light against the dark background.", + "role_in_scene": "In the center-left foreground, forming a heart shape with hands as a gesture of affection." 
+ }, + { + "name": "person_manipulating_object", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_76307.npy:person:0", + "source_name": "person", + "description": "A pair of hands visible in dim lighting, carefully manipulating a small, unseen item.", + "role_in_scene": "In the center-right foreground, interacting with something small near the heart-forming hands." + } + ], + "objects": [ + { + "name": "small_blue_led", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_165181.npy:object:2", + "source_name": "small blue light", + "description": "A tiny, bright blue point of light, acting as an LED indicator.", + "role_in_scene": "Glowing softly on a piece of electronic equipment in the deep background." + }, + { + "name": "blurry_blue_light", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_97360.npy:object:0", + "source_name": "blue light", + "description": "A faint, blurry blue light source glowing in the dark.", + "role_in_scene": "Providing ambient blue backlighting on the right side of the scene." + }, + { + "name": "ambient_blue_glow", + "source_index": 9, + "source_image_id": "Ego4D:ego4d_video/EGO_97324.npy:object:0", + "source_name": "blue light source", + "description": "A diffuse, faint blue glowing area.", + "role_in_scene": "Illuminating the shadows behind the manipulating hands." + }, + { + "name": "background_door", + "source_index": 10, + "source_image_id": "Ego4D:ego4d_video/EGO_94739.npy:object:3", + "source_name": "door", + "description": "A dark-colored door and door frame, barely visible in the low light.", + "role_in_scene": "Providing architectural depth in the blurred background on the left." 
+ }, + { + "name": "blue_lit_fabric_backdrop", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_93411.npy:object:2", + "source_name": "textured background", + "description": "A wrinkled surface illuminated with a blue hue, resembling crumpled fabric.", + "role_in_scene": "Draped in the background, catching the blue ambient light." + }, + { + "name": "dim_wooden_floor", + "source_index": 12, + "source_image_id": "EPIC-Kitchens:P28_114:object:3", + "source_name": "wooden floor", + "description": "A light oak-colored wood floor with rectangular planks.", + "role_in_scene": "Partially visible at the very bottom edge of the frame, fading into the dark shadows." + }, + { + "name": "small_green_led", + "source_index": 16, + "source_image_id": "Ego4D:ego4d_video/EGO_294896.npy:object:0", + "source_name": "green light", + "description": "A tiny, dim green LED indicator light.", + "role_in_scene": "Shining faintly from a device in the dark background on the right." + }, + { + "name": "ambient_red_patch", + "source_index": 18, + "source_image_id": "Ego4D:ego4d_video/EGO_232140.npy:object:0", + "source_name": "faint red patch", + "description": "A faintly illuminated irregular red area.", + "role_in_scene": "Casting a warm red glow onto the surrounding surfaces near the heart-forming hands." + }, + { + "name": "dark_wooden_structure", + "source_index": 19, + "source_image_id": "Ego4D:ego4d_video/EGO_256074.npy:object:1", + "source_name": "wooden structure", + "description": "A dimly lit wooden structure, resembling part of a chair or desk.", + "role_in_scene": "Located in the mid-ground, anchoring the scene's spatial layout." + }, + { + "name": "dark_blue_fabric_surface", + "source_index": 22, + "source_image_id": "Ego4D:ego4d_video/EGO_93412.npy:object:1", + "source_name": "textured surface", + "description": "Dark blue material with folds and wrinkles.", + "role_in_scene": "Covering the table or surface just below the interacting hands." 
+ }, + { + "name": "grid_pattern_deskmat", + "source_index": 23, + "source_image_id": "Ego4D:ego4d_video/EGO_40027.npy:object:0", + "source_name": "patterned surface", + "description": "A textured surface with a repeating grid-like pattern, highly reflective of blue light.", + "role_in_scene": "Acting as a desk mat underneath the hands, catching the bright blue screen light." + }, + { + "name": "blue_monitor_screen", + "source_index": 24, + "source_image_id": "Ego4D:ego4d_video/EGO_27241.npy:object:0", + "source_name": "blue light source", + "description": "A bright blue rectangular light source, acting as a glowing screen.", + "role_in_scene": "Positioned just out of focus on the right, casting heavy blue light onto the scene." + }, + { + "name": "red_light_reflection_on_arm", + "source_index": 25, + "source_image_id": "Ego4D:ego4d_video/EGO_282711.npy:object:0", + "source_name": "red light reflection", + "description": "A thin, elongated red glow reflecting off a surface.", + "role_in_scene": "Reflecting sharply off the forearm of the person forming the heart." 
+ } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..9805b351a171f7edccbfdc362dee872266e763ab Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..801defac20a2ea5dad657b341b3406ed35192fe0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurred_shapes.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurred_shapes.png new file mode 100644 index 0000000000000000000000000000000000000000..b9195c21d7dc01ab40896b05418eabf4126ca565 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurred_shapes.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurry_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurry_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..9228fa706cc5629664e8ce06cfd31af840b485ea Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_blurry_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_bright_blue_panel.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_bright_blue_panel.png new file mode 100644 index 0000000000000000000000000000000000000000..2834b4b13dcfc3e20c7607c7a4cb1c20b669a4c1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_bright_blue_panel.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3aa06eb1021e79a4964d9cd73508a6c1dbf6ab5414f1258619ea21799b2b769 +size 139822 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_dark_door.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_dark_door.png new file mode 100644 index 0000000000000000000000000000000000000000..7a9f0ad862bd520d8badc394b92d6e5c885f3a7e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_dark_door.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61335fbd26e00b8922af84a4994147409fd08fd515362671602d28f66a73e042 +size 192338 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_door_frame.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_door_frame.png new file mode 100644 index 0000000000000000000000000000000000000000..f3aa3842817c4b011097bb74e561df574291f50b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_door_frame.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28db9bcd7f1f6bb4e4221481aed1c072732e6e58393f757245ecb91bdf62274 +size 212672 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_blue_glow.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_blue_glow.png new file mode 100644 index 0000000000000000000000000000000000000000..f8013c6b15e7a24e8122510f5de4d18fb4df02ef Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_blue_glow.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_red_patch.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_red_patch.png new file mode 100644 index 0000000000000000000000000000000000000000..afbb0e177241aa6c134e4b741be0557eba62eca0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_faint_red_patch.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_floor.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_floor.png new file mode 100644 index 0000000000000000000000000000000000000000..2131d40e4ae466bd2375431aaa7481e2ab7af5c8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_floor.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1506031cbea86d92f43fd00372cbeaaf76a8c6de072537ebb04da57e1b7226 +size 222443 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..44cbf8c66dd20f74916ccb081568ba441b2d4825 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_patterned_surface.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968d4cb0422953eae70017bd60ed636c4c5a4af256da00391fcee59f816f747c +size 222356 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_forming_heart.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_forming_heart.png new file mode 100644 
index 0000000000000000000000000000000000000000..93c51fa0dc4a8db9234705d8aabdc8ec085d27d2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_forming_heart.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53af09acca9c700d26e233ced8f53f1c9c62a32e59482741a7f85d0532ee67b +size 610158 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_holding_red_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_holding_red_object.png new file mode 100644 index 0000000000000000000000000000000000000000..b0ab7b0040578e7fa3ffd6bcb556b57d79d6eb31 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_holding_red_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b241080adc89a3811100415289af849f3bb5b61c138e92d15c1805738222cf4b +size 610279 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_manipulating_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_manipulating_object.png new file mode 100644 index 0000000000000000000000000000000000000000..d3792ed03cb38e86cc3534026590076b215fe91a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_person_manipulating_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3b23d6a426f763d5cd32b21986e14ef8ecbb5f5e4bcbac4d85a4d66b905e61 +size 318397 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_rectangular_blue_screen.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_rectangular_blue_screen.png new file mode 100644 index 0000000000000000000000000000000000000000..e375b90a2b425af2bfac2a8a23840d0eec8b01d9 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_rectangular_blue_screen.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3992e8ae53f691650f259fc6e418d32d842ca63804c1d9691a3b0a75d84eca +size 149324 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_red_light_reflection.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_red_light_reflection.png new file mode 100644 index 0000000000000000000000000000000000000000..77665866079b2df9281b384635952331d7432868 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_red_light_reflection.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..2395b0bba68ed9f860423223e25ec2ae79e13879 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_crumpled_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_crumpled_object.png new file mode 100644 index 0000000000000000000000000000000000000000..172dba5ff5440ff63e77351a7eb0922867ec415c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_crumpled_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..9d41e379eda27dcf2f224c6916103867b5eb7af4 Binary files 
/dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_small_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background.png new file mode 100644 index 0000000000000000000000000000000000000000..306f277385aa88829918e48bfee813560a12d6a6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3579ef8a318470835c60e24205ac553fac3f7b4fe6a831c46b1ebb79ccacaceb +size 118094 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background_drapery.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background_drapery.png new file mode 100644 index 0000000000000000000000000000000000000000..ab2ef0814069c430ac5a2577fbe0cb368b075a95 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_background_drapery.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a64065a04b1a89526874090ad8bafc63f151f295a8caa7df0273f5e8bf2b528 +size 113191 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..bfbabbcf7c071f373298f8619196bf66306a0276 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_surface.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8a5ddb2feda29ad184bec904f47aad70a6cdcab7b7c36b7e7cb0b78e33947a +size 586788 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_table_cloth.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_table_cloth.png new file mode 100644 index 0000000000000000000000000000000000000000..5ad22d896954e149a1df715e6e4f4926d78464da --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_textured_table_cloth.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0cb4142e0c7732477a56bfcedd5e583424a1a9d19a308ea0a3d9b2f61656821 +size 587313 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_chair.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_chair.png new file mode 100644 index 0000000000000000000000000000000000000000..e633d45446c508310b3154eb367f592b77c722f1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_chair.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247bae50bb44ed25863fae007bbd832dbc19bba59535daaa0d31eff5ae7c4d6e +size 285386 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_structure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..c3117c547a437c49b6eb355caaf5fffa974f85ea --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/crops/detect_refine_wooden_structure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2811ae83bf00c07f19ebc3b07f23ebbaa00fe0d4abbe653df4a2c707296b9993 +size 289736 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/main_image.png new file mode 100644 index 
0000000000000000000000000000000000000000..a29189336996d20cc79c2db7ace9f337e925d573 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfc653e1b80a7af507b4003085e936cd625e11f521e14a02d003177052485bb +size 1221188 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..abdd7f305e5c6ff796f81db0c73efea38bf4ee2e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/plan.json @@ -0,0 +1,331 @@ +{ + "sample_id": "sample_000002", + "target_total": 15, + "target_people": 2, + "target_objects": 13, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "An immersive, dimly-lit interactive light art installation room, where sharp darkness is interrupted by glowing LEDs, screens, and colorful illuminated textures.", + "activity": "Two people are playfully interacting with the vibrant light exhibits; one holds up a bright red glowing object, while the other forms a heart shape with their hands against a warm red backlight.", + "composition": "Mid-shot emphasizing the glowing light sources and the hands interacting with them. 
Deep depth of field captures the spatial arrangement of the dark room, with illuminated fabrics, a wooden structure, and a door fading into the shadows.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_holding_red_object", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_172550.npy:person:0", + "source_name": "person", + "description": "Only hands and arms are visible, illuminated softly as they hold up a distinct red object in the darkness.", + "role_in_scene": "Holding the red object toward the center-left to cast a soft red glow around." + }, + { + "name": "person_forming_heart", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_282814.npy:person:0", + "source_name": "person forming heart", + "description": "Two hands connecting to form a heart shape, highlighted from behind by a faint reddish-orange light.", + "role_in_scene": "Positioned towards the center-right, making a heart gesture in front of a faint red light." + } + ], + "objects": [ + { + "name": "small_blue_light", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_165181.npy:object:2", + "source_name": "small blue light", + "description": "A tiny, bright blue LED point of light glowing distinctly in the shadow.", + "role_in_scene": "Attached to a wall panel on the right side of the room." 
+ }, + { + "name": "small_crumpled_object", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_259555.npy:object:0", + "source_name": "small object", + "description": "A small, dark, crumpled item that catches faint bluish reflections on its surface.", + "role_in_scene": "Resting on a surface near the hands holding the red object." + }, + { + "name": "blurry_blue_light", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_97360.npy:object:0", + "source_name": "blue light", + "description": "A faint, out-of-focus blue light source hovering in the deep background.", + "role_in_scene": "Adding ambient background depth to the right." + }, + { + "name": "blue_light_source", + "source_index": 9, + "source_image_id": "Ego4D:ego4d_video/EGO_97324.npy:object:0", + "source_name": "blue light source", + "description": "A blurry blue glowing patch fading into the surrounding black background.", + "role_in_scene": "Projected onto the floor or lower wall to the right." + }, + { + "name": "dark_door", + "source_index": 10, + "source_image_id": "Ego4D:ego4d_video/EGO_94739.npy:object:3", + "source_name": "door", + "description": "A solid dark-colored door and door frame barely distinguishable from the shadows.", + "role_in_scene": "Anchoring the background setting on the far left side." + }, + { + "name": "textured_background", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_93411.npy:object:2", + "source_name": "textured background", + "description": "A backdrop of crumpled material covered in wrinkles, illuminated by a cool blue hue.", + "role_in_scene": "Draped along the wall on the left, catching the blue light." 
+ }, + { + "name": "blurred_shapes", + "source_index": 14, + "source_image_id": "Ego4D:ego4d_video/EGO_76265.npy:object:1", + "source_name": "blurred shapes", + "description": "Indistinct, blurred luminous white and blue shapes floating against the dark.", + "role_in_scene": "Abstract light reflections seen in the upper right background." + }, + { + "name": "green_light", + "source_index": 16, + "source_image_id": "Ego4D:ego4d_video/EGO_294896.npy:object:0", + "source_name": "green light", + "description": "A very small, dim green LED indicator dot glowing steadily.", + "role_in_scene": "Located on the edge of the dark door frame." + }, + { + "name": "faint_red_patch", + "source_index": 18, + "source_image_id": "Ego4D:ego4d_video/EGO_232140.npy:object:0", + "source_name": "faint red patch", + "description": "A faintly illuminated irregular patch of red light.", + "role_in_scene": "Bouncing off a surface in the upper left corner of the room." + }, + { + "name": "wooden_structure", + "source_index": 19, + "source_image_id": "Ego4D:ego4d_video/EGO_256074.npy:object:1", + "source_name": "wooden structure", + "description": "A faint wooden structure, resembling a chair, slightly lit by ambient light.", + "role_in_scene": "Placed in the background behind the person forming the heart." + }, + { + "name": "textured_surface", + "source_index": 22, + "source_image_id": "Ego4D:ego4d_video/EGO_93412.npy:object:1", + "source_name": "textured surface", + "description": "A dark blue material with distinct, rich folds and wrinkles.", + "role_in_scene": "Covering a low table or display stand in the foreground." + }, + { + "name": "patterned_surface", + "source_index": 23, + "source_image_id": "Ego4D:ego4d_video/EGO_40027.npy:object:0", + "source_name": "patterned surface", + "description": "A geometric surface covered in a repeating chevron grid pattern, harshly illuminated by blue light.", + "role_in_scene": "Serving as a decorative wall installation behind the subjects." 
+ }, + { + "name": "rectangular_blue_screen", + "source_index": 24, + "source_image_id": "Ego4D:ego4d_video/EGO_27241.npy:object:0", + "source_name": "blue light source", + "description": "A bright blue rectangular light panel casting distinct sharp shadows.", + "role_in_scene": "Positioned near the bottom right to light up the nearby textured fabric." + } + ] + }, + "expected_subjects": [ + { + "name": "person_holding_red_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_172550.npy:person:0", + "source_name": "person", + "source_description": "A person holding a red object. Only the hands and arms are visible. Source dataset: Ego4D. Scene context: A close-up view of a person holding a red object in a dimly lit environment, possibly outdoors or near a window.", + "sub_caption": "person: Only hands and arms are visible, illuminated softly as they hold up a distinct red object in the darkness.. Scene role: Holding the red object toward the center-left to cast a soft red glow around.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_forming_heart", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282814.npy:person:0", + "source_name": "person forming heart", + "source_description": "Two hands forming a heart shape, illuminated by a faint reddish light against a dark background. Source dataset: Ego4D. Scene context: Two hands forming a heart shape are visible in a very dark setting with faint orange and blue lights in the background.", + "sub_caption": "person forming heart: Two hands connecting to form a heart shape, highlighted from behind by a faint reddish-orange light.. 
Scene role: Positioned towards the center-right, making a heart gesture in front of a faint red light.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "small_blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165181.npy:object:2", + "source_name": "small blue light", + "source_description": "A tiny, bright blue point of light, possibly an LED indicator or reflection, situated towards the center-right. Source dataset: Ego4D. Scene context: A very dark, low-visibility indoor scene with faint blue and greenish lighting, showing indistinct shapes.", + "sub_caption": "small blue light: A tiny, bright blue LED point of light glowing distinctly in the shadow.. Scene role: Attached to a wall panel on the right side of the room.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "small_crumpled_object", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259555.npy:object:0", + "source_name": "small object", + "source_description": "A small, dark, crumpled item with faint bluish reflections held between the fingers of the person's hand. Source dataset: Ego4D. Scene context: In a very dimly lit environment, a person's hand holding a small object is visible against a backdrop of patterned textile.", + "sub_caption": "small object: A small, dark, crumpled item that catches faint bluish reflections on its surface.. Scene role: Resting on a surface near the hands holding the red object.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blurry_blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97360.npy:object:0", + "source_name": "blue light", + "source_description": "A faint, blurry blue light source or reflection in the darkness. Source dataset: Ego4D. 
Scene context: The image is almost completely dark, showing only a small, blurry blue light source or reflection on the right side.", + "sub_caption": "blue light: A faint, out-of-focus blue light source hovering in the deep background.. Scene role: Adding ambient background depth to the right.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97324.npy:object:0", + "source_name": "blue light source", + "source_description": "A faint, blurry blue light source visible against the black background on the right side of the image. Source dataset: Ego4D. Scene context: A predominantly dark scene with a small, indistinct blue glowing area visible on the right side.", + "sub_caption": "blue light source: A blurry blue glowing patch fading into the surrounding black background.. Scene role: Projected onto the floor or lower wall to the right.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "dark_door", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_94739.npy:object:3", + "source_name": "door", + "source_description": "A dark-colored door or door frame visible on the left side of the image. Source dataset: Ego4D. Scene context: A close-up view of a person's hand holding a remote control pointing towards a wall.", + "sub_caption": "door: A solid dark-colored door and door frame barely distinguishable from the shadows.. 
Scene role: Anchoring the background setting on the far left side.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "textured_background", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_93411.npy:object:2", + "source_name": "textured background", + "source_description": "A surface with wrinkles and folds, illuminated with a blue hue, resembling fabric or crumpled material. Source dataset: Ego4D. Scene context: The image shows a dark scene, possibly indoors or at night, with a prominent light reflection and a dark circular object against a textured, slightly illuminated background.", + "sub_caption": "textured background: A backdrop of crumpled material covered in wrinkles, illuminated by a cool blue hue.. Scene role: Draped along the wall on the left, catching the blue light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blurred_shapes", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76265.npy:object:1", + "source_name": "blurred shapes", + "source_description": "Indistinct, blurred shapes in the upper right corner, with some white and blue colors visible. Source dataset: Ego4D. Scene context: A dark scene with a red glowing object, possibly a light or reflection, and some blurred shapes in the background.", + "sub_caption": "blurred shapes: Indistinct, blurred luminous white and blue shapes floating against the dark.. Scene role: Abstract light reflections seen in the upper right background.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "green_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_294896.npy:object:0", + "source_name": "green light", + "source_description": "A small, dim green light, possibly an LED indicator, glowing in the darkness on the right side of the frame. 
Source dataset: Ego4D. Scene context: A nearly pitch-black scene with a single small green light visible on the right side.", + "sub_caption": "green light: A very small, dim green LED indicator dot glowing steadily.. Scene role: Located on the edge of the dark door frame.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "faint_red_patch", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_232140.npy:object:0", + "source_name": "faint red patch", + "source_description": "A small, faintly illuminated irregular red shape located in the upper left area. Source dataset: Ego4D. Scene context: An extremely dark and poorly lit scene with only a few faint red shapes barely visible against a black background.", + "sub_caption": "faint red patch: A faintly illuminated irregular patch of red light.. Scene role: Bouncing off a surface in the upper left corner of the room.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "wooden_structure", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_256074.npy:object:1", + "source_name": "wooden structure", + "source_description": "A wooden structure, possibly a chair or part of a building, visible in the background, illuminated by a light source. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red sweatshirt in a dimly lit environment, possibly outdoors at night.", + "sub_caption": "wooden structure: A faint wooden structure, resembling a chair, slightly lit by ambient light.. 
Scene role: Placed in the background behind the person forming the heart.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "textured_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_93412.npy:object:1", + "source_name": "textured surface", + "source_description": "A dark blue material with folds and wrinkles, resembling fabric. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object over a textured blue surface.", + "sub_caption": "textured surface: A dark blue material with distinct, rich folds and wrinkles.. Scene role: Covering a low table or display stand in the foreground.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "patterned_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_40027.npy:object:0", + "source_name": "patterned surface", + "source_description": "A textured surface with a repeating grid-like pattern of small, roughly triangular or chevron shapes, illuminated by a strong blue light against a black background. Source dataset: Ego4D. Scene context: A close-up view of a patterned surface illuminated by blue light in a dark environment.", + "sub_caption": "patterned surface: A geometric surface covered in a repeating chevron grid pattern, harshly illuminated by blue light.. Scene role: Serving as a decorative wall installation behind the subjects.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "rectangular_blue_screen", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_27241.npy:object:0", + "source_name": "blue light source", + "source_description": "A bright blue rectangular light source, possibly a screen or a light panel, illuminating the surrounding area. Source dataset: Ego4D. 
Scene context: A very dark image with mostly black space and some blue light illuminating parts of objects at the bottom right.", + "sub_caption": "blue light source: A bright blue rectangular light panel casting distinct sharp shadows.. Scene role: Positioned near the bottom right to light up the nearby textured fabric.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000002/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..0db8ba2eb2f807085e6e273093a0d60b7060c9fc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000002/vocab_task.json @@ -0,0 +1,224 @@ +{ + "task_id": "sample_000002", + "sample_id": "sample_000002", + "sample_index": 2, + "target_total": 15, + "target_people": 2, + "target_objects": 13, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 169, + "image_id": "Ego4D:ego4d_video/EGO_172550.npy:person:0", + "name": "person", + "description": "A person holding a red object. Only the hands and arms are visible. Source dataset: Ego4D. Scene context: A close-up view of a person holding a red object in a dimly lit environment, possibly outdoors or near a window." + }, + { + "candidate_index": 1, + "source_offset": 297, + "image_id": "Ego4D:ego4d_video/EGO_19792.npy:person:0", + "name": "person", + "description": "Mostly obscured by darkness, a person's lower legs and feet are partially visible against the tiled floor. One leg shows a patch of bare skin, and a dark shoe is visible near the bottom edge. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene showing what appears to be a person's legs and feet on a tiled floor." 
+ }, + { + "candidate_index": 2, + "source_offset": 762, + "image_id": "Ego4D:ego4d_video/EGO_282814.npy:person:0", + "name": "person forming heart", + "description": "Two hands forming a heart shape, illuminated by a faint reddish light against a dark background. Source dataset: Ego4D. Scene context: Two hands forming a heart shape are visible in a very dark setting with faint orange and blue lights in the background." + }, + { + "candidate_index": 3, + "source_offset": 1035, + "image_id": "Ego4D:ego4d_video/EGO_76307.npy:person:0", + "name": "person", + "description": "Only the hands of the person are visible, illuminated by a reddish light, appearing to hold or manipulate something small. Source dataset: Ego4D. Scene context: A dark scene with a pair of hands visible in a dim, reddish light." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 4694, + "image_id": "Ego4D:ego4d_video/EGO_165181.npy:object:2", + "name": "small blue light", + "description": "A tiny, bright blue point of light, possibly an LED indicator or reflection, situated towards the center-right. Source dataset: Ego4D. Scene context: A very dark, low-visibility indoor scene with faint blue and greenish lighting, showing indistinct shapes." + }, + { + "candidate_index": 1, + "source_offset": 6694, + "image_id": "Ego4D:ego4d_video/EGO_24752.npy:object:2", + "name": "blue object 3", + "description": "Another faint blue shape, rectangular, lower right. Source dataset: Ego4D. Scene context: A very dark scene with a few dimly lit blue objects visible." + }, + { + "candidate_index": 2, + "source_offset": 5576, + "image_id": "Ego4D:ego4d_video/EGO_193048.npy:object:0", + "name": "broom handle", + "description": "A light brown or yellow wooden or plastic handle, extending diagonally into the frame. Source dataset: Ego4D. Scene context: A close-up view of a broom handle and some dark object attached to it, in front of a blurry bluish-gray background." 
+ }, + { + "candidate_index": 3, + "source_offset": 7068, + "image_id": "Ego4D:ego4d_video/EGO_259555.npy:object:0", + "name": "small object", + "description": "A small, dark, crumpled item with faint bluish reflections held between the fingers of the person's hand. Source dataset: Ego4D. Scene context: In a very dimly lit environment, a person's hand holding a small object is visible against a backdrop of patterned textile." + }, + { + "candidate_index": 4, + "source_offset": 9472, + "image_id": "Ego4D:ego4d_video/EGO_97360.npy:object:0", + "name": "blue light", + "description": "A faint, blurry blue light source or reflection in the darkness. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, blurry blue light source or reflection on the right side." + }, + { + "candidate_index": 5, + "source_offset": 3772, + "image_id": "EPIC-Kitchens:P35_107:object:18", + "name": "wooden cabinet 1", + "description": "A wooden cabinet with drawers, located to the left of the table. Source dataset: EPIC-Kitchens. Scene context: A dining table covered with a patterned tablecloth, surrounded by chairs, in a room with a TV and wooden furniture." + }, + { + "candidate_index": 6, + "source_offset": 5262, + "image_id": "Ego4D:ego4d_video/EGO_173275.npy:object:1", + "name": "rectangular object", + "description": "another very faint, barely visible rectangular shape in the dark Source dataset: Ego4D. Scene context: An extremely dark scene with a few faint rectangular objects barely visible." + }, + { + "candidate_index": 7, + "source_offset": 2572, + "image_id": "EPIC-Kitchens:P26_111:object:4", + "name": "lighter", + "description": "Small black object, likely a lighter, lying on the counter next to the stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, dirty dishes, and cooking pots." 
+ }, + { + "candidate_index": 8, + "source_offset": 3580, + "image_id": "EPIC-Kitchens:P34_109:object:2", + "name": "wooden bowl", + "description": "A round wooden bowl containing miscellaneous items, located on the left. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a person standing in a kitchen, looking down at a dark countertop with various items and a pot boiling on the stove." + }, + { + "candidate_index": 9, + "source_offset": 9435, + "image_id": "Ego4D:ego4d_video/EGO_97324.npy:object:0", + "name": "blue light source", + "description": "A faint, blurry blue light source visible against the black background on the right side of the image. Source dataset: Ego4D. Scene context: A predominantly dark scene with a small, indistinct blue glowing area visible on the right side." + }, + { + "candidate_index": 10, + "source_offset": 9272, + "image_id": "Ego4D:ego4d_video/EGO_94739.npy:object:3", + "name": "door", + "description": "A dark-colored door or door frame visible on the left side of the image. Source dataset: Ego4D. Scene context: A close-up view of a person's hand holding a remote control pointing towards a wall." + }, + { + "candidate_index": 11, + "source_offset": 9217, + "image_id": "Ego4D:ego4d_video/EGO_93411.npy:object:2", + "name": "textured background", + "description": "A surface with wrinkles and folds, illuminated with a blue hue, resembling fabric or crumpled material. Source dataset: Ego4D. Scene context: The image shows a dark scene, possibly indoors or at night, with a prominent light reflection and a dark circular object against a textured, slightly illuminated background." + }, + { + "candidate_index": 12, + "source_offset": 2961, + "image_id": "EPIC-Kitchens:P28_114:object:3", + "name": "wooden floor", + "description": "A light oak-colored wood laminate or hardwood floor with staggered rectangular planks. Source dataset: EPIC-Kitchens. 
Scene context: A top-down view of a galley kitchen with wooden flooring, showing a sink area on the left and an oven with countertop prep area on the right." + }, + { + "candidate_index": 13, + "source_offset": 2936, + "image_id": "EPIC-Kitchens:P28_109:object:12", + "name": "bracelet", + "description": "A white beaded bracelet worn on the person's right arm. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a kitchen scene with a sink, dish rack, and wooden flooring." + }, + { + "candidate_index": 14, + "source_offset": 8878, + "image_id": "Ego4D:ego4d_video/EGO_76265.npy:object:1", + "name": "blurred shapes", + "description": "Indistinct, blurred shapes in the upper right corner, with some white and blue colors visible. Source dataset: Ego4D. Scene context: A dark scene with a red glowing object, possibly a light or reflection, and some blurred shapes in the background." + }, + { + "candidate_index": 15, + "source_offset": 2085, + "image_id": "EPIC-Kitchens:P22_107:object:11", + "name": "shelf", + "description": "A small wire shelf attached to the wall above the sink, holding a sponge and other items. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a double stainless steel kitchen sink with a draining rack and various items around it." + }, + { + "candidate_index": 16, + "source_offset": 7906, + "image_id": "Ego4D:ego4d_video/EGO_294896.npy:object:0", + "name": "green light", + "description": "A small, dim green light, possibly an LED indicator, glowing in the darkness on the right side of the frame. Source dataset: Ego4D. Scene context: A nearly pitch-black scene with a single small green light visible on the right side." + }, + { + "candidate_index": 17, + "source_offset": 1312, + "image_id": "EPIC-Kitchens:P04_103:object:9", + "name": "Silver Spoon", + "description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. 
Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around." + }, + { + "candidate_index": 18, + "source_offset": 6364, + "image_id": "Ego4D:ego4d_video/EGO_232140.npy:object:0", + "name": "faint red patch", + "description": "A small, faintly illuminated irregular red shape located in the upper left area. Source dataset: Ego4D. Scene context: An extremely dark and poorly lit scene with only a few faint red shapes barely visible against a black background." + }, + { + "candidate_index": 19, + "source_offset": 6919, + "image_id": "Ego4D:ego4d_video/EGO_256074.npy:object:1", + "name": "wooden structure", + "description": "A wooden structure, possibly a chair or part of a building, visible in the background, illuminated by a light source. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red sweatshirt in a dimly lit environment, possibly outdoors at night." + }, + { + "candidate_index": 20, + "source_offset": 5165, + "image_id": "Ego4D:ego4d_video/EGO_172585.npy:object:0", + "name": "fabric fragment 1", + "description": "A blurry, light-colored piece of fabric or material in the upper center, with a rounded edge. Source dataset: Ego4D. Scene context: A dark, blurry view showing fragments of clothing and indistinct shapes, likely in a poorly lit environment." + }, + { + "candidate_index": 21, + "source_offset": 9478, + "image_id": "Ego4D:ego4d_video/EGO_97368.npy:object:0", + "name": "blue light", + "description": "A small, blurry, irregular patch of bright blue light glowing in the darkness. Source dataset: Ego4D. Scene context: A completely dark scene with a single, small, blurred blue light source visible on the right side." + }, + { + "candidate_index": 22, + "source_offset": 9205, + "image_id": "Ego4D:ego4d_video/EGO_93412.npy:object:1", + "name": "textured surface", + "description": "A dark blue material with folds and wrinkles, resembling fabric. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding an object over a textured blue surface." + }, + { + "candidate_index": 23, + "source_offset": 8385, + "image_id": "Ego4D:ego4d_video/EGO_40027.npy:object:0", + "name": "patterned surface", + "description": "A textured surface with a repeating grid-like pattern of small, roughly triangular or chevron shapes, illuminated by a strong blue light against a black background. Source dataset: Ego4D. Scene context: A close-up view of a patterned surface illuminated by blue light in a dark environment." + }, + { + "candidate_index": 24, + "source_offset": 7344, + "image_id": "Ego4D:ego4d_video/EGO_27241.npy:object:0", + "name": "blue light source", + "description": "A bright blue rectangular light source, possibly a screen or a light panel, illuminating the surrounding area. Source dataset: Ego4D. Scene context: A very dark image with mostly black space and some blue light illuminating parts of objects at the bottom right." + }, + { + "candidate_index": 25, + "source_offset": 7504, + "image_id": "Ego4D:ego4d_video/EGO_282711.npy:object:0", + "name": "red light reflection", + "description": "a thin, elongated red glow, possibly reflecting off a limb or a piece of clothing in the lower-middle part of the image Source dataset: Ego4D. 
Scene context: a dark scene with red and blue lights outlining what appears to be a person's limbs or clothing" + } + ], + "rng_seed": 1782137451, + "created_at": 1782259667.6103773 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..af9e8f4ab52720790dfec2bca819e008127f1bbb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac44f32592cca17a13a6b5baf311f183e59dfbb2a635010efc8e02ce63a6b902 +size 1199804 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..2264e043851c87c4a06bc973e00d2fe12a04efcd --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/compose_prompt.txt @@ -0,0 +1,59 @@ +Render the following JSON scene specification as a photorealistic 1152x864 image using a true 4:3 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A mysterious, darkened chamber illuminated primarily by an intense red light source.", + "activity": "A person's hands are carefully holding and examining an open book under the red light, with a dark arched doorway looming in the background.", + "composition": "Medium close-up shot focused on the foreground hands and book, bathed in red lighting, while the dark arched doorway serves as a shadowy, out-of-focus background element adding depth to the scene.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "examining_person", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with an object.", + "role_in_scene": "Holding the book open in the red light" + } + ], + "objects": [ + { + "name": "red_lit_book", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "source_name": "book", + "description": "An open book with pages visible, illuminated in a red hue, showing abstract graphical elements instead of readable text.", + "role_in_scene": "Resting in the person's hands, being examined under the light" + }, + { + "name": "arched_doorway", + "source_index": 1, + "source_image_id": 
"Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "description": "A darker, arched rectangular shape on the wall, suggesting an opening to another dark area.", + "role_in_scene": "Providing depth in the dimly lit background" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..e0d57e3fb851201ebff36bebec00fc5f2fde39e7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_person_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f026c569a18fbb666de3422056d146e3b0e54ca3b8bcb7ef8d5f3d2210c5a4c +size 771962 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_red_illuminated_book.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_red_illuminated_book.png new file mode 100644 index 0000000000000000000000000000000000000000..0c49d9dd3f52a9f906eb34766a3ab7f22b8c6e65 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_red_illuminated_book.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9359e2ae0ac2769b02372c90e5c6785a9e4b2748222b5aa8de598f0854b0cf6a +size 272196 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_shadowy_doorway.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_shadowy_doorway.png new file mode 100644 index 0000000000000000000000000000000000000000..0c01a738b594a854c98481c0c6238c16727a093b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/detect_refine_shadowy_doorway.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f0dba4de0460f2a86d064816d8be6f626d5672a7df5de5ce76b258a2765efdac +size 491609 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..d0ca3852ac0287329b7f9b8f2a1d2580392145b0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_person_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45eb07facb1a79c2e380de92553251a710533ad4df16752ea4764accf888ec0 +size 953961 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_red_illuminated_book.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_red_illuminated_book.png new file mode 100644 index 0000000000000000000000000000000000000000..16a8964177ac73d730ef9624b9dfebb37d1dd219 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_red_illuminated_book.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa15816bc6cf8dfec8e92358214728777b9f82af17bda2286ed7ffe24bd0eb59 +size 332136 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_shadowy_doorway.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_shadowy_doorway.png new file mode 100644 index 0000000000000000000000000000000000000000..22c2f9bb11fa6760ee23c5de0f4a290ab8ab1e59 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/crops/diversify_input_shadowy_doorway.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874217f9178e08bd2b07d1c1c582994582c8900646f292945d95d563c492b8ea +size 502913 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/detections.json 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..064c0e2ebe98b78b81a9d5d6b0bf7070affc0464 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/detections.json @@ -0,0 +1,59 @@ +[ + { + "name": "person_in_red_light", + "present": true, + "bbox": [ + 0.0, + 0.0, + 0.5655, + 1.0 + ], + "confidence": 0.9, + "notes": "Visible portion of the person holding the book, showing arms and torso in dark red light.", + "coarse_bbox": [ + 0.0, + 0.0, + 0.556, + 1.0 + ], + "refine_crop": "crops/detect_refine_person_in_red_light.png" + }, + { + "name": "red_illuminated_book", + "present": true, + "bbox": [ + 0.2245, + 0.3971, + 0.6708, + 0.6724 + ], + "confidence": 0.98, + "notes": "Tight bounding box capturing the visible extent of the red illuminated book, including its pages and binding.", + "coarse_bbox": [ + 0.222, + 0.396, + 0.671, + 0.674 + ], + "refine_crop": "crops/detect_refine_red_illuminated_book.png" + }, + { + "name": "shadowy_doorway", + "present": true, + "bbox": [ + 0.5153, + 0.0267, + 0.8188, + 0.8345 + ], + "confidence": 0.9, + "notes": "The bounding box covers the entire arched doorway structure visible in the background.", + "coarse_bbox": [ + 0.493, + 0.034, + 0.888, + 0.762 + ], + "refine_crop": "crops/detect_refine_shadowy_doorway.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..2cb7651e58ec29cc60f0ac127b4ea2bf36e75c65 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92b8461169cafa562f92f0f0327336ba7e9c7fe972210b503e8befaaa95a14c +size 1236169 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/plan.json 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..8d5d3a66559cc5730b91a8bd92b353afe2f7cdfd --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/plan.json @@ -0,0 +1,103 @@ +{ + "sample_id": "sample_000003", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A dimly lit, mysterious room predominantly bathed in deep shadows, with a stark red light illuminating a desk area in the foreground and a dark architectural structure in the background.", + "activity": "A person is carefully turning the pages of an open book under the focused red light.", + "composition": "The camera frames the scene from a medium close-up, focusing on the person's hands and the open book in the illuminated foreground. 
The arched doorway is visible in the softly blurred, shadowy background, providing a sense of depth and spatial context.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_in_red_light", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "description": "Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.", + "role_in_scene": "Interacting with the open book, fingers lightly resting on the visible pages." + } + ], + "objects": [ + { + "name": "red_illuminated_book", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "source_name": "book", + "description": "A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.", + "role_in_scene": "Lying open flat on the surface directly beneath the person's hands." + }, + { + "name": "shadowy_doorway", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "description": "A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.", + "role_in_scene": "Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area." 
+ } + ] + }, + "expected_subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "source_description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light.", + "sub_caption": "person: Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.. Scene role: Interacting with the open book, fingers lightly resting on the visible pages.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "red_illuminated_book", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "source_name": "book", + "source_description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness.", + "sub_caption": "book: A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.. Scene role: Lying open flat on the surface directly beneath the person's hands.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "shadowy_doorway", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "source_description": "A darker, arched or rectangular shape on the wall, suggesting an opening to another area. Source dataset: Ego4D. 
Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures.", + "sub_caption": "doorway or opening: A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.. Scene role: Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000003/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references.json new file mode 100644 index 0000000000000000000000000000000000000000..e7d9c656e0f92e1230c7154e3e24e60570f65499 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references.json @@ -0,0 +1,101 @@ +{ + "references": [ + { + "name": "person_in_red_light", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "diversify_input": "crops/diversify_input_person_in_red_light.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 351.0, + 54.0, + 667.0, + 1015.0 + ], + 
"mask_score": 3.471897, + "mask_area_ratio": 0.134706, + "elapsed_seconds": 8.8737 + }, + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "red_illuminated_book", + "ref_image": "references/ref_red_illuminated_book.png", + "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", + "diversify_input": "crops/diversify_input_red_illuminated_book.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", + "output": "references/ref_red_illuminated_book.png", + "mask": "references/sam_mask_red_illuminated_book.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 180.0, + 1002.0, + 858.0 + ], + "mask_score": 3.463648, + "mask_area_ratio": 0.502075, + "elapsed_seconds": 7.3171 + }, + "reference_verify": "references/reference_verify_red_illuminated_book.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "shadowy_doorway", + "ref_image": "references/ref_shadowy_doorway.png", + "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", + "diversify_input": "crops/diversify_input_shadowy_doorway.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", + "output": "references/ref_shadowy_doorway.png", + "mask": "references/sam_mask_shadowy_doorway.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 141.0, + 11.0, + 899.0, + 969.0 + ], + "mask_score": 3.471354, + "mask_area_ratio": 0.542183, + "elapsed_seconds": 7.2941 + }, + "reference_verify": "references/reference_verify_shadowy_doorway.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..d6be8ca410cad3e47d17aea02d219e3cdef0790e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_person_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527db4f6a08c1e40bc53f28df0ee45b6c5f596340f695afafbc77754dc61de8b +size 268010 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_red_illuminated_book.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_red_illuminated_book.png new file mode 100644 index 0000000000000000000000000000000000000000..1aa5197207042712f2832271362f6f8965b6b39f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_red_illuminated_book.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75dfeb7fb07442cfa1804e7b03c3b906680b38f3962e701e4ece321e3b658a9 +size 1040245 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_shadowy_doorway.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_shadowy_doorway.png new file mode 
100644 index 0000000000000000000000000000000000000000..e0743897ea4a4e52cae6425f4d1805a8da6181bc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/ref_shadowy_doorway.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539e15a5348202c3b05581e3d7757cf5cf77209fbd5c0b3499d0afbbb9f554df +size 1032551 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_person_in_red_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_person_in_red_light.json new file mode 100644 index 0000000000000000000000000000000000000000..24ddc7e1c86d378f6b566be172b944daa3bd19f4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_person_in_red_light.json @@ -0,0 +1,46 @@ +{ + "name": "person_in_red_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_in_red_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_in_red_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_ref_person_in_red_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_sam_mask_person_in_red_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + 
"sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 351.0, + 54.0, + 667.0, + 1015.0 + ], + "mask_score": 3.471897, + "mask_area_ratio": 0.134706, + "elapsed_seconds": 8.8737 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image features a complete, uncropped full-body view of a person centered on a white background with ample margins, satisfying all hard requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_red_illuminated_book.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_red_illuminated_book.json new file mode 100644 index 0000000000000000000000000000000000000000..4c8a896e6da915f1889673aeacb14c2e9a10c67c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_red_illuminated_book.json @@ -0,0 +1,46 @@ +{ + "name": "red_illuminated_book", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_red_illuminated_book_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_red_illuminated_book_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_ref_red_illuminated_book_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_sam_mask_red_illuminated_book_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 180.0, + 1002.0, + 858.0 + ], + "mask_score": 3.463648, + "mask_area_ratio": 0.502075, + "elapsed_seconds": 7.3171 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete, isolated open book with a red hue on a white background, matching the subject description perfectly." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_shadowy_doorway.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_shadowy_doorway.json new file mode 100644 index 0000000000000000000000000000000000000000..3de12eac1b74250414b25ae27f542d60ab2172ff --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/reference_verify_shadowy_doorway.json @@ -0,0 +1,46 @@ +{ + "name": "shadowy_doorway", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_shadowy_doorway_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_shadowy_doorway_attempt_01.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_ref_shadowy_doorway_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/candidate_sam_mask_shadowy_doorway_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 141.0, + 11.0, + 899.0, + 969.0 + ], + "mask_score": 3.471354, + "mask_area_ratio": 0.542183, + "elapsed_seconds": 7.2941 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a dark, arched stone doorway isolated on a white background. The surrounding wall is cropped, which is acceptable for an architectural feature." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_person_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_person_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..38fb316e0202ae6a31c1eaa2897b75d56c0d9103 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_person_in_red_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_red_illuminated_book.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_red_illuminated_book.png new file mode 100644 index 0000000000000000000000000000000000000000..b6643e663f6c261172101fda4c068b50738f0d4f Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_red_illuminated_book.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_shadowy_doorway.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_shadowy_doorway.png new file mode 100644 index 0000000000000000000000000000000000000000..2bd4ef2abe1dec8a9bb1ce34e3e2ad7ef060acf8 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/references/sam_mask_shadowy_doorway.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/row.json new file mode 100644 index 0000000000000000000000000000000000000000..1857e365e6a8e81bea3f348ded63dace118c227a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/row.json @@ -0,0 +1,164 @@ +{ + "sample_id": "sample_000003", + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + 
"bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 3, + "n_detected": 3, + "n_subjects": 3, + "subjects": [ + { + "name": "person_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "source_name": "person", + "source_description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light.", + "sub_caption": "person: Visible hands and lower arms, dramatically illuminated by a deep red light, emerging from the dark surroundings.. Scene role: Interacting with the open book, fingers lightly resting on the visible pages.", + "measured_bbox": [ + 0.0, + 0.0, + 0.5655, + 1.0 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_person_in_red_light_attempt_01.png", + "output": "references/ref_person_in_red_light.png", + "mask": "references/sam_mask_person_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 351.0, + 54.0, + 
667.0, + 1015.0 + ], + "mask_score": 3.471897, + "mask_area_ratio": 0.134706, + "elapsed_seconds": 8.8737 + } + }, + { + "name": "red_illuminated_book", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "source_name": "book", + "source_description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness.", + "sub_caption": "book: A large, open book with visible pages and faint layout elements, cast in a striking red hue from an overhead light source.. Scene role: Lying open flat on the surface directly beneath the person's hands.", + "measured_bbox": [ + 0.2245, + 0.3971, + 0.6708, + 0.6724 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_illuminated_book.png", + "raw_ref_image": "references/raw_ref_red_illuminated_book_attempt_01.png", + "reference_verify": "references/reference_verify_red_illuminated_book.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_red_illuminated_book_attempt_01.png", + "output": "references/ref_red_illuminated_book.png", + "mask": "references/sam_mask_red_illuminated_book.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 180.0, + 1002.0, + 858.0 + ], + "mask_score": 3.463648, + "mask_area_ratio": 0.502075, + 
"elapsed_seconds": 7.3171 + } + }, + { + "name": "shadowy_doorway", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "source_name": "doorway or opening", + "source_description": "A darker, arched or rectangular shape on the wall, suggesting an opening to another area. Source dataset: Ego4D. Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures.", + "sub_caption": "doorway or opening: A dark, arched doorway outline set into a shadowy, indistinct wall, suggesting a passage to another dark room.. Scene role: Positioned in the out-of-focus background to establish architectural depth behind the main illuminated desk area.", + "measured_bbox": [ + 0.5153, + 0.0267, + 0.8188, + 0.8345 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_doorway.png", + "raw_ref_image": "references/raw_ref_shadowy_doorway_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_doorway.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000003/references/raw_ref_shadowy_doorway_attempt_01.png", + "output": "references/ref_shadowy_doorway.png", + "mask": "references/sam_mask_shadowy_doorway.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 141.0, + 11.0, + 899.0, + 969.0 + ], + "mask_score": 3.471354, + "mask_area_ratio": 0.542183, + "elapsed_seconds": 7.2941 + } + } + ], + "not_emitted": 
[], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..c91802c8ab67cbb4a71b97bf2084ddab8f708e41 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000003/vocab_task.json @@ -0,0 +1,56 @@ +{ + "task_id": "sample_000003", + "sample_id": "sample_000003", + "sample_index": 3, + "target_total": 3, + "target_people": 1, + "target_objects": 2, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 89, + "image_id": "Ego4D:ego4d_video/EGO_149319.npy:person:0", + "name": "driver", + "description": "Visible hands holding a dark steering wheel, with the rest of the person in silhouette due to low light. Source dataset: Ego4D. Scene context: A close-up, dark view showing parts of a steering wheel and a person's hands operating it." + }, + { + "candidate_index": 1, + "source_offset": 78, + "image_id": "Ego4D:ego4d_video/EGO_135381.npy:person:0", + "name": "person", + "description": "Visible hands and arms of a person in a dark setting, illuminated by red light, interacting with objects. Source dataset: Ego4D. Scene context: A close-up view of a person's hands and arms interacting with objects in a dark setting, illuminated by red and white light." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 4364, + "image_id": "Ego4D:ego4d_video/EGO_157476.npy:object:0", + "name": "object", + "description": "An indistinct object being held or manipulated by the person's hands, with some reflective parts catching the dim light. Source dataset: Ego4D. Scene context: A person's hands are visible in a dimly lit environment, interacting with an object." 
+ }, + { + "candidate_index": 1, + "source_offset": 7952, + "image_id": "Ego4D:ego4d_video/EGO_311165.npy:object:2", + "name": "doorway or opening", + "description": "A darker, arched or rectangular shape on the wall, suggesting an opening to another area. Source dataset: Ego4D. Scene context: A very dark, low-resolution scene indoors, mostly obscured by shadow with some faint brownish light indicating walls or structures." + }, + { + "candidate_index": 2, + "source_offset": 6191, + "image_id": "Ego4D:ego4d_video/EGO_22563.npy:object:1", + "name": "lanyard", + "description": "A thick black strap with gold lettering, draped across the person's torso. Source dataset: Ego4D. Scene context: A close-up view of a person wearing red pants and a black lanyard with gold lettering." + }, + { + "candidate_index": 3, + "source_offset": 6418, + "image_id": "Ego4D:ego4d_video/EGO_236888.npy:object:0", + "name": "book", + "description": "An open book with pages visible, illuminated in a red hue, showing text and some graphical elements. Source dataset: Ego4D. Scene context: A dimly lit scene showing an open book, illuminated by a red light, being held or resting in the darkness." 
+ } + ], + "rng_seed": 1782242180, + "created_at": 1782259667.6245332 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..650b7e1dfd6d81bc6625b0a997f61ccfe23184bf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e214c78019c29dad2ed89e3596b341ac42b4c7882ce1b461955410e9b0d37223 +size 1185511 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a5eb6cdcec3764da40cf102238c32c18710ff15 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/compose_prompt.txt @@ -0,0 +1,75 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, unfinished storage space or basement currently under renovation, featuring a strong cinematic contrast between warm reddish foreground lighting and cool blue background lighting.", + "activity": "A person illuminated by red light is preparing to paint the space, holding a paint roller while standing near stacked boxes and tarp-covered furniture.", + "composition": "Eye-level medium shot. The person is framed slightly off-center in the foreground, lit from the side by a reddish glow. The paint roller is held in their hand. In the midground, the draped tarp and stacked boxes add depth, partially illuminated by the ambient blue hue. In the deep background, the small blue light source glows blurrily, creating a shallow depth of field.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "painter_in_red_light", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_50052.npy:person:0", + "source_name": "person", + "description": "A person whose face is partially visible, wearing a cap, and strongly cast in dramatic reddish light.", + "role_in_scene": "Standing in the foreground, holding the paint roller, looking attentively off-camera." 
+ } + ], + "objects": [ + { + "name": "blue_light_source", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "description": "A small, blurry blue light, appearing as a faint glow against the dark background.", + "role_in_scene": "Shining indistinctly in the distant background, providing the cool ambient light for the room." + }, + { + "name": "paint_roller", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle.", + "role_in_scene": "Held firmly by the person in the foreground." + }, + { + "name": "protective_tarp", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "description": "A draped, light-colored or beige material acting as a protective drop cloth or tarp.", + "role_in_scene": "Covering unseen furniture or materials in the midground." + }, + { + "name": "stacked_boxes", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_39255.npy:object:1", + "source_name": "stacked boxes", + "description": "A tall stack of rectangular boxes or containers, dimly lit with a blue hue.", + "role_in_scene": "Resting against the wall in the midground, adding texture to the dark room." 
+ } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..5b94f7dffaa947bb9f4f17f936d9d36cc88f7d8d Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_draped_tarp.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_draped_tarp.png new file mode 100644 index 0000000000000000000000000000000000000000..97d00eb60d3752713346af9910441771e9239c3f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_draped_tarp.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11abd2b8ec552fadf8d7fc14cb30e2238362065f27eb486bee44d7e6f0422c2b +size 366236 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_paint_roller.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_paint_roller.png new file mode 100644 index 0000000000000000000000000000000000000000..b0ab202a833466f7a40c6bed6020e9cb3c1f9d11 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_paint_roller.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_painter_in_cap.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_painter_in_cap.png new file mode 100644 index 0000000000000000000000000000000000000000..03e0a90085474995f5b0ccd3d9f4afb7f256a0de --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_painter_in_cap.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:25768d172feaa00484aa36e1443a53c87bfdccaa468058141d33050fcf3112b7 +size 425861 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_stacked_storage_boxes.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_stacked_storage_boxes.png new file mode 100644 index 0000000000000000000000000000000000000000..a1159979606bbf8dfa78101c64a2efbe2e34d4ca --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/detect_refine_stacked_storage_boxes.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13264b8010897325b1f9387dcfa1a8348426d5b4eaa27de0d4f0588bf34f8bba +size 167667 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..7e4c031dee84d5a65f6a7782917b1ece0ea9e1da Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_draped_tarp.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_draped_tarp.png new file mode 100644 index 0000000000000000000000000000000000000000..b0556bb04032e8d2f1e3ff3f095c8b71576622d6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_draped_tarp.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f1527481091f2a9c2b1e04a8836d041ec9ad3ebf1f3b145b50c3fa04bc28a4d +size 472461 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_paint_roller.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_paint_roller.png new file mode 100644 index 0000000000000000000000000000000000000000..7c53875decfe89851be604ebb90c2145f81c6c17 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/crops/diversify_input_paint_roller.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..5ad24164cec0f37748a0fb66457ca385521a01b7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/detections.json @@ -0,0 +1,97 @@ +[ + { + "name": "painter_in_cap", + "present": true, + "bbox": [ + 0.0032, + 0.039, + 0.3202, + 1.0 + ], + "confidence": 9.5, + "notes": "A person wearing a cap, casting strong red light on their face.", + "coarse_bbox": [ + 0.0, + 0.037, + 0.321, + 1.0 + ], + "refine_crop": "crops/detect_refine_painter_in_cap.png" + }, + { + "name": "blue_light_source", + "present": true, + "bbox": [ + 0.5155, + 0.3724, + 0.553, + 0.4239 + ], + "confidence": "high", + "notes": "The object is a bright blue light source occupying most of the image.", + "coarse_bbox": [ + 0.517, + 0.375, + 0.553, + 0.424 + ], + "refine_crop": "crops/detect_refine_blue_light_source.png" + }, + { + "name": "paint_roller", + "present": true, + "bbox": [ + 0.2917, + 0.532, + 0.4405, + 0.7522 + ], + "confidence": 0.99, + "notes": "The paint roller with handle and frame is clearly visible.", + "coarse_bbox": [ + 0.305, + 0.53, + 0.441, + 0.754 + ], + "refine_crop": "crops/detect_refine_paint_roller.png" + }, + { + "name": "draped_tarp", + "present": true, + "bbox": [ + 0.392, + 0.4207, + 0.8243, + 0.8862 + ], + "confidence": 0.98, + "notes": "Tight bounding box around the light-colored material draped over the object in the middle of the room.", + "coarse_bbox": [ + 0.388, + 0.425, + 
0.822, + 0.88 + ], + "refine_crop": "crops/detect_refine_draped_tarp.png" + }, + { + "name": "stacked_storage_boxes", + "present": true, + "bbox": [ + 0.2404, + 0.3738, + 0.4004, + 0.9726 + ], + "confidence": 0.95, + "notes": "Bounding box covers the entire visible stack of cardboard boxes.", + "coarse_bbox": [ + 0.238, + 0.375, + 0.4, + 0.975 + ], + "refine_crop": "crops/detect_refine_stacked_storage_boxes.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..14cb6e8f502f230cdac53d431d56828967f443c4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f89ad02403e64bc0ce2c62a2bad5fb4f779c70bb5b7c23a177bdf066163c71 +size 1226895 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..220e2c790ab3414930bf6df9b60db8b0ec602c65 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/plan.json @@ -0,0 +1,141 @@ +{ + "sample_id": "sample_000004", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 864, + 1152 + ], + "aspect_ratio": "3:4", + "style": "photorealistic" + }, + "scene": { + "setting": "A dimly lit room undergoing renovation, characterized by moody, cinematic dual-tone lighting with warm red and cool blue tones.", + "activity": "A worker wearing a cap is taking a break, inspecting the dark room during a night-time painting project.", + "composition": "3:4 vertical framing, medium shot. The person is centrally located in the midground, illuminated by directional red light. 
A paint roller rests nearby. Depth is achieved by placing the draped tarp and stacked boxes in the background, gently illuminated by a faint blue light source to create an atmospheric, moody contrast.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:4 composition", + "final canvas size 864x1152", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "worker_in_cap", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_50052.npy:person:0", + "source_name": "person", + "description": "A person whose face is partially visible, wearing a cap, with the features cast in strong reddish light.", + "role_in_scene": "Standing in the center, looking thoughtfully across the room, with the cap casting a shadow over the upper face." + } + ], + "objects": [ + { + "name": "blue_light_source", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "description": "A small, blurry blue light emitting a faint, cool glow against the darkness.", + "role_in_scene": "Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting." + }, + { + "name": "paint_roller", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "description": "A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.", + "role_in_scene": "Resting on a nearby surface in the foreground, catching subtle highlights from the red light." 
+ }, + { + "name": "draped_tarp", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "description": "A draped, light-colored tarp or drop cloth covering large, indistinct shapes.", + "role_in_scene": "Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting." + }, + { + "name": "stacked_boxes", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_39255.npy:object:1", + "source_name": "stacked boxes", + "description": "A tall stack of rectangular cardboard boxes or containers.", + "role_in_scene": "Stacked against the wall in the background, slightly illuminated by the cool blue hue from the blue light source." + } + ] + }, + "expected_subjects": [ + { + "name": "worker_in_cap", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_50052.npy:person:0", + "source_name": "person", + "source_description": "A person whose face is partially visible in the center, wearing a cap, and cast in reddish light. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a cap, with only their face partially visible in dim lighting.", + "sub_caption": "person: A person whose face is partially visible, wearing a cap, with the features cast in strong reddish light.. Scene role: Standing in the center, looking thoughtfully across the room, with the cap casting a shadow over the upper face.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. 
Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side.", + "sub_caption": "blue light source: A small, blurry blue light emitting a faint, cool glow against the darkness.. Scene role: Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "paint_roller", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "source_description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting.", + "sub_caption": "paint roller: A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.. Scene role: Resting on a nearby surface in the foreground, catching subtle highlights from the red light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "draped_tarp", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "source_description": "A draped, light-colored or beige material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp.", + "sub_caption": "light-colored material: A draped, light-colored tarp or drop cloth covering large, indistinct shapes.. 
Scene role: Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "stacked_boxes", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39255.npy:object:1", + "source_name": "stacked boxes", + "source_description": "A tall stack of rectangular boxes or containers against the left wall, dimly lit with a blue hue. Source dataset: Ego4D. Scene context: A dark, possibly industrial or storage space containing rows of stacked egg cartons illuminated by dim, cool-toned light.", + "sub_caption": "stacked boxes: A tall stack of rectangular cardboard boxes or containers.. Scene role: Stacked against the wall in the background, slightly illuminated by the cool blue hue from the blue light source.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000004/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references.json new file mode 100644 index 0000000000000000000000000000000000000000..77ccb5408e7470cbe4eeedb5aa31d319de173aeb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references.json @@ -0,0 +1,101 @@ +{ + "references": [ + { + "name": "blue_light_source", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_light_source.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", + "output": 
"references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 132.0, + 159.0, + 891.0, + 864.0 + ], + "mask_score": 3.485053, + "mask_area_ratio": 0.292151, + "elapsed_seconds": 9.6106 + }, + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "paint_roller", + "ref_image": "references/ref_paint_roller.png", + "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", + "diversify_input": "crops/diversify_input_paint_roller.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", + "output": "references/ref_paint_roller.png", + "mask": "references/sam_mask_paint_roller.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 136.0, + 75.0, + 866.0, + 950.0 + ], + "mask_score": 3.459168, + "mask_area_ratio": 0.116944, + "elapsed_seconds": 7.1624 + }, + "reference_verify": "references/reference_verify_paint_roller.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "draped_tarp", + "ref_image": "references/ref_draped_tarp.png", + "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", + "diversify_input": "crops/diversify_input_draped_tarp.png", + 
"sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", + "output": "references/ref_draped_tarp.png", + "mask": "references/sam_mask_draped_tarp.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 189.0, + 1023.0, + 888.0 + ], + "mask_score": 3.482188, + "mask_area_ratio": 0.485075, + "elapsed_seconds": 7.4131 + }, + "reference_verify": "references/reference_verify_draped_tarp.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..a41650133151bfce0f051fbfef9f07e463f49572 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_blue_light_source.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2f8b4d0bd40c7aae3bd60001c6305707041e5d005d90d15ab8ab00a3d906d7 +size 494686 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_draped_tarp.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_draped_tarp.png new file mode 100644 index 0000000000000000000000000000000000000000..a54ed4422e5ab3c47596541a028556d84ad24ee2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_draped_tarp.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:173db4b6d559628e4c57b50fee69ce78daf26db6e14e0404b36974224a1d4319 +size 790483 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_paint_roller.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_paint_roller.png new file mode 100644 index 0000000000000000000000000000000000000000..9207392ff969a4f828010607a1aae331d8fc86bb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/ref_paint_roller.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3bd464554911c5c6f8d114f896e7976fd90c86292dfbc2af6999e42e87d4fb +size 300906 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_blue_light_source.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_blue_light_source.json new file mode 100644 index 0000000000000000000000000000000000000000..7d4bbfd4ead08a47ea8e94577cb822a8ba102e66 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_blue_light_source.json @@ -0,0 +1,46 @@ +{ + "name": "blue_light_source", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_light_source_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_light_source_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_ref_blue_light_source_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_sam_mask_blue_light_source_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 132.0, + 159.0, + 891.0, + 864.0 + ], + "mask_score": 3.485053, + "mask_area_ratio": 0.292151, + "elapsed_seconds": 9.6106 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Isolated blue light source component on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_draped_tarp.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_draped_tarp.json new file mode 100644 index 0000000000000000000000000000000000000000..227ca83d5cb5b29a026b8cd85190d9250a7f2aab --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_draped_tarp.json @@ -0,0 +1,46 @@ +{ + "name": "draped_tarp", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_draped_tarp_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_draped_tarp_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_ref_draped_tarp_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_sam_mask_draped_tarp_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 189.0, + 1023.0, + 888.0 + ], + "mask_score": 3.482188, + "mask_area_ratio": 0.485075, + "elapsed_seconds": 7.4131 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The draped tarp is fully visible, isolated on a white background, and clearly shows the light-colored material covering indistinct shapes." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_paint_roller.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_paint_roller.json new file mode 100644 index 0000000000000000000000000000000000000000..1d11305c6e2f0be02e6bab89f61c9221c7405cb1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/reference_verify_paint_roller.json @@ -0,0 +1,46 @@ +{ + "name": "paint_roller", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_paint_roller_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_paint_roller_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_ref_paint_roller_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/candidate_sam_mask_paint_roller_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 136.0, + 75.0, + 866.0, + 950.0 + ], + "mask_score": 3.459168, + "mask_area_ratio": 0.116944, + "elapsed_seconds": 7.1624 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + 
"single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image is a perfectly isolated reference of a single paint roller on a white background with no cropping." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..9e54b61c1ffa297ebf8ea0747366af0e4e27865a Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_draped_tarp.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_draped_tarp.png new file mode 100644 index 0000000000000000000000000000000000000000..de777b50b87e8e30bd99e4cbc7ae4052915e70a7 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_draped_tarp.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_paint_roller.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_paint_roller.png new file mode 100644 index 0000000000000000000000000000000000000000..0f66fac76597498da801d0b5e93098bba9f906a4 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/references/sam_mask_paint_roller.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/row.json new file mode 100644 index 0000000000000000000000000000000000000000..731a99cdc18ab8e6157ae95a80e8c68e20ece96d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/row.json @@ -0,0 +1,173 @@ +{ + "sample_id": "sample_000004", + "target_total": 
5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 864, + 1152 + ], + "canvas_aspect_ratio": "3:4", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 5, + "n_detected": 5, + "n_subjects": 3, + "subjects": [ + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side.", + "sub_caption": "blue light source: A small, blurry blue light emitting a faint, cool glow against the darkness.. Scene role: Positioned in the deep background to the left, providing an atmospheric back-light and contrasting with the red lighting.", + "measured_bbox": [ + 0.5155, + 0.3724, + 0.553, + 0.4239 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_blue_light_source_attempt_01.png", + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": 
"auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 132.0, + 159.0, + 891.0, + 864.0 + ], + "mask_score": 3.485053, + "mask_area_ratio": 0.292151, + "elapsed_seconds": 9.6106 + } + }, + { + "name": "paint_roller", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "source_name": "paint roller", + "source_description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting.", + "sub_caption": "paint roller: A standard paint roller with a dark cylindrical cover, attached to a metal frame and handle.. Scene role: Resting on a nearby surface in the foreground, catching subtle highlights from the red light.", + "measured_bbox": [ + 0.2917, + 0.532, + 0.4405, + 0.7522 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_paint_roller.png", + "raw_ref_image": "references/raw_ref_paint_roller_attempt_01.png", + "reference_verify": "references/reference_verify_paint_roller.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_paint_roller_attempt_01.png", + "output": "references/ref_paint_roller.png", + "mask": "references/sam_mask_paint_roller.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 136.0, + 75.0, + 866.0, + 950.0 + ], + 
"mask_score": 3.459168, + "mask_area_ratio": 0.116944, + "elapsed_seconds": 7.1624 + } + }, + { + "name": "draped_tarp", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "source_name": "light-colored material", + "source_description": "A draped, light-colored or beige material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp.", + "sub_caption": "light-colored material: A draped, light-colored tarp or drop cloth covering large, indistinct shapes.. Scene role: Draped over items on the right side of the room, adding textural detail and reflecting the mixed ambient lighting.", + "measured_bbox": [ + 0.392, + 0.4207, + 0.8243, + 0.8862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_draped_tarp.png", + "raw_ref_image": "references/raw_ref_draped_tarp_attempt_01.png", + "reference_verify": "references/reference_verify_draped_tarp.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000004/references/raw_ref_draped_tarp_attempt_01.png", + "output": "references/ref_draped_tarp.png", + "mask": "references/sam_mask_draped_tarp.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 189.0, + 1023.0, + 888.0 + ], + "mask_score": 3.482188, + "mask_area_ratio": 0.485075, + 
"elapsed_seconds": 7.4131 + } + } + ], + "not_emitted": [ + { + "name": "worker_in_cap", + "reason": "not_detected" + }, + { + "name": "stacked_boxes", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..3e87e496e1e3661e1f0119d06c591eb5f21bb786 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000004/vocab_task.json @@ -0,0 +1,84 @@ +{ + "task_id": "sample_000004", + "sample_id": "sample_000004", + "sample_index": 4, + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 956, + "image_id": "Ego4D:ego4d_video/EGO_50052.npy:person:0", + "name": "person", + "description": "A person whose face is partially visible in the center, wearing a cap, and cast in reddish light. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a cap, with only their face partially visible in dim lighting." + }, + { + "candidate_index": 1, + "source_offset": 300, + "image_id": "Ego4D:ego4d_video/EGO_202339.npy:person:0", + "name": "person", + "description": "Only the hands are visible, illuminated by a strong red light, holding and manipulating a paintbrush. Source dataset: Ego4D. Scene context: A close-up view of hands holding a paintbrush under a strong red light in a very dark environment." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 9807, + "image_id": "Ego4D:ego4d_video/EGO_98001.npy:object:0", + "name": "blue light source", + "description": "A small, blurry blue light, appearing as a faint glow against the dark background. Source dataset: Ego4D. 
Scene context: A predominantly dark scene with a single, small, indistinct blue light source visible towards the right side." + }, + { + "candidate_index": 1, + "source_offset": 8874, + "image_id": "Ego4D:ego4d_video/EGO_76305.npy:object:0", + "name": "small object", + "description": "A dark, small object held between the hands, details are indistinguishable due to low light. Source dataset: Ego4D. Scene context: A very dark image showing hands illuminated by red light holding what appears to be a small tool or device." + }, + { + "candidate_index": 2, + "source_offset": 9290, + "image_id": "Ego4D:ego4d_video/EGO_95307.npy:object:0", + "name": "paint roller", + "description": "A standard paint roller with a dark cylindrical cover and a metal frame connecting to a handle. Source dataset: Ego4D. Scene context: A close-up view of a paint roller against a light-colored wall in a dimly lit setting." + }, + { + "candidate_index": 3, + "source_offset": 1471, + "image_id": "EPIC-Kitchens:P06_101:object:6", + "name": "kitchen floor tiles", + "description": "The floor in the foreground features intricate, geometric, multi-colored tiles (brown, beige, red, black), while the floor visible through the doorway consists of dark grey or black square tiles. Source dataset: EPIC-Kitchens. Scene context: A view of a kitchen showing a refrigerator, a washing machine, a cardboard box, and patterned floor tiles leading into another room." + }, + { + "candidate_index": 4, + "source_offset": 8045, + "image_id": "Ego4D:ego4d_video/EGO_37499.npy:object:1", + "name": "light-colored material", + "description": "A draped, light-colored or beige material, possibly part of a tent or covering, visible on the right side. Source dataset: Ego4D. Scene context: The scene is a dark, possibly outdoor or dimly lit setting, showing what appears to be a structure or object covered with a large piece of material or tarp." 
+ }, + { + "candidate_index": 5, + "source_offset": 8227, + "image_id": "Ego4D:ego4d_video/EGO_39255.npy:object:1", + "name": "stacked boxes", + "description": "A tall stack of rectangular boxes or containers against the left wall, dimly lit with a blue hue. Source dataset: Ego4D. Scene context: A dark, possibly industrial or storage space containing rows of stacked egg cartons illuminated by dim, cool-toned light." + }, + { + "candidate_index": 6, + "source_offset": 2434, + "image_id": "EPIC-Kitchens:P26_102:object:10", + "name": "floor", + "description": "Dark, possibly tiled flooring visible beneath and around the table. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby." + }, + { + "candidate_index": 7, + "source_offset": 2681, + "image_id": "EPIC-Kitchens:P26_122:object:0", + "name": "bowl of eggs", + "description": "A glass bowl containing three cracked eggs. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter with a bowl of eggs, a cutting board, a knife, a fork, and a stovetop." 
+ } + ], + "rng_seed": 1782346909, + "created_at": 1782259667.639344 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..a5bdef570489686809f460b1cce73fc8de09abae --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c98cb13cd986d8d8053ef788f3508cc76bf87761493f2d44a5f0d179550db9 +size 1281427 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..f961a4ba2db1dbe6893fb905ecc4ceb6f193baaa --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/compose_prompt.txt @@ -0,0 +1,83 @@ +Render the following JSON scene specification as a photorealistic 1152x864 image using a true 4:3 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A cozy, dimly lit kitchen with a wooden table in the foreground, and a counter against a blue wall in the background", + "activity": "People are collaborating to prepare a meal; hands are actively preparing food in the foreground, another person reaches for an ingredient, and a third person stands ready holding a pan.", + "composition": "First-person or close over-the-shoulder framing. The bottom of the frame is dominated by hands preparing food. A hand enters from the side reaching toward the center. The midground features the torso and legs of a standing person and a kitchen counter. The background shows a blue wall with a wooden cabinet on the upper right.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_reaching", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_232131.npy:person:0", + "source_name": "person", + "description": "A person wearing dark clothing, partially visible, with a hand reaching towards a bright red object on the table.", + "role_in_scene": "Reaching for an ingredient on the wooden table" + }, + { + "name": "person_standing", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "description": "A person standing by a blue wall, seen from the torso down, holding a dark round 
object resembling a pan.", + "role_in_scene": "Standing in the midground waiting with a pan" + }, + { + "name": "person_preparing_food", + "source_index": 4, + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "description": "Hands and lower arms of a person visible at the bottom edge of the frame, actively preparing food on a wooden surface.", + "role_in_scene": "Working in the immediate foreground, anchoring the perspective" + } + ], + "objects": [ + { + "name": "cardboard_box", + "source_index": 2, + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "description": "A small, plain cardboard box without any text, sitting clearly on the kitchen counter.", + "role_in_scene": "Resting on the midground counter" + }, + { + "name": "cutlery_on_plate", + "source_index": 4, + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "description": "A silver fork and a knife resting on a yellow plate.", + "role_in_scene": "Sitting on the wooden table near the food preparation area" + }, + { + "name": "wooden_cabinet", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "description": "A wooden kitchen cabinet positioned above the counter.", + "role_in_scene": "Hanging on the wall in the upper right background" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_cutlery_set.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_cutlery_set.png new file mode 100644 index 0000000000000000000000000000000000000000..d63a97be5a6ffb0dd4df741e6b9370be0aef898d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_cutlery_set.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81706818bd763d12ef2fa7f0c8b855d6dfbc74db4b554b8b66c646d59b2faf9a +size 110391 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_in_jacket.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_in_jacket.png new file mode 100644 index 0000000000000000000000000000000000000000..4d879e7a0638e4ba5bff07817ffc97ed5ec334e8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_in_jacket.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d240e93153aaa369155c8c83a4466aae249542d70023704a6cdb073df60d8b80 +size 385427 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_with_pan.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_with_pan.png new file mode 100644 index 0000000000000000000000000000000000000000..e206057cab636ee8298e8fab3b4b3d9cbfd9bc58 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_person_with_pan.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18cd2a6442887578d675909b9d0f879bcfa67d188cf3ac3125180d3ec4d6f1d +size 381602 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_pizza_prep_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_pizza_prep_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..2d55e3beb05d383a5362460f19ca56d678bb9e8a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_pizza_prep_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5ae8ffa508d048eddcb8e6637d34b5f770ed707ff2a71e99250d3d26e8c543 +size 438730 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_salt_box.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_salt_box.png new file mode 100644 index 
0000000000000000000000000000000000000000..9f09b3b7717ff36ebfde8068c53a364b6be0d548 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_salt_box.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_wall_cabinet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_wall_cabinet.png new file mode 100644 index 0000000000000000000000000000000000000000..965d35d3d42fe029682b5c99d35ae2bdb3482b8c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/detect_refine_wall_cabinet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b46b669cb7bd4acb066c386294f8c3a835a41d5d57faca91e7ece0bd8a6233 +size 110418 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_cutlery_set.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_cutlery_set.png new file mode 100644 index 0000000000000000000000000000000000000000..c79fdd5500cf3a441bc749bcc6e9686960851fbb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_cutlery_set.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9350e58a91f406a340d2aa5aecf5db7d5bbef905c23c58ddc28b620955537f40 +size 134200 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_in_jacket.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_in_jacket.png new file mode 100644 index 0000000000000000000000000000000000000000..3c3c946225d6fc39380ecf6afc5e52f034238390 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_in_jacket.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cbe3ae698c6aa56f2e0fc8de98daaee5cdad67165970c73591e680e86521949 +size 602974 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_with_pan.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_with_pan.png new file mode 100644 index 0000000000000000000000000000000000000000..acb2e8f8d88bc0ed6abbe106d9e75772bebcc7c8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_person_with_pan.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e95a55c02e1ffbde7cd528f8254dfd7964f56fafc13aca9bb602842338d4146 +size 658623 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_pizza_prep_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_pizza_prep_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..99e76ab29ff96ebfd11762f3b78d9a774778ee8b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_pizza_prep_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1618f39bb0e905217d400119e6bbbe528b3acceaa87c4b068233cbc2c795edbb +size 827303 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_salt_box.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_salt_box.png new file mode 100644 index 0000000000000000000000000000000000000000..bba5420ea33c08b4deaf9f59b53b29e95dba2e02 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_salt_box.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_wall_cabinet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_wall_cabinet.png new file mode 100644 index 0000000000000000000000000000000000000000..b67c4eafd9c47de144387c169de7bb089e23c83f --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/crops/diversify_input_wall_cabinet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e5dddd4ed41153cec8a577a5b47cfac3a461a2726ad714cb638ecea3e53ac6 +size 134547 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..bece6a5d051596b65be04831bf5a0a2a8d4662b7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/detections.json @@ -0,0 +1,116 @@ +[ + { + "name": "pizza_prep_hands", + "present": true, + "bbox": [ + 0.532, + 0.2245, + 1.0, + 0.7871 + ], + "confidence": 0.9, + "notes": "Hands and lower arms visible preparing food (cutting vegetables) on a wooden surface.", + "coarse_bbox": [ + 0.526, + 0.275, + 0.988, + 0.791 + ], + "refine_crop": "crops/detect_refine_pizza_prep_hands.png" + }, + { + "name": "person_in_jacket", + "present": true, + "bbox": [ + 0.0, + 0.0, + 0.361, + 0.675 + ], + "confidence": 1.0, + "notes": "refine failed; using coarse bbox", + "coarse_bbox": [ + 0.0, + 0.0, + 0.361, + 0.675 + ], + "refine_crop": "crops/detect_refine_person_in_jacket.png" + }, + { + "name": "person_with_pan", + "present": true, + "bbox": [ + 0.1195, + 0.0021, + 0.4483, + 0.6302 + ], + "confidence": 0.95, + "notes": "The person standing in the background holding a pan.", + "coarse_bbox": [ + 0.116, + 0.0, + 0.448, + 0.632 + ], + "refine_crop": "crops/detect_refine_person_with_pan.png" + }, + { + "name": "salt_box", + "present": true, + "bbox": [ + 0.5558, + 0.4006, + 0.6966, + 0.4937 + ], + "confidence": 0.5, + "notes": "A plain cardboard box is visible instead of a patterned salt box, but it fits the broader category of 'box'.", + "coarse_bbox": [ + 0.556, + 0.4, + 0.697, + 0.497 + ], + "refine_crop": "crops/detect_refine_salt_box.png" + }, + { + "name": "cutlery_set", + "present": 
true, + "bbox": [ + 0.1913, + 0.8329, + 0.6116, + 0.9636 + ], + "confidence": "high", + "notes": "Both the fork and the knife resting on the yellow plate are visible and bounded by this box.", + "coarse_bbox": [ + 0.195, + 0.834, + 0.608, + 0.971 + ], + "refine_crop": "crops/detect_refine_cutlery_set.png" + }, + { + "name": "wall_cabinet", + "present": true, + "bbox": [ + 0.5897, + 0.0017, + 0.9469, + 0.1735 + ], + "confidence": 0.99, + "notes": "A tight axis-aligned box around the wooden wall cabinet visible in the crop.", + "coarse_bbox": [ + 0.591, + 0.0, + 0.946, + 0.173 + ], + "refine_crop": "crops/detect_refine_wall_cabinet.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..e16562d40b05ae64708a4a1cd6fbda497f870f49 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db79e1c803e696ef48b420ece0b5b5bbb4eba95cf8c352aae7e87834030afc59 +size 1348521 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..6c6cb5c110903081fb7f8c884d0c0cc84d1d2010 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/plan.json @@ -0,0 +1,160 @@ +{ + "sample_id": "sample_000005", + "target_total": 6, + "target_people": 3, + "target_objects": 3, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A cozy, dimly lit home kitchen with wooden counters and a blue accent wall in the background.", + "activity": "Several people are engaged in meal preparation; one is 
actively making pizza in the foreground, another faces the cabinets, and a third stands holding a round pan in the background.", + "composition": "First-person perspective or extreme close-up on hands in the bottom foreground working on a pizza, with a box of salt and cutlery on a plate nearby. In the midground, a wooden cabinet hangs on the wall with a person standing before it. The background features another person near a blue wall to provide layered depth.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "pizza_prep_hands", + "source_index": 4, + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "description": "Hands and lower arms visible, preparing a pizza on a wooden surface.", + "role_in_scene": "In the foreground, actively making a pizza." + }, + { + "name": "person_in_jacket", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "source_name": "person", + "description": "Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.", + "role_in_scene": "Standing in the midground, facing away toward the kitchen cabinets." + }, + { + "name": "person_with_pan", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "description": "Person seen from the torso down, holding a dark round object.", + "role_in_scene": "Standing in the background near a blue wall." 
+ } + ], + "objects": [ + { + "name": "salt_box", + "source_index": 2, + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "description": "A small cardboard box with blue and black graphical patterns, resembling a salt container.", + "role_in_scene": "Resting on the wooden counter next to the pizza prep area." + }, + { + "name": "cutlery_set", + "source_index": 4, + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "description": "A silver fork and a knife resting on a yellow plate.", + "role_in_scene": "Placed on the counter in the foreground left." + }, + { + "name": "wall_cabinet", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "description": "A wooden cabinet positioned above a kitchen counter.", + "role_in_scene": "Mounted on the wall in the midground, directly above the counter." + } + ] + }, + "expected_subjects": [ + { + "name": "pizza_prep_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "source_description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes.", + "sub_caption": "person: Hands and lower arms visible, preparing a pizza on a wooden surface.. 
Scene role: In the foreground, actively making a pizza.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_in_jacket", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "source_name": "person", + "source_description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting.", + "sub_caption": "person: Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.. Scene role: Standing in the midground, facing away toward the kitchen cabinets.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_with_pan", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: Person seen from the torso down, holding a dark round object.. Scene role: Standing in the background near a blue wall.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "salt_box", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "source_description": "A small cardboard box with blue and black text, sitting on the counter near the stove. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils.", + "sub_caption": "box of salt: A small cardboard box with blue and black graphical patterns, resembling a salt container.. Scene role: Resting on the wooden counter next to the pizza prep area.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "cutlery_set", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "source_description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "cutlery: A silver fork and a knife resting on a yellow plate.. Scene role: Placed on the counter in the foreground left.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "wall_cabinet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "source_description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid.", + "sub_caption": "cabinet: A wooden cabinet positioned above a kitchen counter.. 
Scene role: Mounted on the wall in the midground, directly above the counter.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000005/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references.json new file mode 100644 index 0000000000000000000000000000000000000000..634870e35e6695b226a92bde7ff323dce722972b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references.json @@ -0,0 +1,197 @@ +{ + "references": [ + { + "name": "pizza_prep_hands", + "ref_image": "references/ref_pizza_prep_hands.png", + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", + "diversify_input": "crops/diversify_input_pizza_prep_hands.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", + "output": "references/ref_pizza_prep_hands.png", + "mask": "references/sam_mask_pizza_prep_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 349.0, + 14.0, + 677.0, + 1016.0 + ], + "mask_score": 3.451951, + "mask_area_ratio": 0.150558, + "elapsed_seconds": 10.2196 + }, + "reference_verify": "references/reference_verify_pizza_prep_hands.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "person_in_jacket", + "ref_image": "references/ref_person_in_jacket.png", + "raw_ref_image": 
"references/raw_ref_person_in_jacket_attempt_02.png", + "diversify_input": "crops/diversify_input_person_in_jacket.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", + "output": "references/ref_person_in_jacket.png", + "mask": "references/sam_mask_person_in_jacket.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 15.0, + 684.0, + 1023.0 + ], + "mask_score": 3.414528, + "mask_area_ratio": 0.160464, + "elapsed_seconds": 10.2925 + }, + "reference_verify": "references/reference_verify_person_in_jacket.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "person_with_pan", + "ref_image": "references/ref_person_with_pan.png", + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", + "diversify_input": "crops/diversify_input_person_with_pan.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", + "output": "references/ref_person_with_pan.png", + "mask": "references/sam_mask_person_with_pan.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 18.0, + 690.0, + 1023.0 + ], + "mask_score": 3.408831, + "mask_area_ratio": 0.163625, + "elapsed_seconds": 10.3028 + 
}, + "reference_verify": "references/reference_verify_person_with_pan.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "salt_box", + "ref_image": "references/ref_salt_box.png", + "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", + "diversify_input": "crops/diversify_input_salt_box.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", + "output": "references/ref_salt_box.png", + "mask": "references/sam_mask_salt_box.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 73.0, + 268.0, + 952.0, + 762.0 + ], + "mask_score": 3.471932, + "mask_area_ratio": 0.353847, + "elapsed_seconds": 10.266 + }, + "reference_verify": "references/reference_verify_salt_box.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "cutlery_set", + "ref_image": "references/ref_cutlery_set.png", + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", + "diversify_input": "crops/diversify_input_cutlery_set.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", + "output": "references/ref_cutlery_set.png", + "mask": "references/sam_mask_cutlery_set.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 
0.625, + "prompt_box_xyxy": [ + 28.0, + 122.0, + 1013.0, + 887.0 + ], + "mask_score": 3.237995, + "mask_area_ratio": 0.452152, + "elapsed_seconds": 10.821 + }, + "reference_verify": "references/reference_verify_cutlery_set.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "wall_cabinet", + "ref_image": "references/ref_wall_cabinet.png", + "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", + "diversify_input": "crops/diversify_input_wall_cabinet.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", + "output": "references/ref_wall_cabinet.png", + "mask": "references/sam_mask_wall_cabinet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 15.0, + 31.0, + 1008.0, + 1013.0 + ], + "mask_score": 3.398914, + "mask_area_ratio": 0.735002, + "elapsed_seconds": 10.923 + }, + "reference_verify": "references/reference_verify_wall_cabinet.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_cutlery_set.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_cutlery_set.png new file mode 100644 index 0000000000000000000000000000000000000000..ad5835015e78b6cd1b7e0f774c461b81e65f4a3d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_cutlery_set.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836775a2e95d6b18de2773271d37b6c0a2e08a5b8317b4018ffc40393e5f0062 +size 768959 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_in_jacket.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_in_jacket.png new file mode 100644 index 0000000000000000000000000000000000000000..5c0cfea3dec57d822b09ef50a33d32bb331ac508 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_in_jacket.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a44628091aaca5f5fcb8f8b6c65985998c8d0865c00eaeee08dc6a27e416f5 +size 312064 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_with_pan.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_with_pan.png new file mode 100644 index 0000000000000000000000000000000000000000..5e9ae12fb45220f0243ae05acac90bf2577c5c90 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_person_with_pan.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d5482244c233777a04b27d584d058c313b77af14c6fe92d7fd2c6381e92203 +size 327906 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_pizza_prep_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_pizza_prep_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..d9dec89522d36d26c688350f3628f9180f5390a3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_pizza_prep_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920c5878cd7a10c1ba7732eb4daa6f4a5eb733d1192d04cf48772cb20dc04e6f +size 273534 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_salt_box.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_salt_box.png new file mode 100644 index 
0000000000000000000000000000000000000000..21f1ec0131ac4e52c9ac77513392c706f7ef6c1f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_salt_box.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fb5d5a928fbe57d926f5561263febdc322811dcaa161001b41eaf345e18128 +size 584444 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_wall_cabinet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_wall_cabinet.png new file mode 100644 index 0000000000000000000000000000000000000000..c61bb0e725be1d16e98bc9004a855b2176934fcb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/ref_wall_cabinet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63b54b0e39b647515f7601af1b8eb09ef6ffe824370e886f91ed96751bfe23b +size 1175498 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_cutlery_set.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_cutlery_set.json new file mode 100644 index 0000000000000000000000000000000000000000..813aef8f278670c1caecc4ce6d66cdab0e717fd0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_cutlery_set.json @@ -0,0 +1,87 @@ +{ + "name": "cutlery_set", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_cutlery_set_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_cutlery_set_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_cutlery_set_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_cutlery_set_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 245.0, + 1002.0, + 809.0 + ], + "mask_score": 3.134715, + "mask_area_ratio": 0.234831, + "elapsed_seconds": 10.0521 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": false, + "white_background": true, + "failure_reasons": [ + "The intended subject (cutlery) is completely missing, replaced by white silhouettes." + ], + "notes": "The image shows a yellow plate, but the silver fork and knife have been cut out from the image, leaving only their silhouettes as empty space." 
+ } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_cutlery_set_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_cutlery_set_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_cutlery_set_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_cutlery_set_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 28.0, + 122.0, + 1013.0, + 887.0 + ], + "mask_score": 3.237995, + "mask_area_ratio": 0.452152, + "elapsed_seconds": 10.821 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a silver fork and knife resting on a yellow plate against a white background. Minor artifacts are present on the plate but do not affect subject recognizability." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_in_jacket.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_in_jacket.json new file mode 100644 index 0000000000000000000000000000000000000000..4a38595c0dbaa5c711a65d324bdde2d3f9bac730 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_in_jacket.json @@ -0,0 +1,89 @@ +{ + "name": "person_in_jacket", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_in_jacket_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_in_jacket_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_person_in_jacket_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_person_in_jacket_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 8.0, + 685.0, + 1017.0 + ], + "mask_score": 3.091193, + "mask_area_ratio": 0.13997, + "elapsed_seconds": 9.9504 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + 
"cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "severe generation artifacts", + "top of head is missing or truncated", + "parts of the face, torso, and arm are missing due to glitches" + ], + "notes": "The subject has severe generation artifacts causing missing chunks from the head, face, torso, and arm. The top of the head is completely missing/flat." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_person_in_jacket_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_in_jacket_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_person_in_jacket_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_person_in_jacket_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 15.0, + 684.0, + 1023.0 + ], + "mask_score": 3.414528, + "mask_area_ratio": 0.160464, + "elapsed_seconds": 10.2925 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible with good margins." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_with_pan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_with_pan.json new file mode 100644 index 0000000000000000000000000000000000000000..27812900f1f8e1e070a820ad0fdb1ff174c6780b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_person_with_pan.json @@ -0,0 +1,89 @@ +{ + "name": "person_with_pan", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_with_pan_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_with_pan_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_person_with_pan_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_person_with_pan_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 306.0, + 45.0, + 718.0, + 1023.0 + ], + "mask_score": 0.784811, + "mask_area_ratio": 0.20154, + "elapsed_seconds": 10.1532 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + 
"cropped_or_truncated": true, + "single_main_subject": false, + "white_background": true, + "failure_reasons": [ + "Image generation appears to have failed; subject is mostly blank/washed out", + "Subject is not clearly visible", + "Full body is not visible from head to toe" + ], + "notes": "The image shows only a faint, overexposed silhouette of a person's upper body and is not a valid reference." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_person_with_pan_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_with_pan_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_person_with_pan_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_person_with_pan_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 18.0, + 690.0, + 1023.0 + ], + "mask_score": 3.408831, + "mask_area_ratio": 0.163625, + "elapsed_seconds": 10.3028 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image contains a full-body view of a person on a white 
background, satisfying all hard requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_pizza_prep_hands.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_pizza_prep_hands.json new file mode 100644 index 0000000000000000000000000000000000000000..81de9a0cfbe25338ff11e1944976ae029a32c251 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_pizza_prep_hands.json @@ -0,0 +1,89 @@ +{ + "name": "pizza_prep_hands", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_pizza_prep_hands_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_pizza_prep_hands_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_pizza_prep_hands_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_pizza_prep_hands_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 266.0, + 18.0, + 757.0, + 1023.0 + ], + "mask_score": 3.20741, + "mask_area_ratio": 0.287721, + "elapsed_seconds": 10.591 + }, + "verify": { + "passed": false, + 
"subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Not a full-body shot; legs and feet are missing.", + "The person's lower body is cropped by the bottom image boundary.", + "The image is a half-body/upper-body crop, violating the full-body requirement." + ], + "notes": "The image shows a man from the mid-thigh up against a white background, which fails the hard requirement for a complete full-body reference image." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_pizza_prep_hands_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_pizza_prep_hands_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_pizza_prep_hands_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_pizza_prep_hands_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 349.0, + 14.0, + 677.0, + 1016.0 + ], + "mask_score": 3.451951, + "mask_area_ratio": 0.150558, + "elapsed_seconds": 10.2196 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + 
"single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image successfully displays a full-body person with no cropped body parts and an adequate white margin." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_salt_box.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_salt_box.json new file mode 100644 index 0000000000000000000000000000000000000000..ea86e5c98f52f5b76a742d58bf6600708d8b0cac --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_salt_box.json @@ -0,0 +1,46 @@ +{ + "name": "salt_box", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_salt_box_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_salt_box_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_salt_box_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_salt_box_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 73.0, + 268.0, + 952.0, + 762.0 + ], + "mask_score": 3.471932, + "mask_area_ratio": 0.353847, + 
"elapsed_seconds": 10.266 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a plain brown corrugated box rather than one with blue and black patterns, but it is a complete and isolated object on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_wall_cabinet.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_wall_cabinet.json new file mode 100644 index 0000000000000000000000000000000000000000..4d12840630db421c97b34a2b872f10ed7617103f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/reference_verify_wall_cabinet.json @@ -0,0 +1,46 @@ +{ + "name": "wall_cabinet", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_wall_cabinet_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_wall_cabinet_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_ref_wall_cabinet_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/candidate_sam_mask_wall_cabinet_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", 
+ "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 15.0, + 31.0, + 1008.0, + 1013.0 + ], + "mask_score": 3.398914, + "mask_area_ratio": 0.735002, + "elapsed_seconds": 10.923 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The wall cabinet is fully visible and isolated on a white background, with some minor masking artifacts along the bottom edge." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_cutlery_set.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_cutlery_set.png new file mode 100644 index 0000000000000000000000000000000000000000..557954c0635f489b2bb350f0eb2c6e1feb6c3215 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_cutlery_set.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_in_jacket.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_in_jacket.png new file mode 100644 index 0000000000000000000000000000000000000000..6a7801827db3455cc71befb26b1fda87f0609d94 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_in_jacket.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_with_pan.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_with_pan.png new file mode 100644 index 0000000000000000000000000000000000000000..863f10bb2f1b74c081aabc9f026ba723103d3b51 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_person_with_pan.png differ diff 
--git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_pizza_prep_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_pizza_prep_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..a6315aa2f9b80abd5c12b36a471248eb6e9ba2fa Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_pizza_prep_hands.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_salt_box.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_salt_box.png new file mode 100644 index 0000000000000000000000000000000000000000..de49dc348d1b38affcd93752e3d2bce75bdb27dc Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_salt_box.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_wall_cabinet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_wall_cabinet.png new file mode 100644 index 0000000000000000000000000000000000000000..5ea4eaf70c5c3c670b0617e8c9a14304275679fb Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/references/sam_mask_wall_cabinet.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/row.json new file mode 100644 index 0000000000000000000000000000000000000000..3fa46fac137429f70b533a51fc6cb221a17a584d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/row.json @@ -0,0 +1,302 @@ +{ + "sample_id": "sample_000005", + "target_total": 6, + "target_people": 3, + "target_objects": 3, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": 
"detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 6, + "n_detected": 6, + "n_subjects": 6, + "subjects": [ + { + "name": "pizza_prep_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P11_106:person:0", + "source_name": "person", + "source_description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes.", + "sub_caption": "person: Hands and lower arms visible, preparing a pizza on a wooden surface.. Scene role: In the foreground, actively making a pizza.", + "measured_bbox": [ + 0.532, + 0.2245, + 1.0, + 0.7871 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_pizza_prep_hands.png", + "raw_ref_image": "references/raw_ref_pizza_prep_hands_attempt_02.png", + "reference_verify": "references/reference_verify_pizza_prep_hands.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_pizza_prep_hands_attempt_02.png", + "output": "references/ref_pizza_prep_hands.png", + "mask": "references/sam_mask_pizza_prep_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 349.0, + 14.0, + 677.0, + 1016.0 + ], + "mask_score": 3.451951, + "mask_area_ratio": 0.150558, + "elapsed_seconds": 10.2196 + } + }, + { + "name": 
"person_in_jacket", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "source_name": "person", + "source_description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting.", + "sub_caption": "person: Person seen mostly from the back, dark hair, wearing a dark jacket over a red shirt with yellow abstract patterns.. Scene role: Standing in the midground, facing away toward the kitchen cabinets.", + "measured_bbox": [ + 0.0, + 0.0, + 0.361, + 0.675 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_in_jacket.png", + "raw_ref_image": "references/raw_ref_person_in_jacket_attempt_02.png", + "reference_verify": "references/reference_verify_person_in_jacket.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_in_jacket_attempt_02.png", + "output": "references/ref_person_in_jacket.png", + "mask": "references/sam_mask_person_in_jacket.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 15.0, + 684.0, + 1023.0 + ], + "mask_score": 3.414528, + "mask_area_ratio": 0.160464, + "elapsed_seconds": 10.2925 + } + }, + { + "name": "person_with_pan", + "is_person": true, + 
"subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: Person seen from the torso down, holding a dark round object.. Scene role: Standing in the background near a blue wall.", + "measured_bbox": [ + 0.1195, + 0.0021, + 0.4483, + 0.6302 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_with_pan.png", + "raw_ref_image": "references/raw_ref_person_with_pan_attempt_02.png", + "reference_verify": "references/reference_verify_person_with_pan.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_person_with_pan_attempt_02.png", + "output": "references/ref_person_with_pan.png", + "mask": "references/sam_mask_person_with_pan.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 334.0, + 18.0, + 690.0, + 1023.0 + ], + "mask_score": 3.408831, + "mask_area_ratio": 0.163625, + "elapsed_seconds": 10.3028 + } + }, + { + "name": "salt_box", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P35_102:object:1", + "source_name": "box of salt", + "source_description": "A small cardboard box with blue and black text, sitting on the counter near the stove. 
Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils.", + "sub_caption": "box of salt: A small cardboard box with blue and black graphical patterns, resembling a salt container.. Scene role: Resting on the wooden counter next to the pizza prep area.", + "measured_bbox": [ + 0.5558, + 0.4006, + 0.6966, + 0.4937 + ], + "detection_confidence": 0.5, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_salt_box.png", + "raw_ref_image": "references/raw_ref_salt_box_attempt_01.png", + "reference_verify": "references/reference_verify_salt_box.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_salt_box_attempt_01.png", + "output": "references/ref_salt_box.png", + "mask": "references/sam_mask_salt_box.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 73.0, + 268.0, + 952.0, + 762.0 + ], + "mask_score": 3.471932, + "mask_area_ratio": 0.353847, + "elapsed_seconds": 10.266 + } + }, + { + "name": "cutlery_set", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_103:object:4", + "source_name": "cutlery", + "source_description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. 
Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "cutlery: A silver fork and a knife resting on a yellow plate.. Scene role: Placed on the counter in the foreground left.", + "measured_bbox": [ + 0.1913, + 0.8329, + 0.6116, + 0.9636 + ], + "detection_confidence": "high", + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cutlery_set.png", + "raw_ref_image": "references/raw_ref_cutlery_set_attempt_02.png", + "reference_verify": "references/reference_verify_cutlery_set.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_cutlery_set_attempt_02.png", + "output": "references/ref_cutlery_set.png", + "mask": "references/sam_mask_cutlery_set.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 28.0, + 122.0, + 1013.0, + 887.0 + ], + "mask_score": 3.237995, + "mask_area_ratio": 0.452152, + "elapsed_seconds": 10.821 + } + }, + { + "name": "wall_cabinet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_121:object:12", + "source_name": "cabinet", + "source_description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid.", + "sub_caption": "cabinet: A wooden cabinet positioned above a kitchen counter.. Scene role: Mounted on the wall in the midground, directly above the counter.", + "measured_bbox": [ + 0.5897, + 0.0017, + 0.9469, + 0.1735 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_cabinet.png", + "raw_ref_image": "references/raw_ref_wall_cabinet_attempt_01.png", + "reference_verify": "references/reference_verify_wall_cabinet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000005/references/raw_ref_wall_cabinet_attempt_01.png", + "output": "references/ref_wall_cabinet.png", + "mask": "references/sam_mask_wall_cabinet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 15.0, + 31.0, + 1008.0, + 1013.0 + ], + "mask_score": 3.398914, + "mask_area_ratio": 0.735002, + "elapsed_seconds": 10.923 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..04d2dce90f25d71d9277dee8332df1cae3b93579 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000005/vocab_task.json @@ -0,0 +1,98 @@ +{ + "task_id": "sample_000005", + "sample_id": "sample_000005", + "sample_index": 5, + "target_total": 6, + "target_people": 3, + "target_objects": 3, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 366, + "image_id": "Ego4D:ego4d_video/EGO_221297.npy:person:0", + "name": "painter", + "description": "Only the silhouette of an arm and a hand holding a paintbrush is visible against the light surface, seen in low light. Source dataset: Ego4D. Scene context: A close-up view of a person painting a light-colored surface with a brush in a dimly lit setting." + }, + { + "candidate_index": 1, + "source_offset": 430, + "image_id": "Ego4D:ego4d_video/EGO_232131.npy:person:0", + "name": "person", + "description": "Partially visible in the dark, with a hand reaching towards a red object, wearing dark clothing. Source dataset: Ego4D. Scene context: A very dark, low-lit scene showing part of a hand touching what appears to be a red object." + }, + { + "candidate_index": 2, + "source_offset": 966, + "image_id": "Ego4D:ego4d_video/EGO_50078.npy:person:0", + "name": "person", + "description": "Only parts of a person's arms are vaguely visible due to the extreme darkness. One arm is illuminated with a reddish glow, reaching upwards and towards the right. Another arm, or possibly a shoulder, is slightly illuminated with a yellowish glow on the left. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene showing mostly black with some faint, warm-toned shapes, possibly a person's arms reaching out." + }, + { + "candidate_index": 3, + "source_offset": 254, + "image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "name": "person", + "description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall." 
+ }, + { + "candidate_index": 4, + "source_offset": 23, + "image_id": "EPIC-Kitchens:P11_106:person:0", + "name": "person", + "description": "The person's hands and lower arms are visible at the bottom of the frame, appearing to be in the middle of preparing food. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a wooden kitchen table where two pizzas are being prepared with various ingredients like red onions, mushrooms, and tomatoes." + }, + { + "candidate_index": 5, + "source_offset": 389, + "image_id": "Ego4D:ego4d_video/EGO_22577.npy:person:0", + "name": "person", + "description": "A person visible mostly from the back, wearing a dark jacket over a red shirt with yellow text that includes the words 'STANLEY CUP'. The person has dark hair. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red shirt with yellow text, seemingly engaged in an activity in a dimly lit indoor setting." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 376, + "image_id": "EPIC-Kitchens:P02_111:object:6", + "name": "purple exercise ball", + "description": "A large, dark purple inflatable exercise ball sitting on the floor. Source dataset: EPIC-Kitchens. Scene context: A high-angle view of a kitchen area showing a washing machine, various cleaning supplies, a large exercise ball, a trash can, and a cardboard box filled with items." + }, + { + "candidate_index": 1, + "source_offset": 3321, + "image_id": "EPIC-Kitchens:P30_107:object:20", + "name": "small bin", + "description": "A smaller grey bin or bucket containing various items, positioned near the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A high-angle view of a kitchen showing a dark countertop, a white washing machine with an open door, wooden cabinets, grey floor tiles, and a person's leg in the foreground." 
+ }, + { + "candidate_index": 2, + "source_offset": 3590, + "image_id": "EPIC-Kitchens:P35_102:object:1", + "name": "box of salt", + "description": "A small cardboard box with blue and black text, sitting on the counter near the stove. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink containing dirty dishes, a frying pan on a drying rack, and various cooking utensils." + }, + { + "candidate_index": 3, + "source_offset": 7657, + "image_id": "Ego4D:ego4d_video/EGO_293906.npy:object:0", + "name": "dark blue shape", + "description": "A faint, unidentifiable dark blue shape visible against the black background on the right side of the image. Source dataset: Ego4D. Scene context: A predominantly dark scene with a faint, dark blue shape on the right edge and a small, bright green speck within it." + }, + { + "candidate_index": 4, + "source_offset": 2459, + "image_id": "EPIC-Kitchens:P26_103:object:4", + "name": "cutlery", + "description": "A silver fork and a knife resting on the yellow plate inside the right sink bowl. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground." + }, + { + "candidate_index": 5, + "source_offset": 2731, + "image_id": "EPIC-Kitchens:P26_121:object:12", + "name": "cabinet", + "description": "A wooden cabinet positioned above the counter on the right side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter top with a stove, a pan, a bowl of yellow liquid, a wooden cutting board, a plate of food, a water bottle, a large water jug, and a small metal lid." 
+ } + ], + "rng_seed": 1782451638, + "created_at": 1782259667.6546454 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..48bcfe0b3e18acd3347261a4662de9006a206a98 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ec8a298adda1696abd5a86834e71c1eb756c5700d79cc81deb92f69432d932 +size 1443868 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..7aea19fcae179a52670dc27477a697a90eaa85be --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/compose_prompt.txt @@ -0,0 +1,99 @@ +Render the following JSON scene specification as a photorealistic 1152x864 image using a true 4:3 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A bright, modern kitchen featuring a dark countertop, a stainless steel sink, and light wooden elements.", + "activity": "A person is reaching towards the sink area to prepare vegetables and wash dishes.", + "composition": "A slightly elevated first-person or medium shot focusing on the sink and counter in the foreground. The depth is established by the stove and a closed wooden door in the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_at_sink", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + "description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink.", + "role_in_scene": "Standing at the counter, reaching towards the sink to interact with the kitchen items." + } + ], + "objects": [ + { + "name": "wooden_door", + "source_index": 0, + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "description": "A light brown wooden door, closed, with a metallic door handle.", + "role_in_scene": "Visible in the background, indicating the exit or entrance to the kitchen." 
+ }, + { + "name": "wrapped_cucumber", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": "cucumber 2", + "description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer.", + "role_in_scene": "Resting on the counter next to the sink, ready for food preparation." + }, + { + "name": "gas_stove", + "source_index": 3, + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "description": "A gas stove with black grates on the left side.", + "role_in_scene": "Built into the kitchen counter to the side of the sink area." + }, + { + "name": "water_pitcher", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter.", + "role_in_scene": "Sitting on the back section of the countertop." + }, + { + "name": "silver_spoon", + "source_index": 10, + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "description": "Small silver metal spoon resting near the black spatula handle.", + "role_in_scene": "Lying on the counter near the edge of the sink." + }, + { + "name": "large_black_pot", + "source_index": 11, + "source_image_id": "EPIC-Kitchens:P28_103:object:2", + "source_name": "large black pot", + "description": "A bulky black pot with a copper-colored bottom sitting upside down in the dish rack.", + "role_in_scene": "Drying upside down in a dish rack positioned next to the sink." + }, + { + "name": "chrome_faucet", + "source_index": 12, + "source_image_id": "EPIC-Kitchens:P28_101:object:4", + "source_name": "faucet", + "description": "A curved, chrome-finished kitchen faucet mounted on the sink.", + "role_in_scene": "Mounted centrally over the stainless steel sink basin." 
+ } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_gas_stove.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_gas_stove.png new file mode 100644 index 0000000000000000000000000000000000000000..31683ca428704ab5a398edfadb2893f91a3e0ffd Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_gas_stove.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_large_black_pot.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_large_black_pot.png new file mode 100644 index 0000000000000000000000000000000000000000..eff0d4ed001b153e4ceb93b9c8f982c1f7007234 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_large_black_pot.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699901e8dbe5dad936e6eca4aea02f7b103222b6aa28535ba8d71c958bd31eb5 +size 255947 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_person_at_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_person_at_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..f237e7c901bf20641be45df8a1481d7e9ae932ce --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_person_at_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15fee8fd2269cc21477d8cc6c11b70ba1325006eaa2674d9cd6a46412c5ca1e6 +size 308771 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_silver_spoon.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_silver_spoon.png new file mode 100644 index 0000000000000000000000000000000000000000..077404d0dcb16b41eea8f2e97a0d8035410678e8 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_silver_spoon.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_sink_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_sink_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..3e19d897fa5b1da01843e5484d91db963c415a23 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_sink_faucet.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_water_pitcher.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_water_pitcher.png new file mode 100644 index 0000000000000000000000000000000000000000..b3bcf07011d3a69fe39ec923256c1aad69fc71ff Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_water_pitcher.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wooden_door.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wooden_door.png new file mode 100644 index 0000000000000000000000000000000000000000..a6af4f39b38be0a7c64ff854dc513e0e30870075 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wooden_door.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c06f8d7a085944206686391e0f789074822e6cf0b154d30844ff41ac7c46ae5 +size 123368 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wrapped_cucumber.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wrapped_cucumber.png new file mode 100644 index 0000000000000000000000000000000000000000..eb9c8329b3f2aa0794df7bbbb04faed9b443bb26 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/detect_refine_wrapped_cucumber.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_gas_stove.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_gas_stove.png new file mode 100644 index 0000000000000000000000000000000000000000..8b57ad48ce1d44819ece99c112edda83fc889186 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_gas_stove.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_large_black_pot.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_large_black_pot.png new file mode 100644 index 0000000000000000000000000000000000000000..15851f9867bc08d29b46d6b930278f1540da99b7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_large_black_pot.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40da4e95cc97b14e6d131874c53ebeba64c0f160ae3bcd03077bf1718f4c9dd +size 333732 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_person_at_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_person_at_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..3c0139fe6f4b2659e580344df744a881436656f1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_person_at_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b84e433b186986a55d30f8cb77419920ab7661aa6a70e6b26fad0bb6e7ed29 +size 439864 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_silver_spoon.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_silver_spoon.png new file mode 100644 index 
0000000000000000000000000000000000000000..4abc451f3e579e4ad9cc6000617486b22e9830c1 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_silver_spoon.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_sink_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_sink_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..7240bb7b53603b33d6fcd022baacf89f9f3a6e00 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_sink_faucet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ac3e360672b2eaa8d2796f3d408222ad17acc225ea4daf0a129d1ab30324ba +size 118383 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_water_pitcher.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_water_pitcher.png new file mode 100644 index 0000000000000000000000000000000000000000..8689d50628d076aab1433e02a617023e74af90a6 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_water_pitcher.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wooden_door.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wooden_door.png new file mode 100644 index 0000000000000000000000000000000000000000..7ba9c096ead17e3c48f1fef70208e8ac67e799fe --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wooden_door.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf187649f4c110b63b3a24e96b6c043774929b1b56bc93ba00669d59b6de4fe +size 179330 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wrapped_cucumber.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wrapped_cucumber.png new file mode 100644 index 0000000000000000000000000000000000000000..fe9919655d49fddba35930462ef3c320430fe57c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/crops/diversify_input_wrapped_cucumber.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..d18c27e6c6342cebfd0ee0d703d1acc3c6725edd --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/detections.json @@ -0,0 +1,154 @@ +[ + { + "name": "person_at_sink", + "present": true, + "bbox": [ + 0.0, + 0.3583, + 0.2441, + 1.0 + ], + "confidence": 0.95, + "notes": "Bounding box covers the visible portions of the person: the arm, hand, partial torso, leg, and shoe.", + "coarse_bbox": [ + 0.0, + 0.342, + 0.24, + 0.988 + ], + "refine_crop": "crops/detect_refine_person_at_sink.png" + }, + { + "name": "wooden_door", + "present": true, + "bbox": [ + 0.2139, + 0.0, + 0.3994, + 0.412 + ], + "confidence": 1.0, + "notes": "The light brown wooden door and its frame occupy the majority of the background.", + "coarse_bbox": [ + 0.215, + 0.002, + 0.395, + 0.375 + ], + "refine_crop": "crops/detect_refine_wooden_door.png" + }, + { + "name": "wrapped_cucumber", + "present": true, + "bbox": [ + 0.2215, + 0.4465, + 0.4029, + 0.5104 + ], + "confidence": 0.99, + "notes": "A long, green cucumber wrapped in clear plastic is clearly visible.", + "coarse_bbox": [ + 0.22, + 0.446, + 0.396, + 0.515 + ], + "refine_crop": "crops/detect_refine_wrapped_cucumber.png" + }, + { + "name": "gas_stove", + "present": true, + "bbox": [ + 0.03, + 0.345, + 0.318, + 0.444 + ], + "confidence": 0.99, + "notes": "refine failed; using coarse bbox", + "coarse_bbox": [ + 0.03, + 0.345, + 0.318, + 0.444 + ], + 
"refine_crop": "crops/detect_refine_gas_stove.png" + }, + { + "name": "water_pitcher", + "present": true, + "bbox": [ + 0.3327, + 0.2732, + 0.4536, + 0.4573 + ], + "confidence": 0.99, + "notes": "Tight bounding box around the clear plastic water pitcher with a white handle and lid.", + "coarse_bbox": [ + 0.332, + 0.275, + 0.454, + 0.456 + ], + "refine_crop": "crops/detect_refine_water_pitcher.png" + }, + { + "name": "silver_spoon", + "present": true, + "bbox": [ + 0.3001, + 0.4801, + 0.3988, + 0.539 + ], + "confidence": 0.99, + "notes": "A tight bounding box around the visible silver spoon resting on the counter.", + "coarse_bbox": [ + 0.303, + 0.482, + 0.4, + 0.539 + ], + "refine_crop": "crops/detect_refine_silver_spoon.png" + }, + { + "name": "large_black_pot", + "present": true, + "bbox": [ + 0.6623, + 0.3017, + 1.0, + 0.6915 + ], + "confidence": 0.98, + "notes": "Tight bounding box placed around the bulky black pot with a copper-colored bottom sitting upside down in the dish rack.", + "coarse_bbox": [ + 0.662, + 0.299, + 1.0, + 0.665 + ], + "refine_crop": "crops/detect_refine_large_black_pot.png" + }, + { + "name": "sink_faucet", + "present": true, + "bbox": [ + 0.4755, + 0.2815, + 0.6395, + 0.5232 + ], + "confidence": 0.95, + "notes": "A curved chrome kitchen faucet is clearly visible and fits the description perfectly.", + "coarse_bbox": [ + 0.471, + 0.279, + 0.63, + 0.523 + ], + "refine_crop": "crops/detect_refine_sink_faucet.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..ef0a8a7c40f2a3b20a06528284c4e3a6190d78ec --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0f68fdcf0d86794ca818ecaaf1eec4e28e156142f2ff725ee013fa479850d4 +size 1516707 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..0e69030c51a7b50abe1a06d02a69dd9e7a58cf74 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/plan.json @@ -0,0 +1,198 @@ +{ + "sample_id": "sample_000006", + "target_total": 8, + "target_people": 1, + "target_objects": 7, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A well-lit, slightly messy domestic kitchen featuring dark countertops, a stainless steel sink, light cabinets, and a wooden floor.", + "activity": "A person is standing at the sink reaching for the faucet to wash dishes or prepare food.", + "composition": "First-person perspective looking slightly downward at the sink area. The person's hand and lower body are visible in the foreground. The sink and faucet are central. A dish rack with a pot is on the edge of the sink, while scattered items like a cucumber, pitcher, and spoon rest on the dark countertop. 
The stove and a closed wooden door form the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_at_sink", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + "description": "A person's left hand and lower body wearing dark trousers and black shoes with white soles.", + "role_in_scene": "Standing in the foreground, reaching their left hand towards the kitchen faucet." + } + ], + "objects": [ + { + "name": "wooden_door", + "source_index": 0, + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "description": "A light brown wooden door, closed, with a metallic handle.", + "role_in_scene": "Visible in the background on the kitchen wall." + }, + { + "name": "wrapped_cucumber", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": "cucumber 2", + "description": "A long, green cucumber wrapped in clear plastic.", + "role_in_scene": "Resting horizontally on the dark countertop next to the sink." + }, + { + "name": "gas_stove", + "source_index": 3, + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "description": "A kitchen gas stove with black grates.", + "role_in_scene": "Positioned along the counter in the background." 
+ }, + { + "name": "water_pitcher", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "description": "A clear plastic water pitcher with a white handle and lid.", + "role_in_scene": "Sitting upright on the counter." + }, + { + "name": "silver_spoon", + "source_index": 10, + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "description": "A small silver metal spoon.", + "role_in_scene": "Laying flat on the messy countertop near the cucumber." + }, + { + "name": "black_pot", + "source_index": 11, + "source_image_id": "EPIC-Kitchens:P28_103:object:2", + "source_name": "large black pot", + "description": "A bulky black pot with a copper-colored bottom.", + "role_in_scene": "Sitting upside down in a dish rack near the sink basin." + }, + { + "name": "kitchen_faucet", + "source_index": 12, + "source_image_id": "EPIC-Kitchens:P28_101:object:4", + "source_name": "faucet", + "description": "A curved, chrome-finished kitchen faucet.", + "role_in_scene": "Mounted on the sink, acting as the target of the person's reaching hand." + } + ] + }, + "expected_subjects": [ + { + "name": "person_at_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + "source_description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "person: A person's left hand and lower body wearing dark trousers and black shoes with white soles.. 
Scene role: Standing in the foreground, reaching their left hand towards the kitchen faucet.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "wooden_door", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "source_description": "A light brown wooden door, closed, with a metallic door handle. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left.", + "sub_caption": "door: A light brown wooden door, closed, with a metallic handle.. Scene role: Visible in the background on the kitchen wall.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "wrapped_cucumber", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": "cucumber 2", + "source_description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background.", + "sub_caption": "cucumber 2: A long, green cucumber wrapped in clear plastic.. Scene role: Resting horizontally on the dark countertop next to the sink.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "gas_stove", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "source_description": "A gas stove with black grates on the left side. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets.", + "sub_caption": "stove: A kitchen gas stove with black grates.. Scene role: Positioned along the counter in the background.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "water_pitcher", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "source_description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor.", + "sub_caption": "water pitcher: A clear plastic water pitcher with a white handle and lid.. Scene role: Sitting upright on the counter.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "silver_spoon", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "source_description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around.", + "sub_caption": "Silver Spoon: A small silver metal spoon.. Scene role: Laying flat on the messy countertop near the cucumber.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "black_pot", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_103:object:2", + "source_name": "large black pot", + "source_description": "A bulky black pot with a copper-colored bottom sitting upside down in the dish rack. Source dataset: EPIC-Kitchens. 
Scene context: A kitchen counter area featuring a stainless steel sink, a red dish rack filled with clean dishes, and various cleaning supplies under bright sunlight.", + "sub_caption": "large black pot: A bulky black pot with a copper-colored bottom.. Scene role: Sitting upside down in a dish rack near the sink basin.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "kitchen_faucet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_101:object:4", + "source_name": "faucet", + "source_description": "A curved, chrome-finished kitchen faucet mounted on the sink. Source dataset: EPIC-Kitchens. Scene context: A high-angle view of a kitchen showing a sink area on the left with unwashed dishes, and a light wood floor leading towards cabinets and a refrigerator on the right.", + "sub_caption": "faucet: A curved, chrome-finished kitchen faucet.. Scene role: Mounted on the sink, acting as the target of the person's reaching hand.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000006/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references.json new file mode 100644 index 0000000000000000000000000000000000000000..dce723ff080e694d9b3e3c2cf4bb9a175da75423 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references.json @@ -0,0 +1,261 @@ +{ + "references": [ + { + "name": "person_at_sink", + "ref_image": "references/ref_person_at_sink.png", + "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", + "diversify_input": "crops/diversify_input_person_at_sink.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", + "output": "references/ref_person_at_sink.png", + "mask": "references/sam_mask_person_at_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 320.0, + 24.0, + 689.0, + 1002.0 + ], + "mask_score": 3.450755, + "mask_area_ratio": 0.163542, + "elapsed_seconds": 31.0984 + }, + "reference_verify": "references/reference_verify_person_at_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "wooden_door", + "ref_image": "references/ref_wooden_door.png", + "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", + "diversify_input": "crops/diversify_input_wooden_door.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", + "output": "references/ref_wooden_door.png", + "mask": "references/sam_mask_wooden_door.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 256.0, + 26.0, + 767.0, + 1015.0 + ], + "mask_score": 3.446321, + "mask_area_ratio": 0.388947, + "elapsed_seconds": 9.29 + }, + "reference_verify": "references/reference_verify_wooden_door.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "wrapped_cucumber", + "ref_image": 
"references/ref_wrapped_cucumber.png", + "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", + "diversify_input": "crops/diversify_input_wrapped_cucumber.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", + "output": "references/ref_wrapped_cucumber.png", + "mask": "references/sam_mask_wrapped_cucumber.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 397.0, + 1011.0, + 624.0 + ], + "mask_score": 3.430612, + "mask_area_ratio": 0.118574, + "elapsed_seconds": 7.2551 + }, + "reference_verify": "references/reference_verify_wrapped_cucumber.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "gas_stove", + "ref_image": "references/ref_gas_stove.png", + "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", + "diversify_input": "crops/diversify_input_gas_stove.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", + "output": "references/ref_gas_stove.png", + "mask": "references/sam_mask_gas_stove.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 58.0, + 180.0, + 965.0, + 842.0 + ], + "mask_score": 3.470988, + "mask_area_ratio": 0.477615, + 
"elapsed_seconds": 7.3908 + }, + "reference_verify": "references/reference_verify_gas_stove.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "water_pitcher", + "ref_image": "references/ref_water_pitcher.png", + "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", + "diversify_input": "crops/diversify_input_water_pitcher.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", + "output": "references/ref_water_pitcher.png", + "mask": "references/sam_mask_water_pitcher.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 242.0, + 115.0, + 856.0, + 972.0 + ], + "mask_score": 3.323768, + "mask_area_ratio": 0.450877, + "elapsed_seconds": 7.2849 + }, + "reference_verify": "references/reference_verify_water_pitcher.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "silver_spoon", + "ref_image": "references/ref_silver_spoon.png", + "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", + "diversify_input": "crops/diversify_input_silver_spoon.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", + "output": "references/ref_silver_spoon.png", + "mask": "references/sam_mask_silver_spoon.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + 
"sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 77.0, + 916.0, + 950.0 + ], + "mask_score": 3.449565, + "mask_area_ratio": 0.092279, + "elapsed_seconds": 7.0866 + }, + "reference_verify": "references/reference_verify_silver_spoon.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "large_black_pot", + "ref_image": "references/ref_large_black_pot.png", + "raw_ref_image": "references/raw_ref_large_black_pot_attempt_01.png", + "diversify_input": "crops/diversify_input_large_black_pot.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_large_black_pot_attempt_01.png", + "output": "references/ref_large_black_pot.png", + "mask": "references/sam_mask_large_black_pot.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 44.0, + 140.0, + 981.0, + 873.0 + ], + "mask_score": 3.362381, + "mask_area_ratio": 0.36548, + "elapsed_seconds": 7.1148 + }, + "reference_verify": "references/reference_verify_large_black_pot.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "sink_faucet", + "ref_image": "references/ref_sink_faucet.png", + "raw_ref_image": "references/raw_ref_sink_faucet_attempt_01.png", + "diversify_input": "crops/diversify_input_sink_faucet.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_sink_faucet_attempt_01.png", + "output": "references/ref_sink_faucet.png", + "mask": "references/sam_mask_sink_faucet.png", + 
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 143.0, + 51.0, + 881.0, + 967.0 + ], + "mask_score": 3.441042, + "mask_area_ratio": 0.117274, + "elapsed_seconds": 7.17 + }, + "reference_verify": "references/reference_verify_sink_faucet.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_gas_stove.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_gas_stove.png new file mode 100644 index 0000000000000000000000000000000000000000..078adf5576bb0436cf780884e9031b6290d76c29 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_gas_stove.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea51615f7f98868a427285ef8f54213a207060b1ad5c0b41aa493851ff5eb7b +size 769513 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_large_black_pot.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_large_black_pot.png new file mode 100644 index 0000000000000000000000000000000000000000..0f748d57fecd09d746cdd4cb4ebfdd4bdeb1afa6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_large_black_pot.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2ea9070aba1897b2b2a853c574d61023e19ffbe53ad04b4ae2d4450b7df480 +size 722742 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_person_at_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_person_at_sink.png new file mode 100644 index 
0000000000000000000000000000000000000000..73e44a272e5555b72e660daea46bbc484c3ccd7d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_person_at_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1046182d109752b5a54d7238b6c07af40bdc82a7c0fb4c623e6ce49ab6152696 +size 309241 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_silver_spoon.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_silver_spoon.png new file mode 100644 index 0000000000000000000000000000000000000000..897371c75953a6b5804c1a80ae41f9ac8ff66e1f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_silver_spoon.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3864a8044d890c004ad293fb7730291d1d505c90c6f6b3f66359cbda63190cc +size 154971 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_sink_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_sink_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..9bce96ee5938688d0fcb00f95ded192a98ed5ea0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_sink_faucet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84891e0981b729d23ad07eff4533bad92bfa85302654d7362ddb1ef2427f8a98 +size 245825 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_water_pitcher.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_water_pitcher.png new file mode 100644 index 0000000000000000000000000000000000000000..ebf3228318d6c5382e721ceef28b314d56542df7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_water_pitcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7607987fa88ee9a754d921ac0e14b2190a1f55c629c7ebf309ebfde4526d9d7e +size 610060 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wooden_door.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wooden_door.png new file mode 100644 index 0000000000000000000000000000000000000000..7af4e9491c247e8d154b3cddfe90bf76624f3417 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wooden_door.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce435737dc3302a58a24fb943f909bc928f9f6aa80ed2fc8a66536c1a1f01f7a +size 570198 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wrapped_cucumber.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wrapped_cucumber.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b0e71929ed94250d96d4696f79f470f81e9724 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/ref_wrapped_cucumber.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b385615ab1f0d78c4521d4252da1c696cbaf98e001a7eeceb6af2a67104fab +size 273701 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_gas_stove.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_gas_stove.json new file mode 100644 index 0000000000000000000000000000000000000000..96517764fe2b6da6528c0bc06f97a09f3c9cb176 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_gas_stove.json @@ -0,0 +1,46 @@ +{ + "name": "gas_stove", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_gas_stove_attempt_01.png", + "candidate_sam_mask": 
"references/candidate_sam_mask_gas_stove_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_gas_stove_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_gas_stove_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 58.0, + 180.0, + 965.0, + 842.0 + ], + "mask_score": 3.470988, + "mask_area_ratio": 0.477615, + "elapsed_seconds": 7.3908 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete, isolated gas stove with black metal grates on a white background. It meets all hard requirements." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_large_black_pot.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_large_black_pot.json new file mode 100644 index 0000000000000000000000000000000000000000..5ba54b12befeb30e64f6db4948f2e6858fa37cfb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_large_black_pot.json @@ -0,0 +1,46 @@ +{ + "name": "large_black_pot", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_large_black_pot_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_large_black_pot_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_large_black_pot_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_large_black_pot_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_large_black_pot_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_large_black_pot_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 44.0, + 140.0, + 981.0, + 873.0 + ], + "mask_score": 3.362381, + "mask_area_ratio": 0.36548, + "elapsed_seconds": 7.1148 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + 
"cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a large black pot with a copper-colored bottom, positioned upside down on a white background. It satisfies all hard requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_person_at_sink.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_person_at_sink.json new file mode 100644 index 0000000000000000000000000000000000000000..bb3748504004d346d7c58f6d21780738151d35a0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_person_at_sink.json @@ -0,0 +1,46 @@ +{ + "name": "person_at_sink", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_at_sink_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_at_sink_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_person_at_sink_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_person_at_sink_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 
0.625, + "prompt_box_xyxy": [ + 320.0, + 24.0, + 689.0, + 1002.0 + ], + "mask_score": 3.450755, + "mask_area_ratio": 0.163542, + "elapsed_seconds": 31.0984 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a full body of a single person on a white background without any cropping." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_silver_spoon.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_silver_spoon.json new file mode 100644 index 0000000000000000000000000000000000000000..3f3c343df5899b805fe19971f56aeaed262be269 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_silver_spoon.json @@ -0,0 +1,46 @@ +{ + "name": "silver_spoon", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_silver_spoon_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_silver_spoon_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_silver_spoon_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_silver_spoon_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 77.0, + 916.0, + 950.0 + ], + "mask_score": 3.449565, + "mask_area_ratio": 0.092279, + "elapsed_seconds": 7.0866 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a single silver spoon isolated on a white background. It is complete and not cropped." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_sink_faucet.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_sink_faucet.json new file mode 100644 index 0000000000000000000000000000000000000000..1a1f4800bf73539d2cc7d81efdbd9923ad5d8b7e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_sink_faucet.json @@ -0,0 +1,46 @@ +{ + "name": "sink_faucet", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_sink_faucet_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_sink_faucet_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_sink_faucet_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_sink_faucet_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_sink_faucet_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_sink_faucet_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 143.0, + 51.0, + 881.0, + 967.0 + ], + "mask_score": 3.441042, + "mask_area_ratio": 0.117274, + "elapsed_seconds": 7.17 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete, isolated sink faucet on a white background, serving as a good reference for the object." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_water_pitcher.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_water_pitcher.json new file mode 100644 index 0000000000000000000000000000000000000000..e6caa88e48bf1faf6ac92287ec7340f010724dc7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_water_pitcher.json @@ -0,0 +1,46 @@ +{ + "name": "water_pitcher", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_water_pitcher_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_water_pitcher_attempt_01.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_water_pitcher_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_water_pitcher_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 242.0, + 115.0, + 856.0, + 972.0 + ], + "mask_score": 3.323768, + "mask_area_ratio": 0.450877, + "elapsed_seconds": 7.2849 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The water pitcher is fully visible, not cropped, and sits alone on a white background." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wooden_door.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wooden_door.json new file mode 100644 index 0000000000000000000000000000000000000000..bb81dba1220639bc3a1823aa28ac11b51ec0b531 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wooden_door.json @@ -0,0 +1,46 @@ +{ + "name": "wooden_door", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_wooden_door_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_wooden_door_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_wooden_door_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_wooden_door_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 256.0, + 26.0, + 767.0, + 1015.0 + ], + "mask_score": 3.446321, + "mask_area_ratio": 0.388947, + "elapsed_seconds": 9.29 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": 
true, + "white_background": true, + "failure_reasons": [], + "notes": "The light brown wooden door with metallic handle is clearly visible, complete, isolated on a white background, and functions as an excellent reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wrapped_cucumber.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wrapped_cucumber.json new file mode 100644 index 0000000000000000000000000000000000000000..79a210d08b1124473ca55d9ddfc769cfedc644ed --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/reference_verify_wrapped_cucumber.json @@ -0,0 +1,46 @@ +{ + "name": "wrapped_cucumber", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_wrapped_cucumber_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_wrapped_cucumber_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_ref_wrapped_cucumber_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/candidate_sam_mask_wrapped_cucumber_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 
33.0, + 397.0, + 1011.0, + 624.0 + ], + "mask_score": 3.430612, + "mask_area_ratio": 0.118574, + "elapsed_seconds": 7.2551 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a single complete wrapped cucumber on a white background, which is an acceptable reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_gas_stove.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_gas_stove.png new file mode 100644 index 0000000000000000000000000000000000000000..8dab616579a0d38064148df631ade1b97cf61588 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_gas_stove.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_large_black_pot.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_large_black_pot.png new file mode 100644 index 0000000000000000000000000000000000000000..10aaf01e90f1c74f25a42ff184801cfc04190ccb Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_large_black_pot.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_person_at_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_person_at_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..bc0c865d8fa12c44afabad83dd2dd1ed7593e7e0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_person_at_sink.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_silver_spoon.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_silver_spoon.png new file mode 100644 index 0000000000000000000000000000000000000000..d806cdcba193cd5b85e22f89f249d27e505fbd35 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_silver_spoon.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_sink_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_sink_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..c2a8dcb5591395ebd0cee48ac1b8fb748532f93a Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_sink_faucet.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_water_pitcher.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_water_pitcher.png new file mode 100644 index 0000000000000000000000000000000000000000..08451f485049cb95ec070d3e74ec35757d9f9fd4 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_water_pitcher.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wooden_door.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wooden_door.png new file mode 100644 index 0000000000000000000000000000000000000000..dfdcfd3f26a1d0728bb3e83e420b8cad6d6566ef Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wooden_door.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wrapped_cucumber.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wrapped_cucumber.png new file mode 100644 index 
0000000000000000000000000000000000000000..8971cb8777dd2126201666282397cc61e08e2a95 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/references/sam_mask_wrapped_cucumber.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/row.json new file mode 100644 index 0000000000000000000000000000000000000000..f1a23fe97b9b73fd13a72199ed0ac66fceb1c7b5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/row.json @@ -0,0 +1,311 @@ +{ + "sample_id": "sample_000006", + "target_total": 8, + "target_people": 1, + "target_objects": 7, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 8, + "n_detected": 8, + "n_subjects": 6, + "subjects": [ + { + "name": "person_at_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "EPIC-Kitchens:P26_103:person:0", + "source_name": "person", + "source_description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground.", + "sub_caption": "person: A person's left hand and lower body wearing dark trousers and black shoes with white soles.. 
Scene role: Standing in the foreground, reaching their left hand towards the kitchen faucet.", + "measured_bbox": [ + 0.0, + 0.3583, + 0.2441, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_at_sink.png", + "raw_ref_image": "references/raw_ref_person_at_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_at_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_person_at_sink_attempt_01.png", + "output": "references/ref_person_at_sink.png", + "mask": "references/sam_mask_person_at_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 320.0, + 24.0, + 689.0, + 1002.0 + ], + "mask_score": 3.450755, + "mask_area_ratio": 0.163542, + "elapsed_seconds": 31.0984 + } + }, + { + "name": "wooden_door", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P12_104:object:0", + "source_name": "door", + "source_description": "A light brown wooden door, closed, with a metallic door handle. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left.", + "sub_caption": "door: A light brown wooden door, closed, with a metallic handle.. 
Scene role: Visible in the background on the kitchen wall.", + "measured_bbox": [ + 0.2139, + 0.0, + 0.3994, + 0.412 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_door.png", + "raw_ref_image": "references/raw_ref_wooden_door_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_door.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wooden_door_attempt_01.png", + "output": "references/ref_wooden_door.png", + "mask": "references/sam_mask_wooden_door.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 256.0, + 26.0, + 767.0, + 1015.0 + ], + "mask_score": 3.446321, + "mask_area_ratio": 0.388947, + "elapsed_seconds": 9.29 + } + }, + { + "name": "wrapped_cucumber", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_101:object:1", + "source_name": "cucumber 2", + "source_description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background.", + "sub_caption": "cucumber 2: A long, green cucumber wrapped in clear plastic.. 
Scene role: Resting horizontally on the dark countertop next to the sink.", + "measured_bbox": [ + 0.2215, + 0.4465, + 0.4029, + 0.5104 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wrapped_cucumber.png", + "raw_ref_image": "references/raw_ref_wrapped_cucumber_attempt_01.png", + "reference_verify": "references/reference_verify_wrapped_cucumber.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_wrapped_cucumber_attempt_01.png", + "output": "references/ref_wrapped_cucumber.png", + "mask": "references/sam_mask_wrapped_cucumber.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 397.0, + 1011.0, + 624.0 + ], + "mask_score": 3.430612, + "mask_area_ratio": 0.118574, + "elapsed_seconds": 7.2551 + } + }, + { + "name": "gas_stove", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P22_105:object:12", + "source_name": "stove", + "source_description": "A gas stove with black grates on the left side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets.", + "sub_caption": "stove: A kitchen gas stove with black grates.. 
Scene role: Positioned along the counter in the background.", + "measured_bbox": [ + 0.03, + 0.345, + 0.318, + 0.444 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_gas_stove.png", + "raw_ref_image": "references/raw_ref_gas_stove_attempt_01.png", + "reference_verify": "references/reference_verify_gas_stove.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_gas_stove_attempt_01.png", + "output": "references/ref_gas_stove.png", + "mask": "references/sam_mask_gas_stove.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 58.0, + 180.0, + 965.0, + 842.0 + ], + "mask_score": 3.470988, + "mask_area_ratio": 0.477615, + "elapsed_seconds": 7.3908 + } + }, + { + "name": "water_pitcher", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_107:object:13", + "source_name": "water pitcher", + "source_description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor.", + "sub_caption": "water pitcher: A clear plastic water pitcher with a white handle and lid.. 
Scene role: Sitting upright on the counter.", + "measured_bbox": [ + 0.3327, + 0.2732, + 0.4536, + 0.4573 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_water_pitcher.png", + "raw_ref_image": "references/raw_ref_water_pitcher_attempt_01.png", + "reference_verify": "references/reference_verify_water_pitcher.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_water_pitcher_attempt_01.png", + "output": "references/ref_water_pitcher.png", + "mask": "references/sam_mask_water_pitcher.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 242.0, + 115.0, + 856.0, + 972.0 + ], + "mask_score": 3.323768, + "mask_area_ratio": 0.450877, + "elapsed_seconds": 7.2849 + } + }, + { + "name": "silver_spoon", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_103:object:9", + "source_name": "Silver Spoon", + "source_description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around.", + "sub_caption": "Silver Spoon: A small silver metal spoon.. 
Scene role: Laying flat on the messy countertop near the cucumber.", + "measured_bbox": [ + 0.3001, + 0.4801, + 0.3988, + 0.539 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_spoon.png", + "raw_ref_image": "references/raw_ref_silver_spoon_attempt_01.png", + "reference_verify": "references/reference_verify_silver_spoon.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000006/references/raw_ref_silver_spoon_attempt_01.png", + "output": "references/ref_silver_spoon.png", + "mask": "references/sam_mask_silver_spoon.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 77.0, + 916.0, + 950.0 + ], + "mask_score": 3.449565, + "mask_area_ratio": 0.092279, + "elapsed_seconds": 7.0866 + } + } + ], + "not_emitted": [ + { + "name": "black_pot", + "reason": "not_detected" + }, + { + "name": "kitchen_faucet", + "reason": "not_detected" + } + ], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..1daaa5c80c7ebcc95d16c3afe0b3aa2c54f6bd42 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000006/vocab_task.json @@ -0,0 +1,126 @@ +{ + "task_id": "sample_000006", + "sample_id": "sample_000006", + "sample_index": 6, + "target_total": 8, + 
"target_people": 1, + "target_objects": 7, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 864, + "image_id": "Ego4D:ego4d_video/EGO_3996.npy:person:0", + "name": "person", + "description": "Only the person's hands are visible. The hands appear to be illuminated by a dim, red light source, making them barely discernible against the surrounding darkness. The fingers are slightly spread apart. Source dataset: Ego4D. Scene context: In an extremely dark and poorly lit scene, two hands are faintly visible, seemingly glowing in a reddish hue against a completely black background." + }, + { + "candidate_index": 1, + "source_offset": 29, + "image_id": "EPIC-Kitchens:P26_103:person:0", + "name": "person", + "description": "A person's left hand and lower body, including dark trousers and black shoes with white soles, are visible in the foreground, reaching towards the sink. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a stainless steel kitchen sink containing dirty dishes and a blue cloth, with a hand visible in the foreground." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 1965, + "image_id": "EPIC-Kitchens:P12_104:object:0", + "name": "door", + "description": "A light brown wooden door, closed, with a metallic door handle. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a closed wooden door with a metal handle, positioned in a room with a kitchen area visible to the left." + }, + { + "candidate_index": 1, + "source_offset": 3849, + "image_id": "EPIC-Kitchens:P37_101:object:1", + "name": "cucumber 2", + "description": "A long, green cucumber wrapped in clear plastic, resting horizontally on the dark countertop, slightly overlapping the other cucumber and positioned closer to the viewer. Source dataset: EPIC-Kitchens. 
Scene context: A first-person view of a person holding two whole cucumbers over a dark kitchen counter, with an onion, garlic, a plastic container, a rice cooker, and a living area in the background." + }, + { + "candidate_index": 2, + "source_offset": 9286, + "image_id": "Ego4D:ego4d_video/EGO_95202.npy:object:1", + "name": "tool", + "description": "A thin, elongated instrument held by the hand, partially illuminated by the red light. Source dataset: Ego4D. Scene context: A close-up view of a person using a tool on a large, glowing, round object in a dark environment." + }, + { + "candidate_index": 3, + "source_offset": 2181, + "image_id": "EPIC-Kitchens:P22_105:object:12", + "name": "stove", + "description": "A gas stove with black grates on the left side. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a stove, a sink, various utensils, bottles, and cabinets." + }, + { + "candidate_index": 4, + "source_offset": 4348, + "image_id": "Ego4D:ego4d_video/EGO_157265.npy:object:0", + "name": "red streak", + "description": "A faint, slightly curved red streak visible near the bottom center of the mostly dark image. Source dataset: Ego4D. Scene context: The image is predominantly dark, with only a few faint red, blue, and yellowish shapes visible near the bottom, possibly indicating a low-light or obscured view." + }, + { + "candidate_index": 5, + "source_offset": 1613, + "image_id": "EPIC-Kitchens:P07_107:object:13", + "name": "water pitcher", + "description": "A clear plastic water pitcher with a white handle and lid, sitting on the counter. Source dataset: EPIC-Kitchens. Scene context: A narrow kitchen space with a dark floor, light cabinets, a sink counter with various items, a radiator on the wall, and a trash can on the floor." 
+ }, + { + "candidate_index": 6, + "source_offset": 5143, + "image_id": "Ego4D:ego4d_video/EGO_172550.npy:object:1", + "name": "window or screen", + "description": "A large, partially visible rectangular area in the background, showing a dim, possibly outdoor scene. Source dataset: Ego4D. Scene context: A close-up view of a person holding a red object in a dimly lit environment, possibly outdoors or near a window." + }, + { + "candidate_index": 7, + "source_offset": 7823, + "image_id": "Ego4D:ego4d_video/EGO_294134.npy:object:2", + "name": "faint blue shape on bottom left", + "description": "A very faint area of blueish color near the bottom left corner. Source dataset: Ego4D. Scene context: A very dark image with a prominent blurred circular blue-green light or object in the center, and faint, mostly unidentifiable shapes with slight blue and gray tinges near the right edge and bottom left." + }, + { + "candidate_index": 8, + "source_offset": 4995, + "image_id": "Ego4D:ego4d_video/EGO_165541.npy:object:1", + "name": "blue patch", + "description": "A dark blue, somewhat rectangular patch of color in the lower left area, possibly an object or a reflection. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night, with vague, mostly obscured shapes and minimal illumination." + }, + { + "candidate_index": 9, + "source_offset": 8656, + "image_id": "Ego4D:ego4d_video/EGO_49930.npy:object:1", + "name": "cylinder-like object", + "description": "A faint, cylindrical object with what appears to be a label or markings, located towards the upper right. Source dataset: Ego4D. Scene context: A very dark scene with barely visible objects, illuminated only by a faint, reddish light source." + }, + { + "candidate_index": 10, + "source_offset": 1312, + "image_id": "EPIC-Kitchens:P04_103:object:9", + "name": "Silver Spoon", + "description": "Small silver metal spoon resting near the black spatula handle. Source dataset: EPIC-Kitchens. 
Scene context: A messy kitchen counter with a stove, toaster, and various cooking utensils scattered around." + }, + { + "candidate_index": 11, + "source_offset": 2886, + "image_id": "EPIC-Kitchens:P28_103:object:2", + "name": "large black pot", + "description": "A bulky black pot with a copper-colored bottom sitting upside down in the dish rack. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area featuring a stainless steel sink, a red dish rack filled with clean dishes, and various cleaning supplies under bright sunlight." + }, + { + "candidate_index": 12, + "source_offset": 2800, + "image_id": "EPIC-Kitchens:P28_101:object:4", + "name": "faucet", + "description": "A curved, chrome-finished kitchen faucet mounted on the sink. Source dataset: EPIC-Kitchens. Scene context: A high-angle view of a kitchen showing a sink area on the left with unwashed dishes, and a light wood floor leading towards cabinets and a refrigerator on the right." + }, + { + "candidate_index": 13, + "source_offset": 4833, + "image_id": "Ego4D:ego4d_video/EGO_165438.npy:object:1", + "name": "nails", + "description": "Several small, dark nails embedded in the wooden structures, with some protruding outwards. Source dataset: Ego4D. Scene context: A close-up view of wooden structures with nails." 
+ } + ], + "rng_seed": 1782556367, + "created_at": 1782259667.7967138 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..e9bb0684f10fc1aeb4d018a9bf80f8283fd26720 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f75c5f0804ca2fa16a93b15c90a4fb20e35b91f8721b1e93c9c9260b06b68b +size 1151865 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb63b9debd3cef2a8ef1d3251173a328de5dd82b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/compose_prompt.txt @@ -0,0 +1,147 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dimly lit room at night, illuminated mostly by the soft blue glow of screens, glowing keyboards, and small LED indicators.", + "activity": "A silhouetted person is typing on an illuminated keyboard at a desk, surrounded by glowing electronic devices and cables stretching into deep shadows.", + "composition": "Camera positioned slightly behind and to the side of the typist, focusing on the illuminated keyboard in the foreground, with glowing devices on the desk, trailing cables leading to the grid-patterned floor where a backpack sits, and deep shadowed areas in the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "typist", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "description": "Visible as a dark silhouette with one hand interacting with an illuminated keyboard.", + "role_in_scene": "Typing at the desk in the center of the frame, serving as the main subject of the scene." 
+ } + ], + "objects": [ + { + "name": "textured_fabric_area", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "description": "A faint blueish, textured area that resembles rough fabric.", + "role_in_scene": "Draped loosely over the back of the typist's chair, catching some of the ambient blue light." + }, + { + "name": "textured_surface", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "description": "A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.", + "role_in_scene": "Acting as a large desk mat or mousepad underneath the glowing keyboard." + }, + { + "name": "blue_lines", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "description": "Faint, indistinct blue lines glowing in the dark.", + "role_in_scene": "An LED light strip illuminating the edge of the desk." + }, + { + "name": "grid_patterned_floor", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "description": "A dark, grid-patterned floor surface, likely made of tiles.", + "role_in_scene": "Visible in the lower portion of the room beneath the desk area." + }, + { + "name": "shadowy_shape", + "source_index": 8, + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "description": "A large, unidentifiable shadowy shape.", + "role_in_scene": "Looming in the background, suggesting stacked boxes or furniture in the darkness." 
+ }, + { + "name": "blue_light_source", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "description": "A small, hazy blue glowing area, appearing slightly irregular in shape.", + "role_in_scene": "A glowing component or hub device resting on the desk." + }, + { + "name": "thin_curved_object", + "source_index": 12, + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "description": "A thin, metallic or reflective curved object.", + "role_in_scene": "Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light." + }, + { + "name": "electronic_device", + "source_index": 14, + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "description": "A faint red rectangular shape with some texture, resembling a phone.", + "role_in_scene": "Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights." + }, + { + "name": "blue_light", + "source_index": 19, + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "description": "A small blue light source.", + "role_in_scene": "A standby light on a computer tower sitting on the floor." + }, + { + "name": "cable", + "source_index": 20, + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "description": "A light-colored cable trailing down from the counter area towards the floor.", + "role_in_scene": "Hanging down from the edge of the desk toward the floor, connecting devices." + }, + { + "name": "backpack", + "source_index": 22, + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "description": "A blue and black backpack.", + "role_in_scene": "Resting quietly on the grid-patterned floor near the trailing cable." 
+ }, + { + "name": "small_blue_rectangle", + "source_index": 23, + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "description": "A tiny, bright blue rectangular glow.", + "role_in_scene": "A small secondary display or digital clock sitting on the corner of the desk." + }, + { + "name": "dark_area", + "source_index": 24, + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "description": "A deeply shadowed region dominating the space.", + "role_in_scene": "Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup." + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_backpack.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_backpack.png new file mode 100644 index 0000000000000000000000000000000000000000..bfe4a51084b87c24f714e7c6d0e86713fb823e4e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_backpack.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1242cb102d71cca7199a788c3b0607f015d404414af11afbbee542873119bcc +size 126802 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..442b6b97ff39281d85ad8553efde24bcf4ae73b2 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light_source.png new file mode 100644 index 
0000000000000000000000000000000000000000..212de0cdd4ff27de027b585fb022ced4f8e2ea18 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_lines.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_lines.png new file mode 100644 index 0000000000000000000000000000000000000000..99fb4ea0fe5d477064ddc4cf5a5567c231329522 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_blue_lines.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_cable.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_cable.png new file mode 100644 index 0000000000000000000000000000000000000000..0191898f8fe263565b7115747b08233c83dc6c25 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_cable.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f59fa4f8fc64e7a4398b9816ab9e5d3b7ca3e040d3b37e31940ffb9dbe4fc18 +size 398135 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_dark_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_dark_area.png new file mode 100644 index 0000000000000000000000000000000000000000..9de7edbaa901f3def9de6c0920f7585a93471326 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_dark_area.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e14c98a5da93a177cafb687e12efa86dac4ed9e952143a44bc344e5ff34420a +size 271401 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_electronic_device.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_electronic_device.png new file mode 100644 index 0000000000000000000000000000000000000000..695afa1390f9657219d06d04bad373983f6a8d84 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_electronic_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_grid_patterned_floor.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_grid_patterned_floor.png new file mode 100644 index 0000000000000000000000000000000000000000..e32073203a88a14b772954aa7365641ac701028f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_grid_patterned_floor.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4feacedaa4dfcbf45f721b59f63b9d7b303aa1fa82bdf9fb5a6482cf7de06b13 +size 584583 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_shadowy_shape.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_shadowy_shape.png new file mode 100644 index 0000000000000000000000000000000000000000..f83090a2de027ee4391c20c65f56a1853c9e1998 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_shadowy_shape.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbef5552c7a3283110d6cf5cff02dc8c1b98985115e6b89e29d1813ccd2225f +size 288463 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_small_blue_rectangle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_small_blue_rectangle.png new file mode 100644 index 0000000000000000000000000000000000000000..91d6248c7d8ac6060fb9c82bd92742b95a39fd92 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_small_blue_rectangle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_fabric_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_fabric_area.png new file mode 100644 index 0000000000000000000000000000000000000000..a389147f456940470f09988bb5a2cf73a5de1b66 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_fabric_area.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad8027132d94cedfa6dcd67aa13376a1ce40f7b732e6327c4193582a29e4151d +size 170738 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..2a7a109010391caea47b267c16965110525c867e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_textured_surface.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e81d45d0a607c188fc7dc721bc438dfbd448842e04d9e6da19d1b5e8906c031 +size 378672 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_thin_curved_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_thin_curved_object.png new file mode 100644 index 0000000000000000000000000000000000000000..bb23914e9fcfe363929ee377f51ca4cfa2237175 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_thin_curved_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_typist.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_typist.png new file mode 100644 
index 0000000000000000000000000000000000000000..15ba80c5deb9c33f42c794658c695fe11055f430 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/detect_refine_typist.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045ceb643da8af3624980966b006baa3271157d5bfbf12845505a0461a9acbb9 +size 559317 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_backpack.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_backpack.png new file mode 100644 index 0000000000000000000000000000000000000000..d7a4a7a5c47d56089671ae956966c3eab2d94b4b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_backpack.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef767848260f926f7e66bfb98cb0b7c839ca36c5cf708e5419bff0b01687e20 +size 153155 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..f7a747abf2dfd03de46d7f3276a88a1aaf941c7d Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..43944beaa707cf4739603d915c3a29ee861647b8 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_lines.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_lines.png new file mode 100644 index 0000000000000000000000000000000000000000..cfb4fa3f7bcd9b80931fc6f0a32f74034140b1a4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_blue_lines.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ffd72efd61ebfed6ca35ecb30f39b83e55d1c5ac2618bd2f7b5f68f249c01b2 +size 120705 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_cable.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_cable.png new file mode 100644 index 0000000000000000000000000000000000000000..8dc10519046b5b70548edfcb4ee1c97f78617acc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_cable.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31b7dd2ad5887d16158a07f9753b70b4b8cb904effd58b6b18a99e87b13da89 +size 269649 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_dark_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_dark_area.png new file mode 100644 index 0000000000000000000000000000000000000000..161e430abd61d2ed500805830c7ad4a8a12aa876 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_dark_area.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d44eee5437317f8fe7163584b3e0149753b74706c027c27ade4847ad2df8976 +size 278177 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_electronic_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_electronic_device.png new file mode 100644 index 0000000000000000000000000000000000000000..75f605d4a179a3273107ecfc4cb4a7b0c19cf888 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_electronic_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_grid_patterned_floor.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_grid_patterned_floor.png new file mode 100644 index 0000000000000000000000000000000000000000..1f43b33027f38ce880733ce7fa9990b09da775de --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_grid_patterned_floor.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e94df8cee50d62936051b6573e961f728d965961d8a5706c52f99fe3ea6385e +size 579653 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_shadowy_shape.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_shadowy_shape.png new file mode 100644 index 0000000000000000000000000000000000000000..c017191a852e6ee1598854235c5f286bae5632f4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_shadowy_shape.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1213df3af86bb5467df5464a249d45d8392222622444f0c160c4376c1ba36326 +size 114034 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_small_blue_rectangle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_small_blue_rectangle.png new file mode 100644 index 0000000000000000000000000000000000000000..c286fb5a684d2a274b43b8239aa7b94c876af5b4 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_small_blue_rectangle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_fabric_area.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_fabric_area.png new file mode 100644 index 0000000000000000000000000000000000000000..dbcb210adc8e2a7d84efa8f47ef38cd7526afad6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_fabric_area.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739d362b4cf6e285ac2aedf6a340bf2ea26e19b55417dd264fe00e940c1509bc +size 195007 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..7ceda182b530303122c62e4554a60453dbf3bb3e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_textured_surface.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e3390c11be7e790d4d9ed68ddf3cb9f9a54b08352d3371ec76a7f1e77e7b87 +size 461310 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_thin_curved_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_thin_curved_object.png new file mode 100644 index 0000000000000000000000000000000000000000..e16ed5616314c52d88a97e32b22d8ea9ff654ff7 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_thin_curved_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_typist.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_typist.png new file mode 100644 index 0000000000000000000000000000000000000000..565be74569060f70f3ec94594d2f1fd6cfb9ce21 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/crops/diversify_input_typist.png @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2198916816ad3af0f64f81e2e3c49a63f5a90eaac57c30380360cd53e62f3386 +size 783378 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..94fc3349d36cb989daaaaca61f1ba8177058c553 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/detections.json @@ -0,0 +1,268 @@ +[ + { + "name": "typist", + "present": true, + "bbox": [ + 0.5473, + 0.0, + 0.9968, + 1.0 + ], + "confidence": 0.95, + "notes": "The typist is the person shown in the image. I have drawn a box around the entire person.", + "coarse_bbox": [ + 0.588, + 0.0, + 0.996, + 1.0 + ], + "refine_crop": "crops/detect_refine_typist.png" + }, + { + "name": "textured_fabric_area", + "present": true, + "bbox": [ + 0.5434, + 0.5078, + 0.7285, + 0.8985 + ], + "confidence": 0.95, + "notes": "The object is a faint blueish, textured area that resembles rough fabric.", + "coarse_bbox": [ + 0.542, + 0.504, + 0.734, + 0.917 + ], + "refine_crop": "crops/detect_refine_textured_fabric_area.png" + }, + { + "name": "textured_surface", + "present": true, + "bbox": [ + 0.3547, + 0.1364, + 0.8811, + 0.479 + ], + "confidence": 0.95, + "notes": "The large dark desk mat with a dimpled texture covering a significant portion of the desk, acting as a base for the keyboard, phone, and hand.", + "coarse_bbox": [ + 0.345, + 0.145, + 0.888, + 0.481 + ], + "refine_crop": "crops/detect_refine_textured_surface.png" + }, + { + "name": "blue_lines", + "present": true, + "bbox": [ + 0.3714, + 0.3069, + 0.5828, + 0.4931 + ], + "confidence": 0.9, + "notes": "An LED light strip illuminating the edge of the desk.", + "coarse_bbox": [ + 0.375, + 0.323, + 0.599, + 0.493 + ], + "refine_crop": "crops/detect_refine_blue_lines.png" + }, + { + "name": "grid_patterned_floor", + "present": true, + 
"bbox": [ + 0.0039, + 0.5389, + 0.6911, + 0.9893 + ], + "confidence": 0.9, + "notes": "The floor is visible in the lower left portion of the image, showing the grid pattern.", + "coarse_bbox": [ + 0.0, + 0.487, + 0.714, + 0.994 + ], + "refine_crop": "crops/detect_refine_grid_patterned_floor.png" + }, + { + "name": "shadowy_shape", + "present": true, + "bbox": [ + 0.0709, + 0.0492, + 0.3072, + 0.2887 + ], + "confidence": 0.8, + "notes": "A large, unidentifiable shadowy shape looming in the background behind the box.", + "coarse_bbox": [ + 0.003, + 0.0, + 0.351, + 0.596 + ], + "refine_crop": "crops/detect_refine_shadowy_shape.png" + }, + { + "name": "blue_light_source", + "present": true, + "bbox": [ + 0.3661, + 0.1534, + 0.448, + 0.2196 + ], + "confidence": 0.95, + "notes": "The large glowing blue object on the desk.", + "coarse_bbox": [ + 0.364, + 0.148, + 0.447, + 0.22 + ], + "refine_crop": "crops/detect_refine_blue_light_source.png" + }, + { + "name": "thin_curved_object", + "present": true, + "bbox": [ + 0.5384, + 0.1284, + 0.674, + 0.181 + ], + "confidence": 0.9, + "notes": "Tight bounding box around the curved, metallic-looking object illuminated in blue.", + "coarse_bbox": [ + 0.55, + 0.111, + 0.672, + 0.178 + ], + "refine_crop": "crops/detect_refine_thin_curved_object.png" + }, + { + "name": "electronic_device", + "present": true, + "bbox": [ + 0.4103, + 0.2767, + 0.5015, + 0.3797 + ], + "confidence": 0.99, + "notes": "Red rectangular electronic device resembling a phone is clearly visible.", + "coarse_bbox": [ + 0.407, + 0.275, + 0.5, + 0.384 + ], + "refine_crop": "crops/detect_refine_electronic_device.png" + }, + { + "name": "blue_light", + "present": true, + "bbox": [ + 0.523, + 0.747, + 0.539, + 0.788 + ], + "confidence": 0.95, + "notes": "The bounding box covers the bright central part of the blue light.", + "coarse_bbox": [ + 0.523, + 0.747, + 0.539, + 0.788 + ], + "refine_crop": "crops/detect_refine_blue_light.png" + }, + { + "name": "cable", + 
"present": true, + "bbox": [ + 0.3307, + 0.1951, + 0.482, + 0.9804 + ], + "confidence": 0.95, + "notes": "The light-colored cable trailing from the top part of the desk down towards the floor.", + "coarse_bbox": [ + 0.215, + 0.188, + 0.493, + 0.955 + ], + "refine_crop": "crops/detect_refine_cable.png" + }, + { + "name": "backpack", + "present": true, + "bbox": [ + 0.2708, + 0.6305, + 0.4662, + 0.9996 + ], + "confidence": 0.95, + "notes": "Tight bounding box around the blue and black backpack visible in the crop.", + "coarse_bbox": [ + 0.263, + 0.644, + 0.466, + 0.994 + ], + "refine_crop": "crops/detect_refine_backpack.png" + }, + { + "name": "small_blue_rectangle", + "present": true, + "bbox": [ + 0.7651, + 0.0635, + 0.8412, + 0.1295 + ], + "confidence": 0.95, + "notes": "Tight bounding box around the visible bright blue rectangular glow representing the digital clock or secondary display.", + "coarse_bbox": [ + 0.762, + 0.054, + 0.842, + 0.135 + ], + "refine_crop": "crops/detect_refine_small_blue_rectangle.png" + }, + { + "name": "dark_area", + "present": true, + "bbox": [ + 0.003, + 0.0, + 0.3044, + 0.5863 + ], + "confidence": 0.8, + "notes": "The deeply shadowed area dominates the left and upper sections of the image crop.", + "coarse_bbox": [ + 0.003, + 0.0, + 0.339, + 0.589 + ], + "refine_crop": "crops/detect_refine_dark_area.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..498671d342c5ec13603224ae09c235d659377652 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af95088f99b6a478b560aabfab7eb0f9bf015b0b67acc74bd0b2aea323a45eb5 +size 1266177 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/plan.json 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..87f0968491ccce5db549106a36068f76863a96e9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/plan.json @@ -0,0 +1,312 @@ +{ + "sample_id": "sample_000007", + "target_total": 14, + "target_people": 1, + "target_objects": 13, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dimly lit room at night, illuminated mostly by the soft blue glow of screens, glowing keyboards, and small LED indicators.", + "activity": "A silhouetted person is typing on an illuminated keyboard at a desk, surrounded by glowing electronic devices and cables stretching into deep shadows.", + "composition": "Camera positioned slightly behind and to the side of the typist, focusing on the illuminated keyboard in the foreground, with glowing devices on the desk, trailing cables leading to the grid-patterned floor where a backpack sits, and deep shadowed areas in the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "typist", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "description": "Visible as a dark silhouette with one hand interacting with an illuminated keyboard.", + "role_in_scene": "Typing at the desk in the center of 
the frame, serving as the main subject of the scene." + } + ], + "objects": [ + { + "name": "textured_fabric_area", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "description": "A faint blueish, textured area that resembles rough fabric.", + "role_in_scene": "Draped loosely over the back of the typist's chair, catching some of the ambient blue light." + }, + { + "name": "textured_surface", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "description": "A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.", + "role_in_scene": "Acting as a large desk mat or mousepad underneath the glowing keyboard." + }, + { + "name": "blue_lines", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "description": "Faint, indistinct blue lines glowing in the dark.", + "role_in_scene": "An LED light strip illuminating the edge of the desk." + }, + { + "name": "grid_patterned_floor", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "description": "A dark, grid-patterned floor surface, likely made of tiles.", + "role_in_scene": "Visible in the lower portion of the room beneath the desk area." + }, + { + "name": "shadowy_shape", + "source_index": 8, + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "description": "A large, unidentifiable shadowy shape.", + "role_in_scene": "Looming in the background, suggesting stacked boxes or furniture in the darkness." 
+ }, + { + "name": "blue_light_source", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "description": "A small, hazy blue glowing area, appearing slightly irregular in shape.", + "role_in_scene": "A glowing component or hub device resting on the desk." + }, + { + "name": "thin_curved_object", + "source_index": 12, + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "description": "A thin, metallic or reflective curved object.", + "role_in_scene": "Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light." + }, + { + "name": "electronic_device", + "source_index": 14, + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "description": "A faint red rectangular shape with some texture, resembling a phone.", + "role_in_scene": "Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights." + }, + { + "name": "blue_light", + "source_index": 19, + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "description": "A small blue light source.", + "role_in_scene": "A standby light on a computer tower sitting on the floor." + }, + { + "name": "cable", + "source_index": 20, + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "description": "A light-colored cable trailing down from the counter area towards the floor.", + "role_in_scene": "Hanging down from the edge of the desk toward the floor, connecting devices." + }, + { + "name": "backpack", + "source_index": 22, + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "description": "A blue and black backpack.", + "role_in_scene": "Resting quietly on the grid-patterned floor near the trailing cable." 
+ }, + { + "name": "small_blue_rectangle", + "source_index": 23, + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "description": "A tiny, bright blue rectangular glow.", + "role_in_scene": "A small secondary display or digital clock sitting on the corner of the desk." + }, + { + "name": "dark_area", + "source_index": 24, + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "description": "A deeply shadowed region dominating the space.", + "role_in_scene": "Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup." + } + ] + }, + "expected_subjects": [ + { + "name": "typist", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "source_description": "Visible as a dark silhouette with one hand interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room.", + "sub_caption": "typist: Visible as a dark silhouette with one hand interacting with an illuminated keyboard.. Scene role: Typing at the desk in the center of the frame, serving as the main subject of the scene.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "textured_fabric_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "source_description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. 
Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light.", + "sub_caption": "textured fabric area: A faint blueish, textured area that resembles rough fabric.. Scene role: Draped loosely over the back of the typist's chair, catching some of the ambient blue light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "textured_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment.", + "sub_caption": "textured surface: A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.. Scene role: Acting as a large desk mat or mousepad underneath the glowing keyboard.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blue_lines", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "source_description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right.", + "sub_caption": "blue lines: Faint, indistinct blue lines glowing in the dark.. 
Scene role: An LED light strip illuminating the edge of the desk.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "grid_patterned_floor", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "source_description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall.", + "sub_caption": "floor: A dark, grid-patterned floor surface, likely made of tiles.. Scene role: Visible in the lower portion of the room beneath the desk area.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "shadowy_shape", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "source_description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light.", + "sub_caption": "shadowy shape: A large, unidentifiable shadowy shape.. Scene role: Looming in the background, suggesting stacked boxes or furniture in the darkness.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. 
Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side.", + "sub_caption": "blue light source: A small, hazy blue glowing area, appearing slightly irregular in shape.. Scene role: A glowing component or hub device resting on the desk.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "thin_curved_object", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "source_description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting.", + "sub_caption": "thin curved object: A thin, metallic or reflective curved object.. Scene role: Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "electronic_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: A faint red rectangular shape with some texture, resembling a phone.. 
Scene role: Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "source_description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light.", + "sub_caption": "blue light: A small blue light source.. Scene role: A standby light on a computer tower sitting on the floor.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "cable", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "source_description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window.", + "sub_caption": "cable: A light-colored cable trailing down from the counter area towards the floor.. Scene role: Hanging down from the edge of the desk toward the floor, connecting devices.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "backpack", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "source_description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle.", + "sub_caption": "backpack: A blue and black backpack.. 
Scene role: Resting quietly on the grid-patterned floor near the trailing cable.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "small_blue_rectangle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "source_description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right.", + "sub_caption": "small blue rectangle: A tiny, bright blue rectangular glow.. Scene role: A small secondary display or digital clock sitting on the corner of the desk.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "dark_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "source_description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet.", + "sub_caption": "dark area: A deeply shadowed region dominating the space.. 
Scene role: Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000007/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references.json new file mode 100644 index 0000000000000000000000000000000000000000..7f474f967f698e4d7594d3a66878ec8cf254af9f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references.json @@ -0,0 +1,232 @@ +{ + "references": [ + { + "name": "typist", + "ref_image": "references/ref_typist.png", + "raw_ref_image": "references/raw_ref_typist_attempt_01.png", + "diversify_input": "crops/diversify_input_typist.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_typist.png", + "mask": "references/sam_mask_typist.png" + }, + "reference_verify": "references/reference_verify_typist.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "textured_fabric_area", + "ref_image": "references/ref_textured_fabric_area.png", + "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", + "diversify_input": "crops/diversify_input_textured_fabric_area.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_fabric_area.png", + "mask": "references/sam_mask_textured_fabric_area.png" + }, + "reference_verify": "references/reference_verify_textured_fabric_area.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "textured_surface", + "ref_image": "references/ref_textured_surface.png", + "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", + "diversify_input": 
"crops/diversify_input_textured_surface.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_surface.png", + "mask": "references/sam_mask_textured_surface.png" + }, + "reference_verify": "references/reference_verify_textured_surface.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "blue_lines", + "ref_image": "references/ref_blue_lines.png", + "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_lines.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_lines.png", + "mask": "references/sam_mask_blue_lines.png" + }, + "reference_verify": "references/reference_verify_blue_lines.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "grid_patterned_floor", + "ref_image": "references/ref_grid_patterned_floor.png", + "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", + "diversify_input": "crops/diversify_input_grid_patterned_floor.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_grid_patterned_floor.png", + "mask": "references/sam_mask_grid_patterned_floor.png" + }, + "reference_verify": "references/reference_verify_grid_patterned_floor.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "shadowy_shape", + "ref_image": "references/ref_shadowy_shape.png", + "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", + "diversify_input": "crops/diversify_input_shadowy_shape.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_shadowy_shape.png", + "mask": "references/sam_mask_shadowy_shape.png" + }, + "reference_verify": "references/reference_verify_shadowy_shape.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "blue_light_source", + 
"ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_light_source.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png" + }, + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "thin_curved_object", + "ref_image": "references/ref_thin_curved_object.png", + "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", + "diversify_input": "crops/diversify_input_thin_curved_object.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_thin_curved_object.png", + "mask": "references/sam_mask_thin_curved_object.png" + }, + "reference_verify": "references/reference_verify_thin_curved_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "electronic_device", + "ref_image": "references/ref_electronic_device.png", + "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", + "diversify_input": "crops/diversify_input_electronic_device.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_electronic_device.png", + "mask": "references/sam_mask_electronic_device.png" + }, + "reference_verify": "references/reference_verify_electronic_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "blue_light", + "ref_image": "references/ref_blue_light.png", + "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_light.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light.png", + "mask": "references/sam_mask_blue_light.png" + }, + "reference_verify": 
"references/reference_verify_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "cable", + "ref_image": "references/ref_cable.png", + "raw_ref_image": "references/raw_ref_cable_attempt_01.png", + "diversify_input": "crops/diversify_input_cable.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_cable.png", + "mask": "references/sam_mask_cable.png" + }, + "reference_verify": "references/reference_verify_cable.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "backpack", + "ref_image": "references/ref_backpack.png", + "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", + "diversify_input": "crops/diversify_input_backpack.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_backpack.png", + "mask": "references/sam_mask_backpack.png" + }, + "reference_verify": "references/reference_verify_backpack.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "small_blue_rectangle", + "ref_image": "references/ref_small_blue_rectangle.png", + "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", + "diversify_input": "crops/diversify_input_small_blue_rectangle.png", + "sam_white_bg": { + "cached": true, + "output": "references/ref_small_blue_rectangle.png", + "mask": "references/sam_mask_small_blue_rectangle.png" + }, + "reference_verify": "references/reference_verify_small_blue_rectangle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "cached_reference": true + }, + { + "name": "dark_area", + "ref_image": "references/ref_dark_area.png", + "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", + "diversify_input": "crops/diversify_input_dark_area.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", + "output": "references/ref_dark_area.png", + "mask": "references/sam_mask_dark_area.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 52.0, + 897.0, + 902.0 + ], + "mask_score": 3.468953, + "mask_area_ratio": 0.45142, + "elapsed_seconds": 7.1708 + }, + "reference_verify": "references/reference_verify_dark_area.json", + "reference_verify_passed": true, + "reference_attempts": 9 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_backpack.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_backpack.png new file mode 100644 index 0000000000000000000000000000000000000000..906adb8de228de64de317b653d7540e2a3ff44fa --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_backpack.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39376fe6c0d7011ef626a40ddbd57d0a410daf7e4ab132c6c25c68f16385f496 +size 1108444 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..676f14be12f908bb7b11c14f1423e35862050a93 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a37bb31327a1d64ba58832fc28f16691ce5fe0429642d7523fefb0c7ea2ae55 +size 104883 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..155ed700a66c1ca4b964e7a377468f76f60c5f4a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_light_source.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fad19105ff274d3a1b90c5fe76e6ffa6a8ac11a5c2f91a33c5ace82e0a01ef +size 336242 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_lines.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_lines.png new file mode 100644 index 0000000000000000000000000000000000000000..34dd24d777baefacf5052c4a34473d6efcfd4061 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_blue_lines.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66b84c929239f94734627e79c9b42aaf5674dfded37d781c5bf1eb674b22069 +size 589843 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_cable.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_cable.png new file mode 100644 index 0000000000000000000000000000000000000000..7198fab3132e75692625e254abd33450d078457f Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_cable.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_dark_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_dark_area.png new file mode 100644 index 0000000000000000000000000000000000000000..f40032af9bf49c026fc9262822559881ef54ae27 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_dark_area.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:eb6f4b3aba52a2185757c06343583f9b598619b5182c5990d22511cc82921f62 +size 337161 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_electronic_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_electronic_device.png new file mode 100644 index 0000000000000000000000000000000000000000..6defb90a9b5f2d73c77d31a1ab28ba876d49c178 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_electronic_device.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c17fa29f43538253a2565bf5e4a8438d1a2d81862295629f6877226477b86bf +size 292628 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_grid_patterned_floor.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_grid_patterned_floor.png new file mode 100644 index 0000000000000000000000000000000000000000..99f82c5da824c9ab783c7e61d1d7dcb745decf29 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_grid_patterned_floor.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8942f16b87403308a9d22d3e05d48917c0e7ab52fb5cf4e3e7f98610b7a9d7e +size 741187 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_shadowy_shape.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_shadowy_shape.png new file mode 100644 index 0000000000000000000000000000000000000000..f411e5f05694a7a1ae235eeae752479e8039d857 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_shadowy_shape.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37472ff1b09ba5a328cc7bc8c9cf63c0d150086a5f04cf992d6215192051b06 +size 917202 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_small_blue_rectangle.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_small_blue_rectangle.png new file mode 100644 index 0000000000000000000000000000000000000000..4b2f2071f4414bad08c056e2bf6ec33962bdc001 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_small_blue_rectangle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26014af81e1466781af347fa27a23ca95c7ec66d9d4c125a873f306998a58217 +size 488080 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_fabric_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_fabric_area.png new file mode 100644 index 0000000000000000000000000000000000000000..3a2618d102a5b5f86f620694000d99e1e2101f8e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_fabric_area.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b67987750a92cd44116cfcbcfdf4b99d48de64d2d8001c2d3ba18fd76e10aa5 +size 677159 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..8b02a160edf6057588cd368a7725b56e28c2e0b0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_textured_surface.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b80fc622bd0091ba08dbb933eeab8dd89c2864a5fa20cc7ee8a6324a2142a1a +size 923882 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_thin_curved_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_thin_curved_object.png new file mode 100644 index 0000000000000000000000000000000000000000..d7a74a290b7c27b46d5b81c82be04f93ab569fca --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_thin_curved_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7473567d832df860139627ab830aa3d758cb039cd2eb19c502c88e859a3f073 +size 346611 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_typist.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_typist.png new file mode 100644 index 0000000000000000000000000000000000000000..c5cc0a31d7b14a2f69ed9a5020b9022930db3828 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/ref_typist.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8642e8510d0bcab85bd525056df0c9fa3f629e5b042b96e2903217a2f7c74cb1 +size 254927 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_backpack.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_backpack.json new file mode 100644 index 0000000000000000000000000000000000000000..744109e663946a2d215c78402e70909fb5d57519 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_backpack.json @@ -0,0 +1,46 @@ +{ + "name": "backpack", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_backpack_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_backpack_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_backpack_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_backpack_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_backpack_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 122.0, + 15.0, + 902.0, + 1008.0 + ], + "mask_score": 3.461148, + "mask_area_ratio": 0.459841, + "elapsed_seconds": 7.1782 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image clearly shows a complete blue and black backpack isolated on a white background without any cropping or truncation. It meets all hard requirements for a dataset subject reference." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light.json new file mode 100644 index 0000000000000000000000000000000000000000..1ea18bbfaa68f7b1acecaa74f5705fb12242c28e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light.json @@ -0,0 +1,46 @@ +{ + "name": "blue_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_blue_light_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_blue_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_blue_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 377.0, + 308.0, + 646.0, + 715.0 + ], + "mask_score": 3.457546, + "mask_area_ratio": 0.059873, + "elapsed_seconds": 7.1009 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + 
"white_background": true, + "failure_reasons": [], + "notes": "The image shows a clear, isolated blue light source centered on a white background, perfectly meeting the requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light_source.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light_source.json new file mode 100644 index 0000000000000000000000000000000000000000..195236ff1178103529a9921cc5a798304af896b6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_light_source.json @@ -0,0 +1,46 @@ +{ + "name": "blue_light_source", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_light_source_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_light_source_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_blue_light_source_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_blue_light_source_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_blue_light_source_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 262.0, + 1023.0, + 
827.0 + ], + "mask_score": 2.92097, + "mask_area_ratio": 0.187185, + "elapsed_seconds": 7.1518 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a blue glowing object isolated on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_lines.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_lines.json new file mode 100644 index 0000000000000000000000000000000000000000..5bad2b490d512213d1c9efab4670d935f75c787a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_blue_lines.json @@ -0,0 +1,46 @@ +{ + "name": "blue_lines", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_lines_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_lines_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_blue_lines_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_blue_lines_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_blue_lines_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + 
"sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 224.0, + 945.0, + 783.0 + ], + "mask_score": 3.456756, + "mask_area_ratio": 0.321631, + "elapsed_seconds": 7.3869 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image clearly shows a coiled blue LED light strip on a white background, matching the intended subject." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_cable.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_cable.json new file mode 100644 index 0000000000000000000000000000000000000000..7e49c4a49ee103abe32787b3ad6a31d14104705c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_cable.json @@ -0,0 +1,46 @@ +{ + "name": "cable", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_cable_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_cable_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_cable_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_cable_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_cable_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_cable_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 0.0, + 696.0, + 1023.0 + ], + "mask_score": 3.34156, + "mask_area_ratio": 0.012691, + "elapsed_seconds": 7.111 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a light-colored cable isolated against a white background. It spans top to bottom, which means minor edge cropping at the very ends, but this is acceptable for a long continuous object like a cable." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_dark_area.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_dark_area.json new file mode 100644 index 0000000000000000000000000000000000000000..2f4523dbcc13e71b06790795bf454d7c2ddf38e1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_dark_area.json @@ -0,0 +1,381 @@ +{ + "name": "dark_area", + "passed": true, + "accepted_attempt": 9, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_dark_area_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 103.0, + 87.0, + 948.0, + 972.0 + ], + "mask_score": 3.438675, + "mask_area_ratio": 0.44753, + "elapsed_seconds": 9.2165 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": false, + "white_background": true, + "failure_reasons": [ + "intended subject is absent", + "image shows a cardboard box instead of a dark area" + ], + "notes": "The image displays a cardboard box, completely failing to match the intended subject of a dark area or deeply shadowed region." 
+ } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_dark_area_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 175.0, + 158.0, + 847.0, + 866.0 + ], + "mask_score": 3.490162, + "mask_area_ratio": 0.308594, + "elapsed_seconds": 8.4733 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Intended subject (dark area) is absent; the image shows a cardboard box." + ], + "notes": "The provided image displays a cardboard box on a white background, which does not match the requested subject 'dark area'." 
+ } + }, + { + "attempt": 3, + "raw_ref_image": "references/raw_ref_dark_area_attempt_03.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_03.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_03.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_03.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_03.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_03.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 46.0, + 83.0, + 975.0, + 967.0 + ], + "mask_score": 3.415364, + "mask_area_ratio": 0.615692, + "elapsed_seconds": 10.1999 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The intended subject (dark_area) is completely absent.", + "The image depicts a cardboard box instead of a deeply shadowed region." + ], + "notes": "The image shows a plain cardboard box, which does not match the requested subject 'dark area'." 
+ } + }, + { + "attempt": 4, + "raw_ref_image": "references/raw_ref_dark_area_attempt_04.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_04.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_04.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_04.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_04.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_04.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 43.0, + 144.0, + 966.0, + 950.0 + ], + "mask_score": 3.447901, + "mask_area_ratio": 0.423297, + "elapsed_seconds": 7.4703 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "intended subject is absent", + "image shows a cardboard box instead of a dark shadowed area" + ], + "notes": "The generated image completely ignores the subject prompt ('dark area', 'deeply shadowed region') and instead shows a plain cardboard box." 
+ } + }, + { + "attempt": 5, + "raw_ref_image": "references/raw_ref_dark_area_attempt_05.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_05.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_05.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_05.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_05.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_05.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 68.0, + 148.0, + 956.0, + 909.0 + ], + "mask_score": 3.480557, + "mask_area_ratio": 0.474544, + "elapsed_seconds": 7.2817 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Intended subject 'dark area' is absent", + "Image shows a cardboard box instead of the requested subject" + ], + "notes": "The provided image is a cardboard box, completely unrelated to the requested subject 'dark area'." 
+ } + }, + { + "attempt": 6, + "raw_ref_image": "references/raw_ref_dark_area_attempt_06.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_06.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_06.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_06.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_06.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_06.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 40.0, + 112.0, + 999.0, + 934.0 + ], + "mask_score": 3.460206, + "mask_area_ratio": 0.573447, + "elapsed_seconds": 7.308 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "intended subject is absent", + "image shows a cardboard box instead of a dark shadowed region" + ], + "notes": "The image displays a plain cardboard box on a white background, completely failing to match the requested subject 'dark_area'." 
+ } + }, + { + "attempt": 7, + "raw_ref_image": "references/raw_ref_dark_area_attempt_07.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_07.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_07.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_07.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_07.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_07.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 61.0, + 135.0, + 952.0, + 920.0 + ], + "mask_score": 3.476222, + "mask_area_ratio": 0.479733, + "elapsed_seconds": 7.4353 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Intended subject is absent.", + "Image shows a cardboard box instead of a shadowed region or dark area." + ], + "notes": "The image clearly displays a brown cardboard box on a white background, which entirely fails to match the requested subject 'dark_area'." 
+ } + }, + { + "attempt": 8, + "raw_ref_image": "references/raw_ref_dark_area_attempt_08.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_08.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_08.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_08.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_08.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_08.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 56.0, + 133.0, + 967.0, + 919.0 + ], + "mask_score": 3.462403, + "mask_area_ratio": 0.486423, + "elapsed_seconds": 7.2585 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "intended subject is absent", + "image shows a cardboard box instead of a dark shadowed region" + ], + "notes": "The generated image depicts a cardboard box on a white background rather than the requested abstract 'dark area' or shadowed region." 
+ } + }, + { + "attempt": 9, + "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", + "candidate_ref_image": "references/candidate_ref_dark_area_attempt_09.png", + "candidate_sam_mask": "references/candidate_sam_mask_dark_area_attempt_09.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_dark_area_attempt_09.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_dark_area_attempt_09.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 52.0, + 897.0, + 902.0 + ], + "mask_score": 3.468953, + "mask_area_ratio": 0.45142, + "elapsed_seconds": 7.1708 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The subject 'dark area' has been extracted as an isolated dark shape on a white background, which fulfills the requirements for an environmental or abstract subject." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_electronic_device.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_electronic_device.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f80885d9ca8d2c52d18e7d7f0b5baa0eb78303 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_electronic_device.json @@ -0,0 +1,46 @@ +{ + "name": "electronic_device", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_electronic_device_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_electronic_device_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_electronic_device_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_electronic_device_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_electronic_device_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 164.0, + 248.0, + 850.0, + 780.0 + ], + "mask_score": 3.479641, + "mask_area_ratio": 0.181291, + "elapsed_seconds": 7.1556 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": 
true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete, isolated electronic device (resembling a phone with a red screen) on a white background. It meets all requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_grid_patterned_floor.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_grid_patterned_floor.json new file mode 100644 index 0000000000000000000000000000000000000000..4360aa7219c0b90b45b12efb9d00294211a29199 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_grid_patterned_floor.json @@ -0,0 +1,46 @@ +{ + "name": "grid_patterned_floor", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_grid_patterned_floor_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_grid_patterned_floor_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_grid_patterned_floor_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_grid_patterned_floor_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_grid_patterned_floor_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + 
"sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 140.0, + 140.0, + 883.0, + 883.0 + ], + "mask_score": 3.379652, + "mask_area_ratio": 0.433741, + "elapsed_seconds": 7.2947 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Representative crop of a dark tiled floor surface. The left edge has minor artifacting from isolation, but the pattern is clearly visible and acceptable as a reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_shadowy_shape.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_shadowy_shape.json new file mode 100644 index 0000000000000000000000000000000000000000..e3681eae03709a1d907c317e13fac77e2693de9b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_shadowy_shape.json @@ -0,0 +1,46 @@ +{ + "name": "shadowy_shape", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_shadowy_shape_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_shadowy_shape_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_shadowy_shape_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_shadowy_shape_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_shadowy_shape_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 64.0, + 106.0, + 978.0, + 953.0 + ], + "mask_score": 3.43267, + "mask_area_ratio": 0.526105, + "elapsed_seconds": 7.3034 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image displays a stacked set of black furniture pieces, which aligns with the subject description of 'shadowy shape... suggesting stacked boxes or furniture'. It is well-isolated on a white background." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_small_blue_rectangle.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_small_blue_rectangle.json new file mode 100644 index 0000000000000000000000000000000000000000..980e070213c17318d860e78646ee649c5cc997d8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_small_blue_rectangle.json @@ -0,0 +1,46 @@ +{ + "name": "small_blue_rectangle", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_small_blue_rectangle_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_small_blue_rectangle_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_small_blue_rectangle_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_small_blue_rectangle_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_small_blue_rectangle_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 196.0, + 1001.0, + 827.0 + ], + "mask_score": 3.344954, + "mask_area_ratio": 0.416469, + "elapsed_seconds": 7.361 + }, + "verify": { + "passed": true, + "subject_visible": 
true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a blue rectangular display on a white background, matching the subject description perfectly." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_fabric_area.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_fabric_area.json new file mode 100644 index 0000000000000000000000000000000000000000..ae47f91ef7ec4022744e511909c83dcfaa9dd147 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_fabric_area.json @@ -0,0 +1,46 @@ +{ + "name": "textured_fabric_area", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_textured_fabric_area_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_textured_fabric_area_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_textured_fabric_area_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_textured_fabric_area_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_textured_fabric_area_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 
640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 170.0, + 102.0, + 910.0, + 928.0 + ], + "mask_score": 3.466105, + "mask_area_ratio": 0.275766, + "elapsed_seconds": 7.1312 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Isolated blue textured fabric on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_surface.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_surface.json new file mode 100644 index 0000000000000000000000000000000000000000..363aaa458c3e08ee015f0fcd7c038126ff2c444d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_textured_surface.json @@ -0,0 +1,46 @@ +{ + "name": "textured_surface", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_textured_surface_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_textured_surface_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_textured_surface_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_textured_surface_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_textured_surface_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 78.0, + 72.0, + 945.0, + 951.0 + ], + "mask_score": 3.429365, + "mask_area_ratio": 0.477687, + "elapsed_seconds": 7.3076 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The textured surface (acting as a desk mat) is fully visible, isolated on a white background, and uncropped. The dimpled texture is clearly shown." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_thin_curved_object.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_thin_curved_object.json new file mode 100644 index 0000000000000000000000000000000000000000..616144c09c6f47f35ae0543aff22917875881852 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_thin_curved_object.json @@ -0,0 +1,46 @@ +{ + "name": "thin_curved_object", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_thin_curved_object_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_thin_curved_object_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_thin_curved_object_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_thin_curved_object_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_thin_curved_object_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 22.0, + 200.0, + 1001.0, + 822.0 + ], + "mask_score": 3.288864, + "mask_area_ratio": 0.309387, + "elapsed_seconds": 7.1839 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a thin, curved metallic object isolated on a white background, which matches the subject description and satisfies all requirements." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_typist.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_typist.json new file mode 100644 index 0000000000000000000000000000000000000000..aa4914e13858d3c07980e376c04197f72c7a3e60 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/reference_verify_typist.json @@ -0,0 +1,46 @@ +{ + "name": "typist", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_typist_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_typist_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_typist_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_typist_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_ref_typist_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/candidate_sam_mask_typist_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 43.0, + 678.0, + 998.0 + ], + "mask_score": 3.44444, + "mask_area_ratio": 0.136213, + "elapsed_seconds": 7.3863 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + 
"failure_reasons": [], + "notes": "The image features a complete person with no cropping and enough white background. It meets all hard requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_backpack.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_backpack.png new file mode 100644 index 0000000000000000000000000000000000000000..7f62648ca2506d8c173463be258744f2e2b90435 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_backpack.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..df75d3103d64ea9497e8965a10f0f1ae214620f6 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light_source.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light_source.png new file mode 100644 index 0000000000000000000000000000000000000000..51d96a27385c063ff03605ba656bc0ab8c91eabc Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_light_source.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_lines.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_lines.png new file mode 100644 index 0000000000000000000000000000000000000000..2d5c926564273055ca6401b54b1490dd447f5bb1 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_blue_lines.png differ diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_cable.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_cable.png new file mode 100644 index 0000000000000000000000000000000000000000..e33bc1743c3806ed10e2d5e901b4302221bff32c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_cable.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_dark_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_dark_area.png new file mode 100644 index 0000000000000000000000000000000000000000..1df29997bf140e1b35765e21449d4b603025451f Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_dark_area.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_electronic_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_electronic_device.png new file mode 100644 index 0000000000000000000000000000000000000000..9f68c6c78fb3638504a826aaaa9d45598a0c670a Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_electronic_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_grid_patterned_floor.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_grid_patterned_floor.png new file mode 100644 index 0000000000000000000000000000000000000000..1b67915ca574a0039f1e6b661d61e4689d885e8e Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_grid_patterned_floor.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_shadowy_shape.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_shadowy_shape.png new file mode 100644 index 0000000000000000000000000000000000000000..af16ebf6c8de04956157dcd26c13c5a07ba992b4 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_shadowy_shape.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_small_blue_rectangle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_small_blue_rectangle.png new file mode 100644 index 0000000000000000000000000000000000000000..471ec1caa8f81ab677416a87d7f4a63ca7c258dc Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_small_blue_rectangle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_fabric_area.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_fabric_area.png new file mode 100644 index 0000000000000000000000000000000000000000..53a3d751ea80817f5761049f42b9bf3c59c47938 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_fabric_area.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_surface.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_surface.png new file mode 100644 index 0000000000000000000000000000000000000000..b0a76dd2d5e1bdcabad424470150f0d48dccb9f2 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_textured_surface.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_thin_curved_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_thin_curved_object.png new file mode 100644 index 
0000000000000000000000000000000000000000..cb64caadd004e06ca98354abb8199a1bbb4802cf Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_thin_curved_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_typist.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_typist.png new file mode 100644 index 0000000000000000000000000000000000000000..26445862f0c25faf7a7fd615ae3353ed578483b7 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/references/sam_mask_typist.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/row.json new file mode 100644 index 0000000000000000000000000000000000000000..85b89a99d8beed815342f6394094e7c928fbc4f8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/row.json @@ -0,0 +1,436 @@ +{ + "sample_id": "sample_000007", + "target_total": 14, + "target_people": 1, + "target_objects": 13, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 14, + "n_detected": 14, + "n_subjects": 14, + "subjects": [ + { + "name": "typist", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "source_name": "typist", + "source_description": "Visible as a dark silhouette with one hand interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room.", + "sub_caption": "typist: Visible as a dark silhouette with one hand interacting with an illuminated keyboard.. 
Scene role: Typing at the desk in the center of the frame, serving as the main subject of the scene.", + "measured_bbox": [ + 0.5473, + 0.0, + 0.9968, + 1.0 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_typist.png", + "raw_ref_image": "references/raw_ref_typist_attempt_01.png", + "reference_verify": "references/reference_verify_typist.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_typist.png", + "mask": "references/sam_mask_typist.png" + } + }, + { + "name": "textured_fabric_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "source_name": "textured fabric area", + "source_description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light.", + "sub_caption": "textured fabric area: A faint blueish, textured area that resembles rough fabric.. 
Scene role: Draped loosely over the back of the typist's chair, catching some of the ambient blue light.", + "measured_bbox": [ + 0.5434, + 0.5078, + 0.7285, + 0.8985 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_fabric_area.png", + "raw_ref_image": "references/raw_ref_textured_fabric_area_attempt_01.png", + "reference_verify": "references/reference_verify_textured_fabric_area.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_fabric_area.png", + "mask": "references/sam_mask_textured_fabric_area.png" + } + }, + { + "name": "textured_surface", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment.", + "sub_caption": "textured surface: A dark surface featuring a repeating pattern of small, raised shapes, resembling a dimpled texture.. 
Scene role: Acting as a large desk mat or mousepad underneath the glowing keyboard.", + "measured_bbox": [ + 0.3547, + 0.1364, + 0.8811, + 0.479 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_surface.png", + "raw_ref_image": "references/raw_ref_textured_surface_attempt_01.png", + "reference_verify": "references/reference_verify_textured_surface.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_textured_surface.png", + "mask": "references/sam_mask_textured_surface.png" + } + }, + { + "name": "blue_lines", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "source_name": "blue lines", + "source_description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right.", + "sub_caption": "blue lines: Faint, indistinct blue lines glowing in the dark.. 
Scene role: An LED light strip illuminating the edge of the desk.", + "measured_bbox": [ + 0.3714, + 0.3069, + 0.5828, + 0.4931 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_lines.png", + "raw_ref_image": "references/raw_ref_blue_lines_attempt_01.png", + "reference_verify": "references/reference_verify_blue_lines.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_lines.png", + "mask": "references/sam_mask_blue_lines.png" + } + }, + { + "name": "grid_patterned_floor", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "source_name": "floor", + "source_description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall.", + "sub_caption": "floor: A dark, grid-patterned floor surface, likely made of tiles.. 
Scene role: Visible in the lower portion of the room beneath the desk area.", + "measured_bbox": [ + 0.0039, + 0.5389, + 0.6911, + 0.9893 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_grid_patterned_floor.png", + "raw_ref_image": "references/raw_ref_grid_patterned_floor_attempt_01.png", + "reference_verify": "references/reference_verify_grid_patterned_floor.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_grid_patterned_floor.png", + "mask": "references/sam_mask_grid_patterned_floor.png" + } + }, + { + "name": "shadowy_shape", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "source_name": "shadowy shape", + "source_description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light.", + "sub_caption": "shadowy shape: A large, unidentifiable shadowy shape.. 
Scene role: Looming in the background, suggesting stacked boxes or furniture in the darkness.", + "measured_bbox": [ + 0.0709, + 0.0492, + 0.3072, + 0.2887 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_shadowy_shape.png", + "raw_ref_image": "references/raw_ref_shadowy_shape_attempt_01.png", + "reference_verify": "references/reference_verify_shadowy_shape.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_shadowy_shape.png", + "mask": "references/sam_mask_shadowy_shape.png" + } + }, + { + "name": "blue_light_source", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "source_name": "blue light source", + "source_description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side.", + "sub_caption": "blue light source: A small, hazy blue glowing area, appearing slightly irregular in shape.. 
Scene role: A glowing component or hub device resting on the desk.", + "measured_bbox": [ + 0.3661, + 0.1534, + 0.448, + 0.2196 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_source.png", + "raw_ref_image": "references/raw_ref_blue_light_source_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_source.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light_source.png", + "mask": "references/sam_mask_blue_light_source.png" + } + }, + { + "name": "thin_curved_object", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "source_name": "thin curved object", + "source_description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting.", + "sub_caption": "thin curved object: A thin, metallic or reflective curved object.. 
Scene role: Resting on the desk near the keyboard, resembling the band of a pair of headphones reflecting the monitor light.", + "measured_bbox": [ + 0.5384, + 0.1284, + 0.674, + 0.181 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_thin_curved_object.png", + "raw_ref_image": "references/raw_ref_thin_curved_object_attempt_01.png", + "reference_verify": "references/reference_verify_thin_curved_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_thin_curved_object.png", + "mask": "references/sam_mask_thin_curved_object.png" + } + }, + { + "name": "electronic_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: A faint red rectangular shape with some texture, resembling a phone.. 
Scene role: Lying on the desk near the typist's left arm, casting a slight red glow that contrasts with the blue lights.", + "measured_bbox": [ + 0.4103, + 0.2767, + 0.5015, + 0.3797 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_electronic_device.png", + "raw_ref_image": "references/raw_ref_electronic_device_attempt_01.png", + "reference_verify": "references/reference_verify_electronic_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_electronic_device.png", + "mask": "references/sam_mask_electronic_device.png" + } + }, + { + "name": "blue_light", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "source_name": "blue light", + "source_description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light.", + "sub_caption": "blue light: A small blue light source.. Scene role: A standby light on a computer tower sitting on the floor.", + "measured_bbox": [ + 0.523, + 0.747, + 0.539, + 0.788 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light.png", + "raw_ref_image": "references/raw_ref_blue_light_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_blue_light.png", + "mask": "references/sam_mask_blue_light.png" + } + }, + { + "name": "cable", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_137:object:11", + "source_name": "cable", + "source_description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. 
Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window.", + "sub_caption": "cable: A light-colored cable trailing down from the counter area towards the floor.. Scene role: Hanging down from the edge of the desk toward the floor, connecting devices.", + "measured_bbox": [ + 0.3307, + 0.1951, + 0.482, + 0.9804 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_cable.png", + "raw_ref_image": "references/raw_ref_cable_attempt_01.png", + "reference_verify": "references/reference_verify_cable.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_cable.png", + "mask": "references/sam_mask_cable.png" + } + }, + { + "name": "backpack", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P07_104:object:15", + "source_name": "backpack", + "source_description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle.", + "sub_caption": "backpack: A blue and black backpack.. 
Scene role: Resting quietly on the grid-patterned floor near the trailing cable.", + "measured_bbox": [ + 0.2708, + 0.6305, + 0.4662, + 0.9996 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_backpack.png", + "raw_ref_image": "references/raw_ref_backpack_attempt_01.png", + "reference_verify": "references/reference_verify_backpack.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_backpack.png", + "mask": "references/sam_mask_backpack.png" + } + }, + { + "name": "small_blue_rectangle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "source_name": "small blue rectangle", + "source_description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right.", + "sub_caption": "small blue rectangle: A tiny, bright blue rectangular glow.. 
Scene role: A small secondary display or digital clock sitting on the corner of the desk.", + "measured_bbox": [ + 0.7651, + 0.0635, + 0.8412, + 0.1295 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_small_blue_rectangle.png", + "raw_ref_image": "references/raw_ref_small_blue_rectangle_attempt_01.png", + "reference_verify": "references/reference_verify_small_blue_rectangle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "cached": true, + "output": "references/ref_small_blue_rectangle.png", + "mask": "references/sam_mask_small_blue_rectangle.png" + } + }, + { + "name": "dark_area", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "source_name": "dark area", + "source_description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet.", + "sub_caption": "dark area: A deeply shadowed region dominating the space.. 
Scene role: Filling the left side of the room, creating an atmospheric, isolated mood around the typist's setup.", + "measured_bbox": [ + 0.003, + 0.0, + 0.3044, + 0.5863 + ], + "detection_confidence": 0.8, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dark_area.png", + "raw_ref_image": "references/raw_ref_dark_area_attempt_09.png", + "reference_verify": "references/reference_verify_dark_area.json", + "reference_verify_passed": true, + "reference_attempts": 9, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000007/references/raw_ref_dark_area_attempt_09.png", + "output": "references/ref_dark_area.png", + "mask": "references/sam_mask_dark_area.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 88.0, + 52.0, + 897.0, + 902.0 + ], + "mask_score": 3.468953, + "mask_area_ratio": 0.45142, + "elapsed_seconds": 7.1708 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..c7daa7f9951394da079b5102744e4ad7d93b259d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000007/vocab_task.json @@ -0,0 +1,210 @@ +{ + "task_id": "sample_000007", + "sample_id": "sample_000007", + "sample_index": 7, + "target_total": 14, + "target_people": 1, + "target_objects": 13, + "people_candidates": [ + { + "candidate_index": 0, + 
"source_offset": 340, + "image_id": "Ego4D:ego4d_video/EGO_208022.npy:person:0", + "name": "person", + "description": "A person visible in the dark, wearing a blue shirt and a cap. Only part of the face, neck, and shoulder are illuminated. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a blue shirt and a cap in a very dark environment." + }, + { + "candidate_index": 1, + "source_offset": 855, + "image_id": "Ego4D:ego4d_video/EGO_39231.npy:person:0", + "name": "typist", + "description": "Visible as a dark silhouette with one hand interacting with the illuminated keyboard. Source dataset: Ego4D. Scene context: A close-up view of a person typing on a keyboard illuminated with blue backlighting in a dark room." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 7624, + "image_id": "Ego4D:ego4d_video/EGO_293237.npy:object:0", + "name": "textured fabric area", + "description": "A faint blueish, textured area on the left side of the image, possibly fabric or a rough surface. Source dataset: Ego4D. Scene context: An extremely dark scene with minimal visibility, showing only a few faint, blurry shapes and small points of light." + }, + { + "candidate_index": 1, + "source_offset": 8212, + "image_id": "Ego4D:ego4d_video/EGO_39224.npy:object:0", + "name": "textured surface", + "description": "A dark surface featuring a repeating pattern of small, raised shapes that catch the faint blue light, resembling a woven or dimpled fabric texture. Source dataset: Ego4D. Scene context: A close-up view of a textured surface, possibly fabric, illuminated by a faint blue light in an otherwise dark environment." + }, + { + "candidate_index": 2, + "source_offset": 1211, + "image_id": "EPIC-Kitchens:P04_114:object:1", + "name": "dish rack", + "description": "A large, clear plastic dish rack filled with dishes, placed inside the sink. Source dataset: EPIC-Kitchens. 
Scene context: A messy kitchen counter corner with a sink, stove, appliances, and various items scattered around, including a pot with a cloth on the stove and a bottle of ketchup on the counter, with a towel on the floor." + }, + { + "candidate_index": 3, + "source_offset": 7265, + "image_id": "Ego4D:ego4d_video/EGO_260800.npy:object:0", + "name": "blue lines", + "description": "Faint, indistinct blue lines in the dark. Source dataset: Ego4D. Scene context: A very dark, almost pitch-black scene with only faint, indistinct blue lines visible in the lower right." + }, + { + "candidate_index": 4, + "source_offset": 3207, + "image_id": "EPIC-Kitchens:P30_108:object:17", + "name": "floor", + "description": "A grey tiled floor covering the kitchen area. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with dirty dishes in the sink, a toaster, a plate with a fork and knife, and a washing machine underneath." + }, + { + "candidate_index": 5, + "source_offset": 5653, + "image_id": "Ego4D:ego4d_video/EGO_196981.npy:object:0", + "name": "floor", + "description": "A dark, grid-patterned floor surface, likely made of tiles or a similar material, covering the right side of the image. Source dataset: Ego4D. Scene context: A close-up view of a floor corner with a grid-patterned surface meeting a solid, light-colored wall." + }, + { + "candidate_index": 6, + "source_offset": 2629, + "image_id": "EPIC-Kitchens:P25_107:object:6", + "name": "kitchen sink", + "description": "A stainless steel kitchen sink with a ribbed draining board on the left side and a curved metal faucet. Source dataset: EPIC-Kitchens. Scene context: A kitchen stove and sink area with a pan on the stove and some eggplants and zucchinis resting on the sink's draining board." 
+ }, + { + "candidate_index": 7, + "source_offset": 5392, + "image_id": "Ego4D:ego4d_video/EGO_192222.npy:object:2", + "name": "white hexagonal shapes", + "description": "Two flat, white shapes with a hexagonal outline, slightly overlapping each other on the right side of the table. Source dataset: Ego4D. Scene context: A person's hands are interacting with hexagonal shapes and scissors on a white table." + }, + { + "candidate_index": 8, + "source_offset": 4687, + "image_id": "Ego4D:ego4d_video/EGO_165180.npy:object:2", + "name": "shadowy shape", + "description": "A large, unidentifiable shadowy shape in the center of the scene. Source dataset: Ego4D. Scene context: A very dark, low-visibility scene, possibly outdoors at night or in a deeply shadowed area, with vague shapes illuminated by faint ambient light." + }, + { + "candidate_index": 9, + "source_offset": 459, + "image_id": "EPIC-Kitchens:P02_136:object:3", + "name": "cabinetry", + "description": "Wooden kitchen cabinets with dark green panels and silver handles. Source dataset: EPIC-Kitchens. Scene context: A kitchen or utility room floor with a washing machine, a trash can, a paper bag, and other household items." + }, + { + "candidate_index": 10, + "source_offset": 1460, + "image_id": "EPIC-Kitchens:P06_105:object:13", + "name": "open shelves", + "description": "White open shelves on the wall to the right, holding various items including a yellow box. Source dataset: EPIC-Kitchens. Scene context: A view from a room looking into a kitchen, showing a refrigerator, washing machine, and various kitchen items." + }, + { + "candidate_index": 11, + "source_offset": 9607, + "image_id": "Ego4D:ego4d_video/EGO_97566.npy:object:0", + "name": "blue light source", + "description": "A small, indistinct, hazy blue glowing area in the otherwise black image, appearing somewhat irregular in shape. Source dataset: Ego4D. 
Scene context: A very dark, almost completely black scene with a single, small, hazy blue glowing object or light source visible towards the right side." + }, + { + "candidate_index": 12, + "source_offset": 7059, + "image_id": "Ego4D:ego4d_video/EGO_259561.npy:object:0", + "name": "thin curved object", + "description": "A thin, metallic or reflective curved object held between the hands in the center of the image. Source dataset: Ego4D. Scene context: A close-up view of hands manipulating objects in a very dark setting, with only a few items partially visible under weak lighting." + }, + { + "candidate_index": 13, + "source_offset": 5735, + "image_id": "Ego4D:ego4d_video/EGO_202338.npy:object:1", + "name": "small dark object", + "description": "A small, dark, unidentifiable object held in the left hand, possibly being painted or worked on. Source dataset: Ego4D. Scene context: A close-up view of hands holding a paintbrush and a small dark object, illuminated by a strong red light in an otherwise dark environment." + }, + { + "candidate_index": 14, + "source_offset": 7473, + "image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "name": "electronic device", + "description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device." + }, + { + "candidate_index": 15, + "source_offset": 3540, + "image_id": "EPIC-Kitchens:P34_108:object:9", + "name": "bottles and jars", + "description": "A collection of various bottles and jars containing sauces and spices, clustered together towards the back right of the counter. Source dataset: EPIC-Kitchens. 
Scene context: A first-person view looking down at a kitchen counter with various items, including a package of noodles and an assortment of bottles, while the person's feet in flip-flops and one hand are visible below." + }, + { + "candidate_index": 16, + "source_offset": 755, + "image_id": "EPIC-Kitchens:P03_117:object:4", + "name": "striped towel", + "description": "A towel with vertical black and white stripes hanging on the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A high-angle view of a narrow, cluttered kitchen with a wooden floor, featuring counters on both sides, a stove at the far end, and a washing machine and refrigerator on the right." + }, + { + "candidate_index": 17, + "source_offset": 3119, + "image_id": "EPIC-Kitchens:P30_101:object:11", + "name": "plastic container", + "description": "A rectangular clear plastic container with a yellow lid and label, containing food. Source dataset: EPIC-Kitchens. Scene context: A person's hands are visible moving over a kitchen counter with various items including bread, a cutting board, knives, a sink with dishes, a toaster, and mugs." + }, + { + "candidate_index": 18, + "source_offset": 1085, + "image_id": "EPIC-Kitchens:P04_111:object:14", + "name": "yellow sponge", + "description": "A small yellow rectangular sponge resting on the edge of the kitchen sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with a frying pan containing chopped tofu on an induction stove, an open dishwasher rack, and various bottles and kitchen items scattered around." + }, + { + "candidate_index": 19, + "source_offset": 10224, + "image_id": "Ego4D:ego4d_video/EGO_98584.npy:object:0", + "name": "blue light", + "description": "a small blue light source in a dark setting Source dataset: Ego4D. Scene context: A dark image with a small blue light." 
+ }, + { + "candidate_index": 20, + "source_offset": 486, + "image_id": "EPIC-Kitchens:P02_137:object:11", + "name": "cable", + "description": "A light-colored cable trailing down from the counter area towards the floor near the dark bag. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a washing machine, a large exercise ball, and various items on a counter near a window." + }, + { + "candidate_index": 21, + "source_offset": 1801, + "image_id": "EPIC-Kitchens:P07_114:object:20", + "name": "black trash bag", + "description": "A partially filled black plastic trash bag sitting on the floor near the radiator. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen with black countertops, light cream-colored cabinets, various appliances, and a dark wooden floor." + }, + { + "candidate_index": 22, + "source_offset": 1573, + "image_id": "EPIC-Kitchens:P07_104:object:15", + "name": "backpack", + "description": "A blue and black backpack partially visible on the floor in the bottom left corner. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with unwashed dishes, cleaning supplies, a bunch of bananas, and an oven with a colorful towel hanging on its handle." + }, + { + "candidate_index": 23, + "source_offset": 6829, + "image_id": "Ego4D:ego4d_video/EGO_25245.npy:object:1", + "name": "small blue rectangle", + "description": "A tiny, bright blue rectangular glow in the bottom right corner. Source dataset: Ego4D. Scene context: The image is almost completely dark, with a faint blue shape visible towards the center-right." + }, + { + "candidate_index": 24, + "source_offset": 5517, + "image_id": "Ego4D:ego4d_video/EGO_192730.npy:object:3", + "name": "dark area", + "description": "A deeply shadowed region dominating the left side of the scene. Source dataset: Ego4D. Scene context: A dark room with a bright light reflecting off a wall, possibly near a doorway or closet." 
+ }, + { + "candidate_index": 25, + "source_offset": 2080, + "image_id": "EPIC-Kitchens:P22_107:object:6", + "name": "white bowl", + "description": "A small white bowl or cup resting sideways in the draining rack under the yellow bowl. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a double stainless steel kitchen sink with a draining rack and various items around it." + } + ], + "rng_seed": 1782661096, + "created_at": 1782259667.8115654 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..b2d86089c3363ab9661ae6e89e480433e1e2a980 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb4fd72a384d59ba252bdd0aa95bc79b7b6bc3916868451288e9be57d37934e +size 1186130 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3acbc96513c2c4980a15aa1f6188fa9fff0bda9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/compose_prompt.txt @@ -0,0 +1,115 @@ +Render the following JSON scene specification as a photorealistic 1280x720 image using a true 16:9 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. 
No text, no labels, no logos, no watermarks. +JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1280, + 720 + ], + "aspect_ratio": "16:9", + "style": "photorealistic" + }, + "scene": { + "setting": "a dimly lit, cyberpunk-style underground electronics workshop illuminated by harsh red, blue, and purple neon lights", + "activity": "several individuals are intensely focused on tinkering, soldering, and inspecting various electronic devices and glowing components in the shadows", + "composition": "wide shot showing a long cluttered workbench in the foreground where multiple sets of hands are visible working under colorful task lights, with silhouetted figures standing in the shadowy background; deep shadows and high contrast glowing elements", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 16:9 composition", + "final canvas size 1280x720", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_by_screens", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "source_name": "person", + "description": "a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels", + "role_in_scene": "standing in the background, monitoring the glowing screens" + }, + { + "name": "reaching_hands", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "source_name": "person", + "description": "a pair of hands reaching forward, catching sharp reflections of red and blue ambient light", + "role_in_scene": "reaching across the workbench to grab a tool" + }, + { 
+ "name": "hands_holding_part", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "description": "hands bathed in deep red light, carefully gripping a mechanical object", + "role_in_scene": "holding a component steady on the workbench" + }, + { + "name": "hands_with_small_object", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "description": "hands illuminated by a red glow, with one hand open and the other pinching a tiny object", + "role_in_scene": "inspecting a small microchip" + }, + { + "name": "hands_with_smoldering_tool", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "description": "hands lit by red light, grasping a smoking device or tool", + "role_in_scene": "soldering a wire, emitting a trail of smoke" + }, + { + "name": "helmeted_figure", + "source_index": 6, + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "description": "a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light", + "role_in_scene": "leaning closely over the workbench to inspect the soldering work" + }, + { + "name": "silhouette_with_phone", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "description": "a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device", + "role_in_scene": "standing on the side, illuminating the workspace with a phone flashlight" + }, + { + "name": "hands_passing_object", + "source_index": 13, + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "description": "hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object", + "role_in_scene": "passing a heavy piece of 
hardware to another worker" + } + ], + "objects": [ + { + "name": "blue_light_module", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "description": "a small, intensely bright blue rectangular light glowing through the shadows", + "role_in_scene": "sitting on the edge of the workbench, casting a blue beam across the tools" + }, + { + "name": "red_device", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "description": "a faintly glowing red rectangular electronic device with a textured surface", + "role_in_scene": "lying flat on the workbench next to the busy hands, functioning as a diagnostic remote" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_blue_light_module.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_blue_light_module.png new file mode 100644 index 0000000000000000000000000000000000000000..8575368b4b26b2b8b2aa92fb678f69f136e9539d Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_blue_light_module.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_holding_part.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_holding_part.png new file mode 100644 index 0000000000000000000000000000000000000000..027b7653acb9b05a8625a1dc942f75c1133d6ed2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_holding_part.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85c750f21bbc10ba4e14fa64dba216744db04f290fe29cdadb529c35cb15081 +size 148085 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_passing_object.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_passing_object.png new file mode 100644 index 0000000000000000000000000000000000000000..55f650896259f880a2c059e81cb563d41eca6a1a Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_passing_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..c9035c07573600f0c23e841ba58fcf57c3be1eff --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_small_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52686cd53e132d6f784d43737ccd97661080b52adcc85671dfb02ba1be8f80bb +size 173979 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_smoldering_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_smoldering_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..8275c430249ee0dea024c656d63d0ecb5e26b9ca Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_hands_with_smoldering_tool.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_helmeted_figure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_helmeted_figure.png new file mode 100644 index 0000000000000000000000000000000000000000..5b52225a9a41a82fc8d5dac5cd5d1c7a47ecc870 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_helmeted_figure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:79f79f5314038c7ec6789a51d66ff433dc9fec09df4c3cbd930b61d466ac2644 +size 398665 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_person_by_screens.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_person_by_screens.png new file mode 100644 index 0000000000000000000000000000000000000000..5f6e167beb57bb0b53330ca2d87b1848c03d3d86 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_person_by_screens.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8054b120d258fd595768c034e1ff19b40cc2115c464cf1491bfba11392cecda +size 116377 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_reaching_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_reaching_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..781803007768574ba97c77586ec93b08eec9b5d6 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_reaching_hands.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_red_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_red_device.png new file mode 100644 index 0000000000000000000000000000000000000000..c8b824f096ed82e9909b3e62d4b7d8fc28cda5dd Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_red_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_silhouette_with_phone.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_silhouette_with_phone.png new file mode 100644 index 0000000000000000000000000000000000000000..b65156fffd7dd43b33690b98ed3fc1fe09b5db59 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/detect_refine_silhouette_with_phone.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81aa9aa9ca83fc6b65133491abbee0c0231508a83a4c5e9791b90933b5d3107f +size 346381 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_blue_light_module.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_blue_light_module.png new file mode 100644 index 0000000000000000000000000000000000000000..d07ed7b809cdc40fbde7e3f683c046ad14ad96ca Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_blue_light_module.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_holding_part.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_holding_part.png new file mode 100644 index 0000000000000000000000000000000000000000..0bc9c9e9bd55b39a0689e8f90ba9e52364b81241 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_holding_part.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_passing_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_passing_object.png new file mode 100644 index 0000000000000000000000000000000000000000..4d366cbb06d85c5944e0b259db2ddffc2b07a1c9 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_passing_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_small_object.png new file mode 100644 index 
0000000000000000000000000000000000000000..c1ea62468440ecc1961ccbd6801aca48754b3a68 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_small_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8870487d27bc8a2b564a6873820b89e65809f483036e52368bf9427de76dbc3 +size 138115 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_smoldering_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_smoldering_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..a6949936edbdf057d983015c8cc05602f3d7e539 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_hands_with_smoldering_tool.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165f46d5485638d80a51f1b6bae71c2ce5b6839b337d9e4bbcba48bba13b467f +size 148310 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_helmeted_figure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_helmeted_figure.png new file mode 100644 index 0000000000000000000000000000000000000000..38b668f5bae014f9db0bdbe61e588ac8411a3f4a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_helmeted_figure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321b7e1b803da102e35efc0d1ffa787cc07b106dc83a444b41d070a7ba2423b4 +size 506856 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_person_by_screens.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_person_by_screens.png new file mode 100644 index 0000000000000000000000000000000000000000..eb585358e99ca6fc093e108817d6bfba22c542dc --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_person_by_screens.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b2ea361968fe522a5a00826197d565cf5ef5ca779bf93fa451ea9e0cef4579 +size 220110 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_reaching_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_reaching_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..8a6579b876e25b1125cbf2fd94fcc5c33e006163 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_reaching_hands.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_red_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_red_device.png new file mode 100644 index 0000000000000000000000000000000000000000..4a53018a603c7cc3c8e746d730f428b06a4c3279 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_red_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_silhouette_with_phone.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_silhouette_with_phone.png new file mode 100644 index 0000000000000000000000000000000000000000..90eb77603b6cb615e574dec4b0607e285cbaf55f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/crops/diversify_input_silhouette_with_phone.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb532a98b2b7e875406debfa1e0e193087f2bbd82a8790a2a92d7f732cbd9cff +size 595042 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/detections.json new file mode 100644 index 
0000000000000000000000000000000000000000..bc002f6ef630149c50183298bae7f7d44316259f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/detections.json @@ -0,0 +1,192 @@ +[ + { + "name": "person_by_screens", + "present": true, + "bbox": [ + 0.1313, + 0.1321, + 0.2804, + 0.5589 + ], + "confidence": 0.95, + "notes": "The shadowy figure in dark clothing, outlined by the glow of the rectangular panels, stands facing away from the camera in the background.", + "coarse_bbox": [ + 0.133, + 0.132, + 0.278, + 0.581 + ], + "refine_crop": "crops/detect_refine_person_by_screens.png" + }, + { + "name": "reaching_hands", + "present": true, + "bbox": [ + 0.4313, + 0.3821, + 0.5678, + 0.5521 + ], + "confidence": 0.9, + "notes": "The primary hand reaching forward from the left side of the frame, catching red and blue light reflections.", + "coarse_bbox": [ + 0.444, + 0.342, + 0.566, + 0.556 + ], + "refine_crop": "crops/detect_refine_reaching_hands.png" + }, + { + "name": "hands_holding_part", + "present": true, + "bbox": [ + 0.2769, + 0.4337, + 0.3938, + 0.5372 + ], + "confidence": 0.95, + "notes": "Tight bounding box capturing the hands gripping a mechanical component under red light.", + "coarse_bbox": [ + 0.224, + 0.275, + 0.556, + 0.515 + ], + "refine_crop": "crops/detect_refine_hands_holding_part.png" + }, + { + "name": "hands_with_small_object", + "present": true, + "bbox": [ + 0.3697, + 0.5225, + 0.5251, + 0.7126 + ], + "confidence": 0.95, + "notes": "Red-illuminated hand pinching the microchip", + "coarse_bbox": [ + 0.24, + 0.434, + 0.526, + 0.717 + ], + "refine_crop": "crops/detect_refine_hands_with_small_object.png" + }, + { + "name": "hands_with_smoldering_tool", + "present": true, + "bbox": [ + 0.7136, + 0.5556, + 0.8779, + 0.7618 + ], + "confidence": 0.98, + "notes": "Both hands holding the soldering iron and solder are clearly visible and well-framed.", + "coarse_bbox": [ + 0.712, + 0.574, + 0.877, + 0.754 + ], + "refine_crop": 
"crops/detect_refine_hands_with_smoldering_tool.png" + }, + { + "name": "helmeted_figure", + "present": true, + "bbox": [ + 0.717, + 0.081, + 1.0, + 0.862 + ], + "confidence": 0.98, + "notes": "refine failed; using coarse bbox", + "coarse_bbox": [ + 0.717, + 0.081, + 1.0, + 0.862 + ], + "refine_crop": "crops/detect_refine_helmeted_figure.png" + }, + { + "name": "silhouette_with_phone", + "present": true, + "bbox": [ + 0.5717, + 0.0262, + 0.857, + 0.6433 + ], + "confidence": 0.95, + "notes": "The silhouette of a person standing in the background, holding a phone with the flashlight on.", + "coarse_bbox": [ + 0.57, + 0.025, + 0.857, + 0.644 + ], + "refine_crop": "crops/detect_refine_silhouette_with_phone.png" + }, + { + "name": "hands_passing_object", + "present": true, + "bbox": [ + 0.6035, + 0.4687, + 0.7144, + 0.5946 + ], + "confidence": "high", + "notes": "The hands in dim red and blue reflections holding the dark piece of hardware.", + "coarse_bbox": [ + 0.451, + 0.419, + 0.71, + 0.589 + ], + "refine_crop": "crops/detect_refine_hands_passing_object.png" + }, + { + "name": "blue_light_module", + "present": true, + "bbox": [ + 0.2793, + 0.8032, + 0.3903, + 0.9054 + ], + "confidence": 0.95, + "notes": "The blue light module including its dark casing.", + "coarse_bbox": [ + 0.275, + 0.794, + 0.396, + 0.91 + ], + "refine_crop": "crops/detect_refine_blue_light_module.png" + }, + { + "name": "red_device", + "present": true, + "bbox": [ + 0.7228, + 0.7939, + 0.902, + 0.8912 + ], + "confidence": 0.98, + "notes": "Tight bounding box around the red electronic device.", + "coarse_bbox": [ + 0.723, + 0.786, + 0.9, + 0.888 + ], + "refine_crop": "crops/detect_refine_red_device.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..64c778c3e16a99f825beba2fa3e0e94ae04b1fbe --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cff7371c485afa8401f58ec9d5bfee8f3d4307a86a129563ff9c447e95309d6 +size 1292180 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..0d1dab2fb885c3c8bc2a34734b95f96453c6cc65 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/plan.json @@ -0,0 +1,236 @@ +{ + "sample_id": "sample_000008", + "target_total": 10, + "target_people": 8, + "target_objects": 2, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1280, + 720 + ], + "aspect_ratio": "16:9", + "style": "photorealistic" + }, + "scene": { + "setting": "a dimly lit, cyberpunk-style underground electronics workshop illuminated by harsh red, blue, and purple neon lights", + "activity": "several individuals are intensely focused on tinkering, soldering, and inspecting various electronic devices and glowing components in the shadows", + "composition": "wide shot showing a long cluttered workbench in the foreground where multiple sets of hands are visible working under colorful task lights, with silhouetted figures standing in the shadowy background; deep shadows and high contrast glowing elements", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 16:9 composition", + "final canvas size 1280x720", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": 
"person_by_screens", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "source_name": "person", + "description": "a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels", + "role_in_scene": "standing in the background, monitoring the glowing screens" + }, + { + "name": "reaching_hands", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "source_name": "person", + "description": "a pair of hands reaching forward, catching sharp reflections of red and blue ambient light", + "role_in_scene": "reaching across the workbench to grab a tool" + }, + { + "name": "hands_holding_part", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "description": "hands bathed in deep red light, carefully gripping a mechanical object", + "role_in_scene": "holding a component steady on the workbench" + }, + { + "name": "hands_with_small_object", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "description": "hands illuminated by a red glow, with one hand open and the other pinching a tiny object", + "role_in_scene": "inspecting a small microchip" + }, + { + "name": "hands_with_smoldering_tool", + "source_index": 5, + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "description": "hands lit by red light, grasping a smoking device or tool", + "role_in_scene": "soldering a wire, emitting a trail of smoke" + }, + { + "name": "helmeted_figure", + "source_index": 6, + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "description": "a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light", + "role_in_scene": "leaning closely over the workbench to inspect the soldering work" + }, + { + "name": 
"silhouette_with_phone", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "description": "a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device", + "role_in_scene": "standing on the side, illuminating the workspace with a phone flashlight" + }, + { + "name": "hands_passing_object", + "source_index": 13, + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "description": "hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object", + "role_in_scene": "passing a heavy piece of hardware to another worker" + } + ], + "objects": [ + { + "name": "blue_light_module", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "description": "a small, intensely bright blue rectangular light glowing through the shadows", + "role_in_scene": "sitting on the edge of the workbench, casting a blue beam across the tools" + }, + { + "name": "red_device", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "description": "a faintly glowing red rectangular electronic device with a textured surface", + "role_in_scene": "lying flat on the workbench next to the busy hands, functioning as a diagnostic remote" + } + ] + }, + "expected_subjects": [ + { + "name": "person_by_screens", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "source_name": "person", + "source_description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. 
Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground.", + "sub_caption": "person: a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels. Scene role: standing in the background, monitoring the glowing screens", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "reaching_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "source_name": "person", + "source_description": "Visible hands reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands.", + "sub_caption": "person: a pair of hands reaching forward, catching sharp reflections of red and blue ambient light. Scene role: reaching across the workbench to grab a tool", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_holding_part", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object.", + "sub_caption": "person: hands bathed in deep red light, carefully gripping a mechanical object. 
Scene role: holding a component steady on the workbench", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_with_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object.", + "sub_caption": "person: hands illuminated by a red glow, with one hand open and the other pinching a tiny object. Scene role: inspecting a small microchip", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_with_smoldering_tool", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment.", + "sub_caption": "person: hands lit by red light, grasping a smoking device or tool. Scene role: soldering a wire, emitting a trail of smoke", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "helmeted_figure", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "source_description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. 
Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible.", + "sub_caption": "person: a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light. Scene role: leaning closely over the workbench to inspect the soldering work", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "silhouette_with_phone", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "source_description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone.", + "sub_caption": "person: a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device. Scene role: standing on the side, illuminating the workspace with a phone flashlight", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_passing_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections.", + "sub_caption": "person: hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object. 
Scene role: passing a heavy piece of hardware to another worker", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "blue_light_module", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "source_description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right.", + "sub_caption": "blue light: a small, intensely bright blue rectangular light glowing through the shadows. Scene role: sitting on the edge of the workbench, casting a blue beam across the tools", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "red_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: a faintly glowing red rectangular electronic device with a textured surface. 
Scene role: lying flat on the workbench next to the busy hands, functioning as a diagnostic remote", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000008/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references.json new file mode 100644 index 0000000000000000000000000000000000000000..7d7847d57d356f87e1a589cee6fb73bb54db375a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references.json @@ -0,0 +1,325 @@ +{ + "references": [ + { + "name": "person_by_screens", + "ref_image": "references/ref_person_by_screens.png", + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", + "diversify_input": "crops/diversify_input_person_by_screens.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", + "output": "references/ref_person_by_screens.png", + "mask": "references/sam_mask_person_by_screens.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 55.0, + 687.0, + 1009.0 + ], + "mask_score": 3.450623, + "mask_area_ratio": 0.151286, + "elapsed_seconds": 10.4397 + }, + "reference_verify": "references/reference_verify_person_by_screens.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "reaching_hands", + "ref_image": "references/ref_reaching_hands.png", + "raw_ref_image": 
"references/raw_ref_reaching_hands_attempt_01.png", + "diversify_input": "crops/diversify_input_reaching_hands.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", + "output": "references/ref_reaching_hands.png", + "mask": "references/sam_mask_reaching_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 388.0, + 183.0, + 637.0, + 867.0 + ], + "mask_score": 3.461161, + "mask_area_ratio": 0.070283, + "elapsed_seconds": 10.0646 + }, + "reference_verify": "references/reference_verify_reaching_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_holding_part", + "ref_image": "references/ref_hands_holding_part.png", + "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", + "diversify_input": "crops/diversify_input_hands_holding_part.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", + "output": "references/ref_hands_holding_part.png", + "mask": "references/sam_mask_hands_holding_part.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 345.0, + 90.0, + 675.0, + 1015.0 + ], + "mask_score": 3.449955, + "mask_area_ratio": 0.13985, + "elapsed_seconds": 
9.9678 + }, + "reference_verify": "references/reference_verify_hands_holding_part.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_with_small_object", + "ref_image": "references/ref_hands_with_small_object.png", + "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", + "diversify_input": "crops/diversify_input_hands_with_small_object.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", + "output": "references/ref_hands_with_small_object.png", + "mask": "references/sam_mask_hands_with_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 60.0, + 684.0, + 992.0 + ], + "mask_score": 3.441398, + "mask_area_ratio": 0.128698, + "elapsed_seconds": 10.4408 + }, + "reference_verify": "references/reference_verify_hands_with_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_with_smoldering_tool", + "ref_image": "references/ref_hands_with_smoldering_tool.png", + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "diversify_input": "crops/diversify_input_hands_with_smoldering_tool.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "output": "references/ref_hands_with_smoldering_tool.png", + "mask": "references/sam_mask_hands_with_smoldering_tool.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 43.0, + 685.0, + 1000.0 + ], + "mask_score": 3.455653, + "mask_area_ratio": 0.157475, + "elapsed_seconds": 10.0065 + }, + "reference_verify": "references/reference_verify_hands_with_smoldering_tool.json", + "reference_verify_passed": true, + "reference_attempts": 3 + }, + { + "name": "helmeted_figure", + "ref_image": "references/ref_helmeted_figure.png", + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", + "diversify_input": "crops/diversify_input_helmeted_figure.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", + "output": "references/ref_helmeted_figure.png", + "mask": "references/sam_mask_helmeted_figure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 333.0, + 17.0, + 696.0, + 1018.0 + ], + "mask_score": 3.313146, + "mask_area_ratio": 0.166679, + "elapsed_seconds": 10.3423 + }, + "reference_verify": "references/reference_verify_helmeted_figure.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "silhouette_with_phone", + "ref_image": "references/ref_silhouette_with_phone.png", + "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", + "diversify_input": "crops/diversify_input_silhouette_with_phone.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", + "output": "references/ref_silhouette_with_phone.png", + "mask": "references/sam_mask_silhouette_with_phone.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 16.0, + 694.0, + 1018.0 + ], + "mask_score": 3.400937, + "mask_area_ratio": 0.174557, + "elapsed_seconds": 10.3827 + }, + "reference_verify": "references/reference_verify_silhouette_with_phone.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_passing_object", + "ref_image": "references/ref_hands_passing_object.png", + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", + "diversify_input": "crops/diversify_input_hands_passing_object.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", + "output": "references/ref_hands_passing_object.png", + "mask": "references/sam_mask_hands_passing_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 17.0, + 690.0, + 1014.0 + ], + "mask_score": 3.418609, + "mask_area_ratio": 0.166451, + "elapsed_seconds": 11.3527 + }, + "reference_verify": "references/reference_verify_hands_passing_object.json", + 
"reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "blue_light_module", + "ref_image": "references/ref_blue_light_module.png", + "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_light_module.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", + "output": "references/ref_blue_light_module.png", + "mask": "references/sam_mask_blue_light_module.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 4.0, + 250.0, + 978.0, + 796.0 + ], + "mask_score": 3.46793, + "mask_area_ratio": 0.374003, + "elapsed_seconds": 10.496 + }, + "reference_verify": "references/reference_verify_blue_light_module.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "red_device", + "ref_image": "references/ref_red_device.png", + "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", + "diversify_input": "crops/diversify_input_red_device.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", + "output": "references/ref_red_device.png", + "mask": "references/sam_mask_red_device.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + 
"prompt_box_xyxy": [ + 76.0, + 282.0, + 989.0, + 762.0 + ], + "mask_score": 3.430848, + "mask_area_ratio": 0.198863, + "elapsed_seconds": 11.2516 + }, + "reference_verify": "references/reference_verify_red_device.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_blue_light_module.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_blue_light_module.png new file mode 100644 index 0000000000000000000000000000000000000000..9ccc5fe423674b3e1837bc678eadcad89f9c1ee9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_blue_light_module.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d9e7410c3631a6731526a465b95750c6e9b816a3fcdf513d37573da6630243 +size 589329 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_holding_part.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_holding_part.png new file mode 100644 index 0000000000000000000000000000000000000000..13db6db57193da86912518c752685013dc41d2a1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_holding_part.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162ebfce397c958d923b9440e0920b5285382185658aaed158db1c1fd3d1042c +size 263704 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_passing_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_passing_object.png new file mode 100644 index 0000000000000000000000000000000000000000..a68392bb1321b0a908afe85c1ea160ae4a0e6ac9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_passing_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4af3204495779a0745a04839f16b626ed7ae888db3b315f03aab5583f10a649a +size 295059 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..78aa29eaa5b2c581a9c7a36f40a2b847465cd6cc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_small_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0216fd48da63db385b163fa8e83779cb189314471290c23597f2ffdf408f13e6 +size 254078 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_smoldering_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_smoldering_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..a5c8172b24bf2769b385119d9cdca4da01210bc3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_hands_with_smoldering_tool.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d078e12facc585fef9e142a9d9265842124a56085777d4db970d110526f0a32e +size 287681 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_helmeted_figure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_helmeted_figure.png new file mode 100644 index 0000000000000000000000000000000000000000..d4b977872b40b64189efd7f7917c8ee7e5997f0c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_helmeted_figure.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f53be67aa40beca0b6b3d01c6ec864248eb6c9a076b8ddbc7553d6f2c25c031 +size 329768 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_person_by_screens.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_person_by_screens.png new file mode 100644 index 0000000000000000000000000000000000000000..27e2f9d9fd48fb8fd08c7205fe7757b508bec344 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_person_by_screens.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c542880952c8f9190400a08e68cb2e21a7cf0c473077fefd0289e432f87cee72 +size 279739 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_reaching_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_reaching_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..1f1c74241a4b1770380515f8f39a9f8629a5d73c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_reaching_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a97de64fca327b03ddca13e84398b0e05d60a4828e7aee21218a0cf695a96b +size 139385 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_red_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_red_device.png new file mode 100644 index 0000000000000000000000000000000000000000..ffc75cfc6dec3d215763b075f6ece872a8c72d4a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_red_device.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50dee9a1d80ecf644b7bf2b76d58bbc9550edff8406e70bcb02eef7fb1f6078f +size 443762 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_silhouette_with_phone.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_silhouette_with_phone.png new file mode 100644 index 0000000000000000000000000000000000000000..84803f436839d6f99a0326360a425a28a94f2a54 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/ref_silhouette_with_phone.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896b19324b65082602ee71caac1b8520394e74451812a1a27e1a46e92834dad2 +size 319451 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_blue_light_module.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_blue_light_module.json new file mode 100644 index 0000000000000000000000000000000000000000..33ac23e1d7137f27decc40a7fc3dae1f46123301 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_blue_light_module.json @@ -0,0 +1,46 @@ +{ + "name": "blue_light_module", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_light_module_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_light_module_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_blue_light_module_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_blue_light_module_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + 
"prompt_box_xyxy": [ + 4.0, + 250.0, + 978.0, + 796.0 + ], + "mask_score": 3.46793, + "mask_area_ratio": 0.374003, + "elapsed_seconds": 10.496 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "A complete, isolated blue LED light module with wires on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_holding_part.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_holding_part.json new file mode 100644 index 0000000000000000000000000000000000000000..1411871f9559d66d95d6da6392bb4fff5eed7e8f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_holding_part.json @@ -0,0 +1,46 @@ +{ + "name": "hands_holding_part", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_part_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_part_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_holding_part_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_holding_part_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 345.0, + 90.0, + 675.0, + 1015.0 + ], + "mask_score": 3.449955, + "mask_area_ratio": 0.13985, + "elapsed_seconds": 9.9678 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible, uncropped, white background. Meets all hard requirements for a person reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_passing_object.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_passing_object.json new file mode 100644 index 0000000000000000000000000000000000000000..40ddde0ca88616e93cdefdb224de862c03069e66 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_passing_object.json @@ -0,0 +1,88 @@ +{ + "name": "hands_passing_object", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_passing_object_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_passing_object_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_passing_object_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_passing_object_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 272.0, + 41.0, + 744.0, + 1023.0 + ], + "mask_score": 2.949319, + "mask_area_ratio": 0.321181, + "elapsed_seconds": 10.6845 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Not a full body image", + "Legs and feet are cropped by the bottom boundary" + ], + "notes": "The image shows a person from the head to the upper thighs. It is an upper-body shot, failing the full-body requirement." 
+ } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_hands_passing_object_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_passing_object_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_passing_object_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_passing_object_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 17.0, + 690.0, + 1014.0 + ], + "mask_score": 3.418609, + "mask_area_ratio": 0.166451, + "elapsed_seconds": 11.3527 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible with white background." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_small_object.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_small_object.json new file mode 100644 index 0000000000000000000000000000000000000000..0608a7ba0771a5a6e7aaf762ba91470351842cee --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_small_object.json @@ -0,0 +1,46 @@ +{ + "name": "hands_with_small_object", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_with_small_object_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_with_small_object_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_with_small_object_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_with_small_object_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 60.0, + 684.0, + 992.0 + ], + "mask_score": 3.441398, + "mask_area_ratio": 0.128698, + "elapsed_seconds": 10.4408 + }, + "verify": { + "passed": 
true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a full body of a person isolated on a white background with no cropping. The hands have a red glow and are pinching/holding a small object as requested." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_smoldering_tool.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_smoldering_tool.json new file mode 100644 index 0000000000000000000000000000000000000000..e2c4aa3f71e6f8ec12ba99e354b279ed4f24d8a5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_hands_with_smoldering_tool.json @@ -0,0 +1,129 @@ +{ + "name": "hands_with_smoldering_tool", + "passed": true, + "accepted_attempt": 3, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_with_smoldering_tool_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_with_smoldering_tool_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_with_smoldering_tool_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_with_smoldering_tool_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 9.0, + 696.0, + 1017.0 + ], + "mask_score": 3.052758, + "mask_area_ratio": 0.159752, + "elapsed_seconds": 9.9645 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's head and face are severely corrupted/distorted with white artifacts, failing to provide a complete and recognizable full body." + ], + "notes": "The image shows a full body, but the head and face area is heavily distorted and obscured by white patches, making it unacceptable as a complete subject reference." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_hands_with_smoldering_tool_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_with_smoldering_tool_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_with_smoldering_tool_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_with_smoldering_tool_attempt_02.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 167.0, + 17.0, + 850.0, + 1023.0 + ], + "mask_score": 3.002517, + "mask_area_ratio": 0.34168, + "elapsed_seconds": 10.1038 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person is only a half-body crop, missing the lower body and feet.", + "The full body is not visible." + ], + "notes": "The image shows an upper-body view of a person. The bottom portion (legs, feet) is truncated, which violates the full-body visibility hard requirement." + } + }, + { + "attempt": 3, + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "candidate_ref_image": "references/candidate_ref_hands_with_smoldering_tool_attempt_03.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_with_smoldering_tool_attempt_03.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_hands_with_smoldering_tool_attempt_03.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_hands_with_smoldering_tool_attempt_03.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + 
"sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 43.0, + 685.0, + 1000.0 + ], + "mask_score": 3.455653, + "mask_area_ratio": 0.157475, + "elapsed_seconds": 10.0065 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body person is visible and isolated on a white background with no cropping. The specific actions and props from the caption are missing, but all structural requirements for a full-body person reference image are fully satisfied." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_helmeted_figure.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_helmeted_figure.json new file mode 100644 index 0000000000000000000000000000000000000000..104f015c1faac711ace60e94a291b87d9f97c108 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_helmeted_figure.json @@ -0,0 +1,88 @@ +{ + "name": "helmeted_figure", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_helmeted_figure_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_helmeted_figure_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_helmeted_figure_attempt_01.png", + 
"mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_helmeted_figure_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 318.0, + 9.0, + 706.0, + 1015.0 + ], + "mask_score": 1.423523, + "mask_area_ratio": 0.047886, + "elapsed_seconds": 11.8102 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": false, + "white_background": true, + "failure_reasons": [ + "The person's head, face, hands, and feet are missing.", + "The subject is poorly rendered and severely incomplete." + ], + "notes": "The image displays a disjointed, heavily artifacted silhouette of clothing, completely missing the head, face, hands, feet, and any recognizable human features." 
+ } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_helmeted_figure_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_helmeted_figure_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_helmeted_figure_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_helmeted_figure_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 333.0, + 17.0, + 696.0, + 1018.0 + ], + "mask_score": 3.313146, + "mask_area_ratio": 0.166679, + "elapsed_seconds": 10.3423 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible, well-isolated, no cropping, white background." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_person_by_screens.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_person_by_screens.json new file mode 100644 index 0000000000000000000000000000000000000000..15932f87c884c19d1e454f0484d85b613bd09917 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_person_by_screens.json @@ -0,0 +1,88 @@ +{ + "name": "person_by_screens", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_by_screens_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_by_screens_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_person_by_screens_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_person_by_screens_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 319.0, + 6.0, + 702.0, + 1023.0 + ], + "mask_score": 0.661541, + "mask_area_ratio": 0.07061, + "elapsed_seconds": 21.8886 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": 
false, + "cropped_or_truncated": true, + "single_main_subject": false, + "white_background": true, + "failure_reasons": [ + "The person's head, upper body, arms, and feet are missing.", + "The visible portion is heavily artifacted and incomplete." + ], + "notes": "The image appears to have failed to generate the subject properly, leaving only glitchy artifacts resembling pants." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_person_by_screens_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_by_screens_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_person_by_screens_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_person_by_screens_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 55.0, + 687.0, + 1009.0 + ], + "mask_score": 3.450623, + "mask_area_ratio": 0.151286, + "elapsed_seconds": 10.4397 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a full body of a person standing on a white background with 
good margins." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_reaching_hands.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_reaching_hands.json new file mode 100644 index 0000000000000000000000000000000000000000..944ce7e89abe68123fdd94d0042c53bb84e4cba8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_reaching_hands.json @@ -0,0 +1,46 @@ +{ + "name": "reaching_hands", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_reaching_hands_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_reaching_hands_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_reaching_hands_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_reaching_hands_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_reaching_hands_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 388.0, + 183.0, + 637.0, + 867.0 + ], + "mask_score": 3.461161, + "mask_area_ratio": 0.070283, + "elapsed_seconds": 10.0646 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + 
"cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body of the person is clearly visible with ample white margin. None of the body parts are cropped, perfectly satisfying all hard requirements for a person reference image." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_red_device.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_red_device.json new file mode 100644 index 0000000000000000000000000000000000000000..9e8f1e2bec0374c4eeac7b61480ebcfc0353ee2e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_red_device.json @@ -0,0 +1,46 @@ +{ + "name": "red_device", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_red_device_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_red_device_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_red_device_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_red_device_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + 
"prompt_box_xyxy": [ + 76.0, + 282.0, + 989.0, + 762.0 + ], + "mask_score": 3.430848, + "mask_area_ratio": 0.198863, + "elapsed_seconds": 11.2516 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The intended subject (a faintly glowing red rectangular electronic device with a textured surface) is clearly visible, complete, isolated on a white background, and not cropped." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_silhouette_with_phone.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_silhouette_with_phone.json new file mode 100644 index 0000000000000000000000000000000000000000..ab15c7dffe3f579a9795c23f47e4d8375b631364 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/reference_verify_silhouette_with_phone.json @@ -0,0 +1,46 @@ +{ + "name": "silhouette_with_phone", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_silhouette_with_phone_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_silhouette_with_phone_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_ref_silhouette_with_phone_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/candidate_sam_mask_silhouette_with_phone_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 16.0, + 694.0, + 1018.0 + ], + "mask_score": 3.400937, + "mask_area_ratio": 0.174557, + "elapsed_seconds": 10.3827 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a full body of a person isolated on a white background with no cropping. It satisfies all hard requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_blue_light_module.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_blue_light_module.png new file mode 100644 index 0000000000000000000000000000000000000000..5e9ff9addda7c34c2e9b275a477cb53b3c035adb Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_blue_light_module.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_holding_part.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_holding_part.png new file mode 100644 index 0000000000000000000000000000000000000000..0c8e7796e74dbfef60f5e4f3d9ba744999129081 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_holding_part.png differ diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_passing_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_passing_object.png new file mode 100644 index 0000000000000000000000000000000000000000..e07287b737f09c54bd7af23709139051343b611c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_passing_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..65e2f9fb252b00317bcb3dd0065f53abf21ea350 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_small_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_smoldering_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_smoldering_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..406357f7c7b28576df1e013ea1d86823baee9dda Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_hands_with_smoldering_tool.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_helmeted_figure.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_helmeted_figure.png new file mode 100644 index 0000000000000000000000000000000000000000..bf02af7280d302782814a6d6d3159f400dcd47c7 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_helmeted_figure.png differ diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_person_by_screens.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_person_by_screens.png new file mode 100644 index 0000000000000000000000000000000000000000..7faf0e97ae5d4d9f99e46b84376950e497796a80 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_person_by_screens.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_reaching_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_reaching_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..fdae3f188b2ca7cb524bdde36e45bed048a7a3bc Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_reaching_hands.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_red_device.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_red_device.png new file mode 100644 index 0000000000000000000000000000000000000000..98f91b3cce988cd4d09b0f9cd024500606e35b60 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_red_device.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_silhouette_with_phone.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_silhouette_with_phone.png new file mode 100644 index 0000000000000000000000000000000000000000..415ed3d3b192cf7235e644e919d5573ff9b8f6e5 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/references/sam_mask_silhouette_with_phone.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/row.json new file mode 100644 
index 0000000000000000000000000000000000000000..a36382d0446bca5edf1f904ad1b1097f389ca6c0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/row.json @@ -0,0 +1,486 @@ +{ + "sample_id": "sample_000008", + "target_total": 10, + "target_people": 8, + "target_objects": 2, + "canvas_size": [ + 1280, + 720 + ], + "canvas_aspect_ratio": "16:9", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_by_screens", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "source_name": "person", + "source_description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground.", + "sub_caption": "person: a shadowy figure in dark clothing, faintly outlined by the glow of rectangular panels. 
Scene role: standing in the background, monitoring the glowing screens", + "measured_bbox": [ + 0.1313, + 0.1321, + 0.2804, + 0.5589 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_by_screens.png", + "raw_ref_image": "references/raw_ref_person_by_screens_attempt_02.png", + "reference_verify": "references/reference_verify_person_by_screens.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_person_by_screens_attempt_02.png", + "output": "references/ref_person_by_screens.png", + "mask": "references/sam_mask_person_by_screens.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 55.0, + 687.0, + 1009.0 + ], + "mask_score": 3.450623, + "mask_area_ratio": 0.151286, + "elapsed_seconds": 10.4397 + } + }, + { + "name": "reaching_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "source_name": "person", + "source_description": "Visible hands reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands.", + "sub_caption": "person: a pair of hands reaching forward, catching sharp reflections of red and blue ambient light. 
Scene role: reaching across the workbench to grab a tool", + "measured_bbox": [ + 0.4313, + 0.3821, + 0.5678, + 0.5521 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_reaching_hands.png", + "raw_ref_image": "references/raw_ref_reaching_hands_attempt_01.png", + "reference_verify": "references/reference_verify_reaching_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_reaching_hands_attempt_01.png", + "output": "references/ref_reaching_hands.png", + "mask": "references/sam_mask_reaching_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 388.0, + 183.0, + 637.0, + 867.0 + ], + "mask_score": 3.461161, + "mask_area_ratio": 0.070283, + "elapsed_seconds": 10.0646 + } + }, + { + "name": "hands_holding_part", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object.", + "sub_caption": "person: hands bathed in deep red light, carefully gripping a mechanical object. 
Scene role: holding a component steady on the workbench", + "measured_bbox": [ + 0.2769, + 0.4337, + 0.3938, + 0.5372 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_part.png", + "raw_ref_image": "references/raw_ref_hands_holding_part_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_part.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_holding_part_attempt_01.png", + "output": "references/ref_hands_holding_part.png", + "mask": "references/sam_mask_hands_holding_part.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 345.0, + 90.0, + 675.0, + 1015.0 + ], + "mask_score": 3.449955, + "mask_area_ratio": 0.13985, + "elapsed_seconds": 9.9678 + } + }, + { + "name": "hands_with_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object.", + "sub_caption": "person: hands illuminated by a red glow, with one hand open and the other pinching a tiny object. 
Scene role: inspecting a small microchip", + "measured_bbox": [ + 0.3697, + 0.5225, + 0.5251, + 0.7126 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_small_object.png", + "raw_ref_image": "references/raw_ref_hands_with_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_with_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_small_object_attempt_01.png", + "output": "references/ref_hands_with_small_object.png", + "mask": "references/sam_mask_hands_with_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 60.0, + 684.0, + 992.0 + ], + "mask_score": 3.441398, + "mask_area_ratio": 0.128698, + "elapsed_seconds": 10.4408 + } + }, + { + "name": "hands_with_smoldering_tool", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "source_name": "person", + "source_description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment.", + "sub_caption": "person: hands lit by red light, grasping a smoking device or tool. 
Scene role: soldering a wire, emitting a trail of smoke", + "measured_bbox": [ + 0.7136, + 0.5556, + 0.8779, + 0.7618 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_with_smoldering_tool.png", + "raw_ref_image": "references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "reference_verify": "references/reference_verify_hands_with_smoldering_tool.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_with_smoldering_tool_attempt_03.png", + "output": "references/ref_hands_with_smoldering_tool.png", + "mask": "references/sam_mask_hands_with_smoldering_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 43.0, + 685.0, + 1000.0 + ], + "mask_score": 3.455653, + "mask_area_ratio": 0.157475, + "elapsed_seconds": 10.0065 + } + }, + { + "name": "helmeted_figure", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "source_name": "person", + "source_description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible.", + "sub_caption": "person: a person partially obscured by darkness wearing a protective helmet and a clear face shield reflecting ambient light. 
Scene role: leaning closely over the workbench to inspect the soldering work", + "measured_bbox": [ + 0.717, + 0.081, + 1.0, + 0.862 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_helmeted_figure.png", + "raw_ref_image": "references/raw_ref_helmeted_figure_attempt_02.png", + "reference_verify": "references/reference_verify_helmeted_figure.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_helmeted_figure_attempt_02.png", + "output": "references/ref_helmeted_figure.png", + "mask": "references/sam_mask_helmeted_figure.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 333.0, + 17.0, + 696.0, + 1018.0 + ], + "mask_score": 3.313146, + "mask_area_ratio": 0.166679, + "elapsed_seconds": 10.3423 + } + }, + { + "name": "silhouette_with_phone", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "source_name": "person", + "source_description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone.", + "sub_caption": "person: a dark silhouette starkly contrasted against bright red and purple lighting, holding up a rectangular device. 
Scene role: standing on the side, illuminating the workspace with a phone flashlight", + "measured_bbox": [ + 0.5717, + 0.0262, + 0.857, + 0.6433 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_silhouette_with_phone.png", + "raw_ref_image": "references/raw_ref_silhouette_with_phone_attempt_01.png", + "reference_verify": "references/reference_verify_silhouette_with_phone.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_silhouette_with_phone_attempt_01.png", + "output": "references/ref_silhouette_with_phone.png", + "mask": "references/sam_mask_silhouette_with_phone.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 317.0, + 16.0, + 694.0, + 1018.0 + ], + "mask_score": 3.400937, + "mask_area_ratio": 0.174557, + "elapsed_seconds": 10.3827 + } + }, + { + "name": "hands_passing_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "source_name": "person", + "source_description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections.", + "sub_caption": "person: hands bathed in dim red and blue reflections, tightly holding an unidentifiable dark object. 
Scene role: passing a heavy piece of hardware to another worker", + "measured_bbox": [ + 0.6035, + 0.4687, + 0.7144, + 0.5946 + ], + "detection_confidence": "high", + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_passing_object.png", + "raw_ref_image": "references/raw_ref_hands_passing_object_attempt_02.png", + "reference_verify": "references/reference_verify_hands_passing_object.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_hands_passing_object_attempt_02.png", + "output": "references/ref_hands_passing_object.png", + "mask": "references/sam_mask_hands_passing_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 17.0, + 690.0, + 1014.0 + ], + "mask_score": 3.418609, + "mask_area_ratio": 0.166451, + "elapsed_seconds": 11.3527 + } + }, + { + "name": "blue_light_module", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "source_name": "blue light", + "source_description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right.", + "sub_caption": "blue light: a small, intensely bright blue rectangular light glowing through the shadows. 
Scene role: sitting on the edge of the workbench, casting a blue beam across the tools", + "measured_bbox": [ + 0.2793, + 0.8032, + 0.3903, + 0.9054 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_light_module.png", + "raw_ref_image": "references/raw_ref_blue_light_module_attempt_01.png", + "reference_verify": "references/reference_verify_blue_light_module.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_blue_light_module_attempt_01.png", + "output": "references/ref_blue_light_module.png", + "mask": "references/sam_mask_blue_light_module.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 4.0, + 250.0, + 978.0, + 796.0 + ], + "mask_score": 3.46793, + "mask_area_ratio": 0.374003, + "elapsed_seconds": 10.496 + } + }, + { + "name": "red_device", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "source_name": "electronic device", + "source_description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device.", + "sub_caption": "electronic device: a faintly glowing red rectangular electronic device with a textured surface. 
Scene role: lying flat on the workbench next to the busy hands, functioning as a diagnostic remote", + "measured_bbox": [ + 0.7228, + 0.7939, + 0.902, + 0.8912 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_device.png", + "raw_ref_image": "references/raw_ref_red_device_attempt_01.png", + "reference_verify": "references/reference_verify_red_device.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000008/references/raw_ref_red_device_attempt_01.png", + "output": "references/ref_red_device.png", + "mask": "references/sam_mask_red_device.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 76.0, + 282.0, + 989.0, + 762.0 + ], + "mask_score": 3.430848, + "mask_area_ratio": 0.198863, + "elapsed_seconds": 11.2516 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..49a2e7cb2dd266119d406162cbf2bb3e6ff37bed --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000008/vocab_task.json @@ -0,0 +1,154 @@ +{ + "task_id": "sample_000008", + "sample_id": "sample_000008", + "sample_index": 8, + "target_total": 10, + "target_people": 8, + "target_objects": 2, + "people_candidates": [ + { + "candidate_index": 0, + 
"source_offset": 614, + "image_id": "Ego4D:ego4d_video/EGO_259527.npy:person:0", + "name": "person", + "description": "The person is wearing a patterned garment with yellow and blue designs, and their hands are visible manipulating objects. Source dataset: Ego4D. Scene context: A close-up view of hands handling items in a dimly lit setting." + }, + { + "candidate_index": 1, + "source_offset": 160, + "image_id": "Ego4D:ego4d_video/EGO_165518.npy:person:0", + "name": "person", + "description": "A figure visible in the center, mostly obscured by darkness, wearing dark clothing. Only a faint outline and some lighter parts of clothing or skin are visible. Source dataset: Ego4D. Scene context: A very dark, low-light indoor scene, possibly a room or stage, with a person partially visible in the center and illuminated rectangular panels or screens visible in the background and foreground." + }, + { + "candidate_index": 2, + "source_offset": 707, + "image_id": "Ego4D:ego4d_video/EGO_282541.npy:person:0", + "name": "person", + "description": "Visible hands reaching forward. Source dataset: Ego4D. Scene context: A very dark image with red and blue lights, showing a person's hands." + }, + { + "candidate_index": 3, + "source_offset": 1131, + "image_id": "Ego4D:ego4d_video/EGO_76427.npy:person:0", + "name": "person", + "description": "Visible only by their hands, which are illuminated by red light. The hands are positioned as if holding or manipulating something. Source dataset: Ego4D. Scene context: A very dark image mostly showing black space, with dim reddish lighting catching what appears to be a person's hands holding an object." + }, + { + "candidate_index": 4, + "source_offset": 1118, + "image_id": "Ego4D:ego4d_video/EGO_76415.npy:person:0", + "name": "person", + "description": "The person's hands are visible, illuminated in red light. The left hand is open, and the right hand is partially obscured, appearing to hold a small object. Source dataset: Ego4D. 
Scene context: Two hands are visible in a dark environment, possibly holding or manipulating a small object." + }, + { + "candidate_index": 5, + "source_offset": 311, + "image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "name": "person", + "description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment." + }, + { + "candidate_index": 6, + "source_offset": 761, + "image_id": "Ego4D:ego4d_video/EGO_282799.npy:person:0", + "name": "person", + "description": "A person whose features are mostly obscured by darkness; they are wearing a helmet and what appears to be a clear face shield. Source dataset: Ego4D. Scene context: A highly obscured and poorly lit scene where a person wearing a helmet and a face shield is somewhat visible." + }, + { + "candidate_index": 7, + "source_offset": 642, + "image_id": "Ego4D:ego4d_video/EGO_260704.npy:person:0", + "name": "person", + "description": "a person whose foot is visible, illuminated by blacklight, with some glowing areas Source dataset: Ego4D. Scene context: a dark scene with a person's foot visible under blacklight" + }, + { + "candidate_index": 8, + "source_offset": 694, + "image_id": "Ego4D:ego4d_video/EGO_276585.npy:person:0", + "name": "person", + "description": "The person's hands are visible, with red nail polish on the fingernails, holding onto a blue fabric. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding onto a blue fabric, possibly a bedsheet or clothing, with yellow floral patterns." + }, + { + "candidate_index": 9, + "source_offset": 20, + "image_id": "EPIC-Kitchens:P11_107:person:0", + "name": "person", + "description": "Only the person's right hand and part of their wrist are visible, reaching out in a blurred motion. Source dataset: EPIC-Kitchens. 
Scene context: A first-person view of a kitchen, showing a person's hand reaching towards a cabinet or drawer." + }, + { + "candidate_index": 10, + "source_offset": 77, + "image_id": "Ego4D:ego4d_video/EGO_127804.npy:person:0", + "name": "person", + "description": "A person is visible, specifically their hand, holding a small object. The hand is in a grasping gesture. Source dataset: Ego4D. Scene context: A close-up view of a person's hand holding what appears to be a small tool or piece of hardware." + }, + { + "candidate_index": 11, + "source_offset": 327, + "image_id": "Ego4D:ego4d_video/EGO_2030.npy:person:0", + "name": "person", + "description": "A person visible mainly as a dark silhouette against the red and purple light, holding a rectangular object that appears to be a phone. Source dataset: Ego4D. Scene context: A dark, low-light scene illuminated by red and purple light, with a person holding a phone." + }, + { + "candidate_index": 12, + "source_offset": 63, + "image_id": "Ego4D:ego4d_video/EGO_100293.npy:person:0", + "name": "person", + "description": "Visible from the lower leg down to the foot, illuminated in a strong blue light against a dark background. Source dataset: Ego4D. Scene context: A close-up view of a person's foot and lower leg, illuminated in a blueish light, with a blurry green and dark background." + }, + { + "candidate_index": 13, + "source_offset": 1085, + "image_id": "Ego4D:ego4d_video/EGO_76362.npy:person:0", + "name": "person", + "description": "Visible by their hands, which are illuminated by a red light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in a very dark environment with some red and blue light reflections." 
+ }, + { + "candidate_index": 14, + "source_offset": 31, + "image_id": "EPIC-Kitchens:P26_112:person:0", + "name": "person", + "description": "A person is standing in the kitchen, partially visible from the feet down, wearing black shoes with white laces. Source dataset: EPIC-Kitchens. Scene context: A top-down view of a kitchen sink filled with dishes and utensils, with a refrigerator on the right." + }, + { + "candidate_index": 15, + "source_offset": 25, + "image_id": "EPIC-Kitchens:P25_102:person:0", + "name": "person", + "description": "A person's bare left foot is visible at the bottom edge of the frame, stepping on the tiled floor near the stove. Source dataset: EPIC-Kitchens. Scene context: A first-person view of a kitchen showing a person's foot near a stove, cabinets, and a floor mat." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 6814, + "image_id": "Ego4D:ego4d_video/EGO_25200.npy:object:0", + "name": "blue light", + "description": "A small, bright blue rectangular light glowing faintly in the lower right area of the dark scene. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, faint blue rectangular light source near the bottom right." + }, + { + "candidate_index": 1, + "source_offset": 7075, + "image_id": "Ego4D:ego4d_video/EGO_260318.npy:object:0", + "name": "dark object", + "description": "A dark, possibly black, object situated at the bottom center of the frame, appearing to be a wallet or small bag. Source dataset: Ego4D. Scene context: A close-up view of a dark object, possibly a wallet or small bag, on a light-colored surface against a blue background." + }, + { + "candidate_index": 2, + "source_offset": 6514, + "image_id": "Ego4D:ego4d_video/EGO_237754.npy:object:1", + "name": "fence posts", + "description": "Vertical and diagonal metal bars forming a fence or railing structure located to the right side of the image, appearing dark against the foggy background. 
Source dataset: Ego4D. Scene context: A close-up view of a metal handrail and part of a fence or railing structure shrouded in fog or smoke." + }, + { + "candidate_index": 3, + "source_offset": 7473, + "image_id": "Ego4D:ego4d_video/EGO_282648.npy:object:0", + "name": "electronic device", + "description": "A faint red rectangular shape with some texture, possibly a phone or remote, located near the left arm. Source dataset: Ego4D. Scene context: A very dark scene, likely indoors, with faint red illumination showing parts of a person's arms and a possible electronic device." + } + ], + "rng_seed": 1782765825, + "created_at": 1782259667.831985 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..5f3684d3adfd015bac09df4fec023aa04f2c2609 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202846d2dcf77412bfe78f0f3c212b16b844e1bd62192a91c841109d393ba79c +size 1447581 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cc68b2a0a9f5365d2e92f1bd98f8b8068e14dd3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/compose_prompt.txt @@ -0,0 +1,75 @@ +Render the following JSON scene specification as a photorealistic 1152x864 image using a true 4:3 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. 
Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. +JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A well-lit, contemporary residential kitchen focused tightly around the stainless steel sink and adjacent countertop.", + "activity": "A person is holding a bottle of cleaning solution over the sink to begin washing or scrubbing the basin.", + "composition": "First-person or close over-the-shoulder mid-shot looking slightly downwards. The person's hands and the blue bottle dominate the foreground-center. The curved silver faucet rises behind the hands in the midground. The red mat occupies the bottom portion inside the sink depth. 
The yellow dish gloves are visibly resting on the left countertop edge, providing lateral balance.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_washing_sink", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "description": "A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.", + "role_in_scene": "Actively gripping the blue cleaner bottle over the kitchen sink." + } + ], + "objects": [ + { + "name": "blue_cleaner_bottle", + "source_index": 0, + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "description": "A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.", + "role_in_scene": "Held in the person's hands, positioned just above the sink basin." + }, + { + "name": "red_sink_mat", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + "description": "A vibrant red, textured rubber mat featuring a grid or perforated pattern.", + "role_in_scene": "Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands." 
+ }, + { + "name": "silver_faucet", + "source_index": 2, + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "description": "A polished, curved silver metal kitchen faucet with a standard spout.", + "role_in_scene": "Rising from the back of the sink counter, partially occluded by the person's hands and bottle." + }, + { + "name": "yellow_dish_gloves", + "source_index": 6, + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "description": "A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.", + "role_in_scene": "Laying flat on the countertop directly next to the sink rim, ready to be worn." + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_blue_cleaner_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_blue_cleaner_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..15738bc1b5a8c64fad4f8415de8c09340877e4d1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_blue_cleaner_bottle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69baf8d624bcdbedecc3b921358670f6e8ce8499eb83b970bde02272e97e164c +size 270554 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_person_washing_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_person_washing_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..428af14002ab77feec9083ed56d46d3045f10397 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_person_washing_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef740cb4c6f271e24e3bb7fd08f5ed470af3fff2521aeaa7d20d79078d8a551b +size 1118220 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_red_sink_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_red_sink_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..f3f1169a384bf756a7fea59413f22df889ae8e2f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_red_sink_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4e5acab454d7b29904fa485f585e3e629c7d33b80820f2948dbb38ded3f2f8 +size 525146 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_silver_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_silver_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..aaaca2dff72cbdba99a1dcaf343e5d748bb6eae4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_silver_faucet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2417c8a530430d4a43745b13fab0aec1d90eb97a1b59ef4ad2ed9488102489 +size 150607 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_yellow_dish_gloves.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_yellow_dish_gloves.png new file mode 100644 index 0000000000000000000000000000000000000000..d40e98c66f0f054a4cd51ca354d5b5ee26308374 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/detect_refine_yellow_dish_gloves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060acdacd38bb7ffbc5bcfed982bc2539ea8c8f58f3b699fbc734a17220dd6f4 +size 207180 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_blue_cleaner_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_blue_cleaner_bottle.png new file mode 
100644 index 0000000000000000000000000000000000000000..f148beb09fa034053566eeb3a91633cd89c3237e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_blue_cleaner_bottle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957cf6f5e2a6e283e8da9d72582440ed01dce996780c4d75685d55092f977333 +size 348540 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_person_washing_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_person_washing_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..19a4e09e2ebf7ea8c2c53464ead584ad7f7fc974 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_person_washing_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77e9540f268c8c86262ebd4d0a9910a39013d92b1ea9c7c69d6189d1c99acd8 +size 1345726 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_red_sink_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_red_sink_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..b09946cff6bc80b75f35dc1ec54fefc100122050 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_red_sink_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a3028e3ec2aaf385b86ace34ac067b1f1e8dd9e465dc42ccfbacd419fcb1cd +size 648617 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_silver_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_silver_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..a6a3908a259e2211bb85aa6d2c6d696217873b86 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_silver_faucet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e077ea41849b622d1d8dc1dbcb8d97bb79ea9f22993c6fecd1cd584bc3e7026d +size 183029 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_yellow_dish_gloves.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_yellow_dish_gloves.png new file mode 100644 index 0000000000000000000000000000000000000000..56d0fc28625fb902f9e3bde13fc1b5076ee45abb --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/crops/diversify_input_yellow_dish_gloves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b67535dc62634a1918f09f23851c88dca7df35c7cb7f3259b1bfd478607891a +size 255566 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..05b897d8ccbc090e676ede27fcec6f2f7b644453 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/detections.json @@ -0,0 +1,97 @@ +[ + { + "name": "person_washing_sink", + "present": true, + "bbox": [ + 0.0921, + 0.3527, + 1.0, + 1.0 + ], + "confidence": 950, + "notes": "Bounding box encompasses both visible arms and hands of the person wearing dark long sleeves, from where they enter the bottom/right edges up to the hands gripping the bottle.", + "coarse_bbox": [ + 0.098, + 0.349, + 1.0, + 1.0 + ], + "refine_crop": "crops/detect_refine_person_washing_sink.png" + }, + { + "name": "blue_cleaner_bottle", + "present": true, + "bbox": [ + 0.4107, + 0.2797, + 0.632, + 0.8174 + ], + "confidence": 0.99, + "notes": "Tight bounding box around the visible portions of the bright blue plastic spray bottle, including the nozzle and body.", + "coarse_bbox": [ + 0.41, + 0.278, + 0.632, 
+ 0.814 + ], + "refine_crop": "crops/detect_refine_blue_cleaner_bottle.png" + }, + { + "name": "red_sink_mat", + "present": true, + "bbox": [ + 0.3049, + 0.5104, + 0.8328, + 0.9302 + ], + "confidence": 0.98, + "notes": "Tight bounding box around the visible portion of the red textured mat in the sink.", + "coarse_bbox": [ + 0.306, + 0.508, + 0.835, + 0.93 + ], + "refine_crop": "crops/detect_refine_red_sink_mat.png" + }, + { + "name": "silver_faucet", + "present": true, + "bbox": [ + 0.5344, + 0.0136, + 0.718, + 0.3772 + ], + "confidence": 0.99, + "notes": "The bounding box surrounds the visible polished silver metal kitchen faucet, including its curved spout and the base with the handle.", + "coarse_bbox": [ + 0.538, + 0.012, + 0.717, + 0.375 + ], + "refine_crop": "crops/detect_refine_silver_faucet.png" + }, + { + "name": "yellow_dish_gloves", + "present": true, + "bbox": [ + 0.0, + 0.3471, + 0.2191, + 0.8264 + ], + "confidence": 1.0, + "notes": "A pair of thick, bright yellow rubber dishwashing gloves laying next to the sink.", + "coarse_bbox": [ + 0.0, + 0.346, + 0.217, + 0.822 + ], + "refine_crop": "crops/detect_refine_yellow_dish_gloves.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..7156b0337d91055ed99f11afcc24ad941f454399 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d96200dfc7ed6fc96a351d1d659d1509332b353dacb96d405325329830ea48 +size 1518536 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e26b4a671ecb294bdb9f7e1d9295184d05bf57 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/plan.json @@ -0,0 +1,141 @@ +{ + "sample_id": "sample_000009", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1152, + 864 + ], + "aspect_ratio": "4:3", + "style": "photorealistic" + }, + "scene": { + "setting": "A well-lit, contemporary residential kitchen focused tightly around the stainless steel sink and adjacent countertop.", + "activity": "A person is holding a bottle of cleaning solution over the sink to begin washing or scrubbing the basin.", + "composition": "First-person or close over-the-shoulder mid-shot looking slightly downwards. The person's hands and the blue bottle dominate the foreground-center. The curved silver faucet rises behind the hands in the midground. The red mat occupies the bottom portion inside the sink depth. The yellow dish gloves are visibly resting on the left countertop edge, providing lateral balance.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 4:3 composition", + "final canvas size 1152x864", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_washing_sink", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "description": "A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.", + "role_in_scene": "Actively gripping the blue cleaner bottle over the kitchen sink." 
+ } + ], + "objects": [ + { + "name": "blue_cleaner_bottle", + "source_index": 0, + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "description": "A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.", + "role_in_scene": "Held in the person's hands, positioned just above the sink basin." + }, + { + "name": "red_sink_mat", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + "description": "A vibrant red, textured rubber mat featuring a grid or perforated pattern.", + "role_in_scene": "Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands." + }, + { + "name": "silver_faucet", + "source_index": 2, + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "description": "A polished, curved silver metal kitchen faucet with a standard spout.", + "role_in_scene": "Rising from the back of the sink counter, partially occluded by the person's hands and bottle." + }, + { + "name": "yellow_dish_gloves", + "source_index": 6, + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "description": "A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.", + "role_in_scene": "Laying flat on the countertop directly next to the sink rim, ready to be worn." + } + ] + }, + "expected_subjects": [ + { + "name": "person_washing_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "source_description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding an object in low light conditions.", + "sub_caption": "person: A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.. Scene role: Actively gripping the blue cleaner bottle over the kitchen sink.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "blue_cleaner_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "source_description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers.", + "sub_caption": "cleaner bottle: A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.. Scene role: Held in the person's hands, positioned just above the sink basin.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "red_sink_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + "source_description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter.", + "sub_caption": "red mat: A vibrant red, textured rubber mat featuring a grid or perforated pattern.. 
Scene role: Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "silver_faucet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "source_description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies.", + "sub_caption": "faucet: A polished, curved silver metal kitchen faucet with a standard spout.. Scene role: Rising from the back of the sink counter, partially occluded by the person's hands and bottle.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "yellow_dish_gloves", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "source_description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor.", + "sub_caption": "yellow dish gloves: A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.. 
Scene role: Laying flat on the countertop directly next to the sink rim, ready to be worn.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000009/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e5a38091906d0a124e08fabfe77dbd3963e926 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references.json @@ -0,0 +1,165 @@ +{ + "references": [ + { + "name": "person_washing_sink", + "ref_image": "references/ref_person_washing_sink.png", + "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", + "diversify_input": "crops/diversify_input_person_washing_sink.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", + "output": "references/ref_person_washing_sink.png", + "mask": "references/sam_mask_person_washing_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 14.0, + 674.0, + 1023.0 + ], + "mask_score": 3.431432, + "mask_area_ratio": 0.156425, + "elapsed_seconds": 10.3393 + }, + "reference_verify": "references/reference_verify_person_washing_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "blue_cleaner_bottle", + "ref_image": "references/ref_blue_cleaner_bottle.png", + 
"raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "diversify_input": "crops/diversify_input_blue_cleaner_bottle.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "output": "references/ref_blue_cleaner_bottle.png", + "mask": "references/sam_mask_blue_cleaner_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 55.0, + 696.0, + 968.0 + ], + "mask_score": 3.452606, + "mask_area_ratio": 0.185988, + "elapsed_seconds": 10.6007 + }, + "reference_verify": "references/reference_verify_blue_cleaner_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "red_sink_mat", + "ref_image": "references/ref_red_sink_mat.png", + "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", + "diversify_input": "crops/diversify_input_red_sink_mat.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", + "output": "references/ref_red_sink_mat.png", + "mask": "references/sam_mask_red_sink_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 21.0, + 148.0, + 1001.0, + 881.0 + ], + "mask_score": 3.479561, + "mask_area_ratio": 0.582802, + 
"elapsed_seconds": 10.3866 + }, + "reference_verify": "references/reference_verify_red_sink_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "silver_faucet", + "ref_image": "references/ref_silver_faucet.png", + "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", + "diversify_input": "crops/diversify_input_silver_faucet.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", + "output": "references/ref_silver_faucet.png", + "mask": "references/sam_mask_silver_faucet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 115.0, + 62.0, + 895.0, + 985.0 + ], + "mask_score": 3.425959, + "mask_area_ratio": 0.163316, + "elapsed_seconds": 10.2359 + }, + "reference_verify": "references/reference_verify_silver_faucet.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "yellow_dish_gloves", + "ref_image": "references/ref_yellow_dish_gloves.png", + "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", + "diversify_input": "crops/diversify_input_yellow_dish_gloves.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", + "output": "references/ref_yellow_dish_gloves.png", + "mask": "references/sam_mask_yellow_dish_gloves.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": 
"vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 138.0, + 66.0, + 850.0, + 952.0 + ], + "mask_score": 3.462321, + "mask_area_ratio": 0.269736, + "elapsed_seconds": 10.2407 + }, + "reference_verify": "references/reference_verify_yellow_dish_gloves.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_blue_cleaner_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_blue_cleaner_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..9fc1110902851ba381102bbaa5c977f2ead1135c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_blue_cleaner_bottle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a96bad12e6bb5f4e306d53a2969125252b8f4d76e7f9ece4810270e9932a8ed +size 293473 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_person_washing_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_person_washing_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..4e1899ff180310de128bc409fd2991a99cd1391f --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_person_washing_sink.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59712a72a5e3f4438535acaabccc32e13302f116603b967fc2a9059dfc2f2a81 +size 269339 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_red_sink_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_red_sink_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..580d2d55f133cc67c3295c1807c73fe19e07ac05 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_red_sink_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75ea006bb21e6b1c1919aa0aaf2708a7c61e30011e7c65096f1b6e7f26bf9fd +size 898492 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_silver_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_silver_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..2c2b0857458ef0201302120761476e6468bfadfc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_silver_faucet.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5edb1d6aab55ad30efdbab117761b58ec31c27083d623e59b82ec8d1641e54 +size 324383 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_yellow_dish_gloves.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_yellow_dish_gloves.png new file mode 100644 index 0000000000000000000000000000000000000000..d08e7dbdf025b35645107e0b5af431cca4e70498 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/ref_yellow_dish_gloves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866d41532bd083b7baeeeca87c21535be54b0a3b0a2fe3939ab5b1b96d5f5d6e +size 452856 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_blue_cleaner_bottle.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_blue_cleaner_bottle.json new file mode 100644 index 0000000000000000000000000000000000000000..44095cc074de8a6f6537dc0fb7c4bd1fbb62a931 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_blue_cleaner_bottle.json @@ -0,0 +1,46 @@ +{ + "name": "blue_cleaner_bottle", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + 
"attempt": 1, + "raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_blue_cleaner_bottle_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_blue_cleaner_bottle_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_ref_blue_cleaner_bottle_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_sam_mask_blue_cleaner_bottle_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 55.0, + 696.0, + 968.0 + ], + "mask_score": 3.452606, + "mask_area_ratio": 0.185988, + "elapsed_seconds": 10.6007 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The blue cleaner bottle is fully visible, complete, uncropped, isolated on a white background, and clearly matches the provided subject details." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_person_washing_sink.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_person_washing_sink.json new file mode 100644 index 0000000000000000000000000000000000000000..7b1169a42261c643ffdb2bb65f49ba501c5eb00d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_person_washing_sink.json @@ -0,0 +1,46 @@ +{ + "name": "person_washing_sink", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_washing_sink_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_washing_sink_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_ref_person_washing_sink_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_sam_mask_person_washing_sink_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 14.0, + 674.0, + 1023.0 + ], + "mask_score": 3.431432, + "mask_area_ratio": 0.156425, + "elapsed_seconds": 10.3393 + }, + "verify": { + "passed": true, + "subject_visible": true, + 
"complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible, white background, satisfies all requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_red_sink_mat.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_red_sink_mat.json new file mode 100644 index 0000000000000000000000000000000000000000..c6a5c4c7a831fc83e2121cfdbab03a1628702161 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_red_sink_mat.json @@ -0,0 +1,46 @@ +{ + "name": "red_sink_mat", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_red_sink_mat_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_red_sink_mat_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_ref_red_sink_mat_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_sam_mask_red_sink_mat_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 21.0, + 148.0, + 1001.0, + 881.0 + ], + 
"mask_score": 3.479561, + "mask_area_ratio": 0.582802, + "elapsed_seconds": 10.3866 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The reference image clearly shows the entire red, textured rubber mat with a grid pattern on a white background, serving perfectly as an isolated object reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_silver_faucet.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_silver_faucet.json new file mode 100644 index 0000000000000000000000000000000000000000..30d85841e73a30c59fdd14f8cdafddbdd4fa613c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_silver_faucet.json @@ -0,0 +1,46 @@ +{ + "name": "silver_faucet", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_silver_faucet_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_silver_faucet_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_ref_silver_faucet_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_sam_mask_silver_faucet_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 115.0, + 62.0, + 895.0, + 985.0 + ], + "mask_score": 3.425959, + "mask_area_ratio": 0.163316, + "elapsed_seconds": 10.2359 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete, isolated silver metal kitchen faucet on a white background, perfectly matching the subject description and requirements." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_yellow_dish_gloves.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_yellow_dish_gloves.json new file mode 100644 index 0000000000000000000000000000000000000000..eeeb3ded7ebb473d90fdbe57e5be6ea725fc1444 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/reference_verify_yellow_dish_gloves.json @@ -0,0 +1,46 @@ +{ + "name": "yellow_dish_gloves", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_yellow_dish_gloves_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_yellow_dish_gloves_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_ref_yellow_dish_gloves_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/candidate_sam_mask_yellow_dish_gloves_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 138.0, + 66.0, + 850.0, + 952.0 + ], + "mask_score": 3.462321, + "mask_area_ratio": 0.269736, + "elapsed_seconds": 10.2407 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The yellow dish gloves are completely visible, not cropped, and presented on a pure white background as an isolated reference image." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_blue_cleaner_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_blue_cleaner_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..018cd0e3dcb41a1f607de78010a1ceb9681105e0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_blue_cleaner_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_person_washing_sink.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_person_washing_sink.png new file mode 100644 index 0000000000000000000000000000000000000000..dc4b43e52afa7e4a7b2f39e02b4b8a95ab259b01 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_person_washing_sink.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_red_sink_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_red_sink_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..e30b425630ca12b46c7002ccdc559a4d4a296142 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_red_sink_mat.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_silver_faucet.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_silver_faucet.png new file mode 100644 index 0000000000000000000000000000000000000000..c8781f2b02d1b65e219bdb63574910332b167456 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_silver_faucet.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_yellow_dish_gloves.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_yellow_dish_gloves.png new file mode 100644 index 0000000000000000000000000000000000000000..5857b5d1a7b47a20d327ab25c7e4a656c16af99c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/references/sam_mask_yellow_dish_gloves.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/row.json new file mode 100644 index 0000000000000000000000000000000000000000..0012353ba55c36174ae02fe659bcb4fb2d00c8ae --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/row.json @@ -0,0 +1,256 @@ +{ + "sample_id": "sample_000009", + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "canvas_size": [ + 1152, + 864 + ], + "canvas_aspect_ratio": "4:3", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 5, + "n_detected": 5, + "n_subjects": 5, + "subjects": [ + { + "name": "person_washing_sink", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "source_name": "person", + "source_description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in low light conditions.", + "sub_caption": "person: A person wearing dark, long-sleeved clothing, with only their hands and forearms visible as they reach into the frame.. 
Scene role: Actively gripping the blue cleaner bottle over the kitchen sink.", + "measured_bbox": [ + 0.0921, + 0.3527, + 1.0, + 1.0 + ], + "detection_confidence": 950, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_washing_sink.png", + "raw_ref_image": "references/raw_ref_person_washing_sink_attempt_01.png", + "reference_verify": "references/reference_verify_person_washing_sink.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_person_washing_sink_attempt_01.png", + "output": "references/ref_person_washing_sink.png", + "mask": "references/sam_mask_person_washing_sink.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 14.0, + 674.0, + 1023.0 + ], + "mask_score": 3.431432, + "mask_area_ratio": 0.156425, + "elapsed_seconds": 10.3393 + } + }, + { + "name": "blue_cleaner_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_102:object:6", + "source_name": "cleaner bottle", + "source_description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers.", + "sub_caption": "cleaner bottle: A bright blue, unlabelled plastic spray bottle with a nozzle top, typical of liquid cleaning solutions.. 
Scene role: Held in the person's hands, positioned just above the sink basin.", + "measured_bbox": [ + 0.4107, + 0.2797, + 0.632, + 0.8174 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_blue_cleaner_bottle.png", + "raw_ref_image": "references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_blue_cleaner_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_blue_cleaner_bottle_attempt_01.png", + "output": "references/ref_blue_cleaner_bottle.png", + "mask": "references/sam_mask_blue_cleaner_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 55.0, + 696.0, + 968.0 + ], + "mask_score": 3.452606, + "mask_area_ratio": 0.185988, + "elapsed_seconds": 10.6007 + } + }, + { + "name": "red_sink_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P27_104:object:6", + "source_name": "red mat", + "source_description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter.", + "sub_caption": "red mat: A vibrant red, textured rubber mat featuring a grid or perforated pattern.. 
Scene role: Placed flat against the bottom of the stainless steel sink basin, visible beneath the hands.", + "measured_bbox": [ + 0.3049, + 0.5104, + 0.8328, + 0.9302 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_red_sink_mat.png", + "raw_ref_image": "references/raw_ref_red_sink_mat_attempt_01.png", + "reference_verify": "references/reference_verify_red_sink_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_red_sink_mat_attempt_01.png", + "output": "references/ref_red_sink_mat.png", + "mask": "references/sam_mask_red_sink_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 21.0, + 148.0, + 1001.0, + 881.0 + ], + "mask_score": 3.479561, + "mask_area_ratio": 0.582802, + "elapsed_seconds": 10.3866 + } + }, + { + "name": "silver_faucet", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P37_103:object:3", + "source_name": "faucet", + "source_description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies.", + "sub_caption": "faucet: A polished, curved silver metal kitchen faucet with a standard spout.. 
Scene role: Rising from the back of the sink counter, partially occluded by the person's hands and bottle.", + "measured_bbox": [ + 0.5344, + 0.0136, + 0.718, + 0.3772 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_silver_faucet.png", + "raw_ref_image": "references/raw_ref_silver_faucet_attempt_01.png", + "reference_verify": "references/reference_verify_silver_faucet.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_silver_faucet_attempt_01.png", + "output": "references/ref_silver_faucet.png", + "mask": "references/sam_mask_silver_faucet.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 115.0, + 62.0, + 895.0, + 985.0 + ], + "mask_score": 3.425959, + "mask_area_ratio": 0.163316, + "elapsed_seconds": 10.2359 + } + }, + { + "name": "yellow_dish_gloves", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P02_128:object:7", + "source_name": "yellow dish gloves", + "source_description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor.", + "sub_caption": "yellow dish gloves: A pair of thick, bright yellow rubber dishwashing gloves, slightly crumpled and glossy.. 
Scene role: Laying flat on the countertop directly next to the sink rim, ready to be worn.", + "measured_bbox": [ + 0.0, + 0.3471, + 0.2191, + 0.8264 + ], + "detection_confidence": 1.0, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_yellow_dish_gloves.png", + "raw_ref_image": "references/raw_ref_yellow_dish_gloves_attempt_01.png", + "reference_verify": "references/reference_verify_yellow_dish_gloves.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000009/references/raw_ref_yellow_dish_gloves_attempt_01.png", + "output": "references/ref_yellow_dish_gloves.png", + "mask": "references/sam_mask_yellow_dish_gloves.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 138.0, + 66.0, + 850.0, + 952.0 + ], + "mask_score": 3.462321, + "mask_area_ratio": 0.269736, + "elapsed_seconds": 10.2407 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..a6d9406ce3464d8ed00233cb2eaacb2b9443a950 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000009/vocab_task.json @@ -0,0 +1,84 @@ +{ + "task_id": "sample_000009", + "sample_id": "sample_000009", + "sample_index": 9, + "target_total": 5, + "target_people": 1, + "target_objects": 4, + "people_candidates": [ + { + 
"candidate_index": 0, + "source_offset": 69, + "image_id": "Ego4D:ego4d_video/EGO_120803.npy:person:0", + "name": "person", + "description": "A person holding an object, only their hands and parts of their dark clothing are visible. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding an object in low light conditions." + }, + { + "candidate_index": 1, + "source_offset": 190, + "image_id": "Ego4D:ego4d_video/EGO_172692.npy:person:0", + "name": "person", + "description": "A person visible mainly by their silhouette in black clothing, with a hand and arm illuminated. Source dataset: Ego4D. Scene context: A close-up view of a person wearing black clothing with their hand interacting with a greenish-blue object, possibly fabric or plastic." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 3097, + "image_id": "EPIC-Kitchens:P30_102:object:6", + "name": "cleaner bottle", + "description": "Blue plastic bottle with a label, possibly a cleaning product, located behind the sink. Source dataset: EPIC-Kitchens. Scene context: A kitchen counter area with a sink, dish rack, toaster, cutting board, and various utensils and containers." + }, + { + "candidate_index": 1, + "source_offset": 2782, + "image_id": "EPIC-Kitchens:P27_104:object:6", + "name": "red mat", + "description": "A textured red mat lining the bottom of the right sink basin. Source dataset: EPIC-Kitchens. Scene context: A kitchen sink area with dirty dishes in both basins, a hand holding a smartphone recording the scene, and various items on the counter." + }, + { + "candidate_index": 2, + "source_offset": 3835, + "image_id": "EPIC-Kitchens:P37_103:object:3", + "name": "faucet", + "description": "Silver metal kitchen faucet attached to the sink. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter and sink with bowls, raw chicken, and cleaning supplies." 
+ }, + { + "candidate_index": 3, + "source_offset": 1181, + "image_id": "EPIC-Kitchens:P04_116:object:7", + "name": "coffee grinder", + "description": "A tall, narrow stainless steel and black coffee grinder next to the toaster. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen counter with various appliances and cooking utensils, including a bowl of eggs and a pot on a black stovetop." + }, + { + "candidate_index": 4, + "source_offset": 9534, + "image_id": "Ego4D:ego4d_video/EGO_97419.npy:object:0", + "name": "blue light", + "description": "A small, slightly blurry blue light source visible against the dark background. Source dataset: Ego4D. Scene context: A very dark scene with a single small, blue light source visible." + }, + { + "candidate_index": 5, + "source_offset": 6882, + "image_id": "Ego4D:ego4d_video/EGO_255889.npy:object:1", + "name": "dark object", + "description": "A dark, possibly rectangular object with some texture or structure, partially visible in the upper portion of the frame. Source dataset: Ego4D. Scene context: A close-up view of a person wearing a red sweatshirt, possibly looking downwards or handling an object." + }, + { + "candidate_index": 6, + "source_offset": 432, + "image_id": "EPIC-Kitchens:P02_128:object:7", + "name": "yellow dish gloves", + "description": "A pair of yellow rubber gloves lying flat on the countertop near the sink. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen space featuring a washing machine, sink area, and various cleaning and kitchen supplies on countertops and the floor." + }, + { + "candidate_index": 7, + "source_offset": 5252, + "image_id": "Ego4D:ego4d_video/EGO_172694.npy:object:1", + "name": "bright edge", + "description": "A thin, bright blue and white illuminated line in the top right corner. Source dataset: Ego4D. Scene context: A very dark scene with faint, barely visible outlines of a plate of food and a bright edge in the corner." 
+ } + ], + "rng_seed": 1782870554, + "created_at": 1782259667.8461113 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..483eb7dc6b4fafef4a33352d64403916b297a6c7 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385b58d26f8fa40bdd66c6d7949e21d3082321df5ca5026c850c34de68984bd7 +size 1208923 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..270ea12fef6dcff7c5e5607b052bd3c2080bf85c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/compose_prompt.txt @@ -0,0 +1,107 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, atmospheric indoor room during a power outage, illuminated dramatically by scattered red and blue flashlights and reading lamps.", + "activity": "A group of people are gathered in the dark; some are quietly reading books under localized colored lights, while others chat, gesture, and hold small objects or flashlights.", + "composition": "Wide mid-level shot, heavily defined by shadows and dramatic, colorful rim lighting. The foreground features hands holding flashlights and gesturing, leading the eye into the midground where people sit and read in pools of red light, with depth added by a standing figure and a person lying on a bed in the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_reading_red_light", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "description": "A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.", + "role_in_scene": "Sitting on the left side of the room, focused intently on reading their book in the red glow." 
+ }, + { + "name": "person_lying_reading_blue_light", + "source_index": 13, + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "description": "A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.", + "role_in_scene": "Lying on a bed in the back right corner, quietly reading separate from the main group." + }, + { + "name": "person_holding_blue_flashlight", + "source_index": 6, + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "description": "An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.", + "role_in_scene": "Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination." + }, + { + "name": "person_standing_background", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "description": "A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.", + "role_in_scene": "Standing quietly in the background, observing the room while holding a round object." + }, + { + "name": "person_face_in_red_light", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "description": "A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.", + "role_in_scene": "Sitting near the center of the gathering, looking toward the others while bathed in red ambient light." 
+ }, + { + "name": "hands_holding_small_object", + "source_index": 10, + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "description": "A pair of hands illuminated in a reddish light, holding a small object against the dark environment.", + "role_in_scene": "Sitting within the group, holding a small item in the pool of red light." + }, + { + "name": "hands_holding_triangular_object", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "description": "Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.", + "role_in_scene": "Showing the patterned triangular object to the group nearby." + }, + { + "name": "hands_gesturing", + "source_index": 7, + "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "description": "A person's hands catching the red stage-like lighting, caught mid-gesture.", + "role_in_scene": "Actively gesturing and conversing with the central group." + } + ], + "objects": [ + { + "name": "plastic_water_bottle", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "description": "A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.", + "role_in_scene": "Resting upright on the floor in the center of the group, reflecting the dramatic lighting." 
+ } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_gesturing.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_gesturing.png new file mode 100644 index 0000000000000000000000000000000000000000..e81e54a782c9aa3c078d4c070a4be20b97d7252d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_gesturing.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350ceb2fd0923a485de11588fed1883a734a1dd5145f098fa6c022f3ed6d2a4a +size 103114 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..cc546a5a7e24022e8f2696758420465cfada92c5 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_small_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_triangular_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_triangular_object.png new file mode 100644 index 0000000000000000000000000000000000000000..358bb41619f9f3daa317db2a3daab3463fd17aa5 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_hands_holding_triangular_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_face_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_face_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..995072fcff1e7b902996b649d326b4cb6870c200 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_face_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d6df324e74799ee71bc4c654f23ea2a30b9d7d8a94a36d16b8dc4c42ee88c2 +size 208138 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_holding_blue_flashlight.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_holding_blue_flashlight.png new file mode 100644 index 0000000000000000000000000000000000000000..f839a147360f185d32a7997960599307a9daacae --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_holding_blue_flashlight.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab32871a41c7d07ffc530a041353dce9e89bac3fb964ecb2ef303945079f3b09 +size 219436 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_lying_reading_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_lying_reading_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..d6361dcaa2e392249d1d23583c83db37bb569a9f Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_lying_reading_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_reading_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_reading_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..32a4b3bda03728ba2b7f8f9434fc3c986b4f1b1a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_reading_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0530c06324fddfcbd279f4f617fedac8a167b06d5243f8c7905358a9adaa21f0 +size 346914 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_standing_background.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_standing_background.png new file mode 100644 index 0000000000000000000000000000000000000000..fdd7ffcd0cb4babd30c86a01c6f5a81827113cac --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_person_standing_background.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990c607ec06d9657d7e5be6041bc38443357bf3078e8c67017110a46e987fa6d +size 125913 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_plastic_water_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_plastic_water_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..4fd26bdd35d01beb6b107f19ed4dfe66f4079754 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/detect_refine_plastic_water_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_gesturing.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_gesturing.png new file mode 100644 index 0000000000000000000000000000000000000000..7611ad8c11a3826d7f32e91163ebc8cbb42e7e44 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_gesturing.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..7dd1a2cf0ec1abfad03bfcc29e96456f208f3251 
Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_small_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_triangular_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_triangular_object.png new file mode 100644 index 0000000000000000000000000000000000000000..530f60180118611ba0758132cc588cb7284f36dc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_hands_holding_triangular_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5cafd78516bcf4798a1bde2cd0fba2ddca9b3a98800dfbcbaa0ab0bcd3de75b +size 176260 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_face_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_face_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..0504095b9416dfc20daf751e1a4ba1d2fb2c507c Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_face_in_red_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_holding_blue_flashlight.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_holding_blue_flashlight.png new file mode 100644 index 0000000000000000000000000000000000000000..f8804661681ad0a13cd14f130dacc7b4f7c07136 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_holding_blue_flashlight.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943ff5f50dc5b466cf734f16b789cddb9814bb78af14ec5b0cce3f184e90baee +size 380204 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_lying_reading_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_lying_reading_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..6d18770c116d6a1f8d98bac6bdb2a3c316937f23 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_lying_reading_blue_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6685b1afe93ecb810acdf7c4b04d5d367e838c697b7f95bcbaf91c9a059ff6 +size 133939 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_reading_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_reading_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..9ff644db4b4f9cf50bc2c099f1bce3e0c35872d2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_reading_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da37ef9dae30b888023fa9aff2774e0c0565ff72f259db1be29332dcdff4242 +size 623267 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_standing_background.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_standing_background.png new file mode 100644 index 0000000000000000000000000000000000000000..483b2502513f8e31734534175266ba48fcd03bcf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_person_standing_background.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16142971a85eb1aed9eb6a117387e8956ef7af9d52be96ce124083aca820e1f6 +size 210053 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_plastic_water_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_plastic_water_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..a0f239526b7748ac698f92cf76e049df683df531 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/crops/diversify_input_plastic_water_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..1fe1d82e4a35430ed6c3c886ab8302f6bfde0a6e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/detections.json @@ -0,0 +1,173 @@ +[ + { + "name": "person_reading_red_light", + "present": true, + "bbox": [ + 0.0127, + 0.1501, + 0.4525, + 0.6235 + ], + "confidence": 0.9, + "notes": "The person reading the book is clearly visible in the center of the image, illuminated by the red light. 
The box encompasses her head, torso, arms, book, and visible legs.", + "coarse_bbox": [ + 0.014, + 0.139, + 0.429, + 0.605 + ], + "refine_crop": "crops/detect_refine_person_reading_red_light.png" + }, + { + "name": "person_lying_reading_blue_light", + "present": true, + "bbox": [ + 0.6944, + 0.2802, + 0.9406, + 0.4237 + ], + "confidence": 0.95, + "notes": "The person reading a book is clearly visible and fits the description.", + "coarse_bbox": [ + 0.692, + 0.275, + 0.938, + 0.414 + ], + "refine_crop": "crops/detect_refine_person_lying_reading_blue_light.png" + }, + { + "name": "person_holding_blue_flashlight", + "present": true, + "bbox": [ + 0.222, + 0.574, + 0.493, + 0.981 + ], + "confidence": 0.9, + "notes": "refine failed; using coarse bbox", + "coarse_bbox": [ + 0.222, + 0.574, + 0.493, + 0.981 + ], + "refine_crop": "crops/detect_refine_person_holding_blue_flashlight.png" + }, + { + "name": "person_standing_background", + "present": true, + "bbox": [ + 0.4462, + 0.0267, + 0.5613, + 0.5921 + ], + "confidence": 0.95, + "notes": "Bounding box covers the visible person, including the head, torso, arms, and legs standing in the dim room.", + "coarse_bbox": [ + 0.444, + 0.027, + 0.57, + 0.579 + ], + "refine_crop": "crops/detect_refine_person_standing_background.png" + }, + { + "name": "person_face_in_red_light", + "present": true, + "bbox": [ + 0.5873, + 0.345, + 0.6506, + 0.4705 + ], + "confidence": 0.95, + "notes": "Tight bounding box identifying the face of the person illuminated by deep red ambient light.", + "coarse_bbox": [ + 0.5, + 0.313, + 0.734, + 0.765 + ], + "refine_crop": "crops/detect_refine_person_face_in_red_light.png" + }, + { + "name": "hands_holding_small_object", + "present": true, + "bbox": [ + 0.5539, + 0.5673, + 0.6564, + 0.6516 + ], + "confidence": 0.95, + "notes": "A pair of hands illuminated in a reddish light holding a small object.", + "coarse_bbox": [ + 0.553, + 0.566, + 0.657, + 0.652 + ], + "refine_crop": 
"crops/detect_refine_hands_holding_small_object.png" + }, + { + "name": "hands_holding_triangular_object", + "present": true, + "bbox": [ + 0.6869, + 0.6745, + 0.898, + 0.9021 + ], + "confidence": 0.95, + "notes": "Both hands are clearly visible holding the triangular object, illuminated by the red light. Bbox includes both hands and the object.", + "coarse_bbox": [ + 0.686, + 0.676, + 0.898, + 0.912 + ], + "refine_crop": "crops/detect_refine_hands_holding_triangular_object.png" + }, + { + "name": "hands_gesturing", + "present": true, + "bbox": [ + 0.233, + 0.5315, + 0.3583, + 0.6782 + ], + "confidence": 0.95, + "notes": "The hand illuminated by the red light is clearly gesturing mid-air.", + "coarse_bbox": [ + 0.228, + 0.534, + 0.493, + 0.723 + ], + "refine_crop": "crops/detect_refine_hands_gesturing.png" + }, + { + "name": "plastic_water_bottle", + "present": true, + "bbox": [ + 0.5155, + 0.7075, + 0.5653, + 0.9622 + ], + "confidence": 0.98, + "notes": "A clear plastic bottle containing water.", + "coarse_bbox": [ + 0.514, + 0.707, + 0.565, + 0.955 + ], + "refine_crop": "crops/detect_refine_plastic_water_bottle.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..3d4c3f009f95363ff99cd24e86050a201e35d5a5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b96783ebd28cc963493e7b512dfbc00af10ce73b75aafc0bc228ecbefa6e1be +size 1304955 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..fa55d77fd63172fe4796e7e67bd161d7f2b80070 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/plan.json @@ -0,0 +1,217 @@ +{ + "sample_id": "sample_000010", + "target_total": 9, + "target_people": 8, + "target_objects": 1, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, atmospheric indoor room during a power outage, illuminated dramatically by scattered red and blue flashlights and reading lamps.", + "activity": "A group of people are gathered in the dark; some are quietly reading books under localized colored lights, while others chat, gesture, and hold small objects or flashlights.", + "composition": "Wide mid-level shot, heavily defined by shadows and dramatic, colorful rim lighting. The foreground features hands holding flashlights and gesturing, leading the eye into the midground where people sit and read in pools of red light, with depth added by a standing figure and a person lying on a bed in the background.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_reading_red_light", + "source_index": 2, + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "description": "A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.", + "role_in_scene": "Sitting on the left side of the room, focused intently on reading their book in the red glow." 
+ }, + { + "name": "person_lying_reading_blue_light", + "source_index": 13, + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "description": "A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.", + "role_in_scene": "Lying on a bed in the back right corner, quietly reading separate from the main group." + }, + { + "name": "person_holding_blue_flashlight", + "source_index": 6, + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "description": "An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.", + "role_in_scene": "Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination." + }, + { + "name": "person_standing_background", + "source_index": 4, + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "description": "A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.", + "role_in_scene": "Standing quietly in the background, observing the room while holding a round object." + }, + { + "name": "person_face_in_red_light", + "source_index": 11, + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "description": "A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.", + "role_in_scene": "Sitting near the center of the gathering, looking toward the others while bathed in red ambient light." 
+ }, + { + "name": "hands_holding_small_object", + "source_index": 10, + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "description": "A pair of hands illuminated in a reddish light, holding a small object against the dark environment.", + "role_in_scene": "Sitting within the group, holding a small item in the pool of red light." + }, + { + "name": "hands_holding_triangular_object", + "source_index": 1, + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "description": "Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.", + "role_in_scene": "Showing the patterned triangular object to the group nearby." + }, + { + "name": "hands_gesturing", + "source_index": 7, + "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "description": "A person's hands catching the red stage-like lighting, caught mid-gesture.", + "role_in_scene": "Actively gesturing and conversing with the central group." + } + ], + "objects": [ + { + "name": "plastic_water_bottle", + "source_index": 1, + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "description": "A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.", + "role_in_scene": "Resting upright on the floor in the center of the group, reflecting the dramatic lighting." + } + ] + }, + "expected_subjects": [ + { + "name": "person_reading_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "source_description": "A person is reading a book, partially visible in the red light, with only part of their arm and hand shown holding the book. Source dataset: Ego4D. 
Scene context: A person is reading a book illuminated by a red light in an otherwise dark room.", + "sub_caption": "person: A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.. Scene role: Sitting on the left side of the room, focused intently on reading their book in the red glow.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_lying_reading_blue_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "source_description": "A person lying on a bed. Source dataset: Ego4D. Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light.", + "sub_caption": "person: A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.. Scene role: Lying on a bed in the back right corner, quietly reading separate from the main group.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_holding_blue_flashlight", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "source_description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. Scene context: A person is holding a lit blue flashlight in a dark environment.", + "sub_caption": "person: An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.. 
Scene role: Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_standing_background", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.. Scene role: Standing quietly in the background, observing the room while holding a round object.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "person_face_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "source_description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner.", + "sub_caption": "person: A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.. 
Scene role: Sitting near the center of the gathering, looking toward the others while bathed in red ambient light.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_holding_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "source_description": "Visible hands, illuminated in reddish light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background.", + "sub_caption": "hands: A pair of hands illuminated in a reddish light, holding a small object against the dark environment.. Scene role: Sitting within the group, holding a small item in the pool of red light.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_holding_triangular_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "source_description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns.", + "sub_caption": "person: Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.. Scene role: Showing the patterned triangular object to the group nearby.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "hands_gesturing", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. 
Scene context: A person's hands are visible in a dark room illuminated by stage lights.", + "sub_caption": "person: A person's hands catching the red stage-like lighting, caught mid-gesture.. Scene role: Actively gesturing and conversing with the central group.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "plastic_water_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "source_description": "A clear plastic bottle, likely containing water, with a blue and white label, standing on the counter to the right. Source dataset: EPIC-Kitchens. Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces.", + "sub_caption": "water bottle: A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.. Scene role: Resting upright on the floor in the center of the group, reflecting the dramatic lighting.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000010/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references.json new file mode 100644 index 0000000000000000000000000000000000000000..f99971ce179e610eeb73da415731de8d67e9f803 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references.json @@ -0,0 +1,293 @@ +{ + "references": [ + { + "name": "person_reading_red_light", + "ref_image": "references/ref_person_reading_red_light.png", + "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", + "diversify_input": "crops/diversify_input_person_reading_red_light.png", + 
"sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", + "output": "references/ref_person_reading_red_light.png", + "mask": "references/sam_mask_person_reading_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 16.0, + 680.0, + 1023.0 + ], + "mask_score": 3.324489, + "mask_area_ratio": 0.157365, + "elapsed_seconds": 7.1471 + }, + "reference_verify": "references/reference_verify_person_reading_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "person_lying_reading_blue_light", + "ref_image": "references/ref_person_lying_reading_blue_light.png", + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "diversify_input": "crops/diversify_input_person_lying_reading_blue_light.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "output": "references/ref_person_lying_reading_blue_light.png", + "mask": "references/sam_mask_person_lying_reading_blue_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 360.0, + 75.0, + 665.0, + 980.0 + ], + "mask_score": 3.501601, + "mask_area_ratio": 0.137436, + 
"elapsed_seconds": 7.1743 + }, + "reference_verify": "references/reference_verify_person_lying_reading_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 2 + }, + { + "name": "person_holding_blue_flashlight", + "ref_image": "references/ref_person_holding_blue_flashlight.png", + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "diversify_input": "crops/diversify_input_person_holding_blue_flashlight.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "output": "references/ref_person_holding_blue_flashlight.png", + "mask": "references/sam_mask_person_holding_blue_flashlight.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 704.0, + 1017.0 + ], + "mask_score": 3.354952, + "mask_area_ratio": 0.150028, + "elapsed_seconds": 7.2854 + }, + "reference_verify": "references/reference_verify_person_holding_blue_flashlight.json", + "reference_verify_passed": true, + "reference_attempts": 5 + }, + { + "name": "person_standing_background", + "ref_image": "references/ref_person_standing_background.png", + "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", + "diversify_input": "crops/diversify_input_person_standing_background.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", + "output": "references/ref_person_standing_background.png", + "mask": 
"references/sam_mask_person_standing_background.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 337.0, + 25.0, + 678.0, + 1014.0 + ], + "mask_score": 3.31662, + "mask_area_ratio": 0.134835, + "elapsed_seconds": 7.1397 + }, + "reference_verify": "references/reference_verify_person_standing_background.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "person_face_in_red_light", + "ref_image": "references/ref_person_face_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", + "diversify_input": "crops/diversify_input_person_face_in_red_light.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", + "output": "references/ref_person_face_in_red_light.png", + "mask": "references/sam_mask_person_face_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 50.0, + 662.0, + 997.0 + ], + "mask_score": 3.468133, + "mask_area_ratio": 0.138924, + "elapsed_seconds": 7.09 + }, + "reference_verify": "references/reference_verify_person_face_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_holding_small_object", + "ref_image": "references/ref_hands_holding_small_object.png", + "raw_ref_image": 
"references/raw_ref_hands_holding_small_object_attempt_01.png", + "diversify_input": "crops/diversify_input_hands_holding_small_object.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", + "output": "references/ref_hands_holding_small_object.png", + "mask": "references/sam_mask_hands_holding_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 66.0, + 678.0, + 1002.0 + ], + "mask_score": 3.446404, + "mask_area_ratio": 0.148472, + "elapsed_seconds": 7.2113 + }, + "reference_verify": "references/reference_verify_hands_holding_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "hands_holding_triangular_object", + "ref_image": "references/ref_hands_holding_triangular_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "diversify_input": "crops/diversify_input_hands_holding_triangular_object.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "output": "references/ref_hands_holding_triangular_object.png", + "mask": "references/sam_mask_hands_holding_triangular_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + 
"sam_downscale": 0.625, + "prompt_box_xyxy": [ + 371.0, + 176.0, + 647.0, + 904.0 + ], + "mask_score": 3.472167, + "mask_area_ratio": 0.075788, + "elapsed_seconds": 7.6612 + }, + "reference_verify": "references/reference_verify_hands_holding_triangular_object.json", + "reference_verify_passed": true, + "reference_attempts": 5 + }, + { + "name": "hands_gesturing", + "ref_image": "references/ref_hands_gesturing.png", + "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", + "diversify_input": "crops/diversify_input_hands_gesturing.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", + "output": "references/ref_hands_gesturing.png", + "mask": "references/sam_mask_hands_gesturing.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 64.0, + 677.0, + 979.0 + ], + "mask_score": 3.460945, + "mask_area_ratio": 0.132824, + "elapsed_seconds": 7.2309 + }, + "reference_verify": "references/reference_verify_hands_gesturing.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "plastic_water_bottle", + "ref_image": "references/ref_plastic_water_bottle.png", + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", + "diversify_input": "crops/diversify_input_plastic_water_bottle.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", + "output": "references/ref_plastic_water_bottle.png", + "mask": 
"references/sam_mask_plastic_water_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 72.0, + 666.0, + 982.0 + ], + "mask_score": 3.471713, + "mask_area_ratio": 0.17085, + "elapsed_seconds": 9.3022 + }, + "reference_verify": "references/reference_verify_plastic_water_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 3 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_gesturing.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_gesturing.png new file mode 100644 index 0000000000000000000000000000000000000000..67e2335fe7e0dc0021e8f910953faff5f0051539 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_gesturing.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae788461e537b1f9d0b2525a2bb784e7f097009d0261c4749c2a57a7763d6f3f +size 238134 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..7e31e290a7ed403f0d377b649297001c2fe734b1 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_small_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ea704d0b8fa4865bae85b661a63d6d04031965bc5ece81183b075e70caccb2 +size 267332 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_triangular_object.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_triangular_object.png new file mode 100644 index 0000000000000000000000000000000000000000..5ad89a42779e3d5264d1d93b6ed75b578b95f6b0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_hands_holding_triangular_object.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35335c05a1cf9285b2bdc33f835a122bb330c8027d61be42c4d1c877f1366ade +size 151422 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_face_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_face_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..bcec8317dbcef9590a9958ee230725249ae96a58 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_face_in_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5858d6f42a9d854aad19c7a914bcaab9db6e9745529db13dbfc05b649d52240b +size 266333 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_holding_blue_flashlight.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_holding_blue_flashlight.png new file mode 100644 index 0000000000000000000000000000000000000000..8e473173ef9f2e9ca2e741751e52e87e26e46ce2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_holding_blue_flashlight.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e273d117a0fb8dcb7e4a3e76e2c9c1c10c876684d7bfe1c41f817597a9331eb8 +size 303798 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_lying_reading_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_lying_reading_blue_light.png new file mode 100644 index 
0000000000000000000000000000000000000000..6664e05b82f570e650d74664a97106b63544968a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_lying_reading_blue_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57e2a0d8c0b1ed1c883b1e633121fb6ca8e95e6f5161f979b6279b24827de35 +size 241982 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_reading_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_reading_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..c1a8c747ca3492af12dcc59965a82f1f4bbd7465 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_reading_red_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42cf5df696164169be5b409ad2d3e5ac791a1191c074ad1de677232456ed305c +size 328369 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_standing_background.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_standing_background.png new file mode 100644 index 0000000000000000000000000000000000000000..29fba7ba1cd71761bd5265acb0b24705a89823e8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_person_standing_background.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac6795349c6ed468bbe20072005b66c96b4106e9cb953b1fdc58df329f22073 +size 277718 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_plastic_water_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_plastic_water_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..7cb1c7011b4107cf709093c12f3933d43bc3bc22 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/ref_plastic_water_bottle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed5122186a5ca4049930a5f6e8a6f057b84f79eab4401b763060b9d6c65571c +size 372040 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_gesturing.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_gesturing.json new file mode 100644 index 0000000000000000000000000000000000000000..751fe199ece11df9e7fa93b579d36744bae4358e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_gesturing.json @@ -0,0 +1,46 @@ +{ + "name": "hands_gesturing", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_gesturing_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_gesturing_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_gesturing_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_gesturing_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ 
+ 350.0, + 64.0, + 677.0, + 979.0 + ], + "mask_score": 3.460945, + "mask_area_ratio": 0.132824, + "elapsed_seconds": 7.2309 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image meets all hard requirements for a full-body person reference. It is completely isolated with a white background and no cropping." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_small_object.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_small_object.json new file mode 100644 index 0000000000000000000000000000000000000000..b01c6f48ef97bb5cd4ea5da316c9770b5b952098 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_small_object.json @@ -0,0 +1,46 @@ +{ + "name": "hands_holding_small_object", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_holding_small_object_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_small_object_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_small_object_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_small_object_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_small_object_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 66.0, + 678.0, + 1002.0 + ], + "mask_score": 3.446404, + "mask_area_ratio": 0.148472, + "elapsed_seconds": 7.2113 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image features a complete person on a white background with no cropping, satisfying all hard requirements for a person." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_triangular_object.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_triangular_object.json new file mode 100644 index 0000000000000000000000000000000000000000..79187dfc3ff545af1ce7ff411b14671487180b58 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_hands_holding_triangular_object.json @@ -0,0 +1,216 @@ +{ + "name": "hands_holding_triangular_object", + "passed": true, + "accepted_attempt": 5, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_triangular_object_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_triangular_object_attempt_01.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_triangular_object_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_triangular_object_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 692.0, + 1017.0 + ], + "mask_score": 1.769483, + "mask_area_ratio": 0.079493, + "elapsed_seconds": 7.1346 + }, + "verify": { + "passed": false, + "subject_visible": false, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's head is missing.", + "The upper body, arms, and hands are severely corrupted and mostly missing.", + "The intended subject described in the caption (hands holding a triangular object) is not visible." + ], + "notes": "The image appears to be a heavily glitched or failed generation, consisting only of fragmented pieces of clothing with no head, hands, or visible subject features." 
+ } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_triangular_object_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_triangular_object_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_triangular_object_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_triangular_object_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 296.0, + 23.0, + 743.0, + 1023.0 + ], + "mask_score": 3.259862, + "mask_area_ratio": 0.213888, + "elapsed_seconds": 7.1809 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's lower legs and feet are cropped by the bottom edge of the image.", + "The image does not show the full body from the top of the head through the feet." + ], + "notes": "The subject is clearly visible against a white background, but the image is a partial body crop missing the lower legs and feet." 
+ } + }, + { + "attempt": 3, + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_03.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_triangular_object_attempt_03.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_triangular_object_attempt_03.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_03.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_triangular_object_attempt_03.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_triangular_object_attempt_03.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 275.0, + 153.0, + 749.0, + 912.0 + ], + "mask_score": 3.469005, + "mask_area_ratio": 0.173944, + "elapsed_seconds": 16.1402 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person is only a half-body/upper-body crop.", + "The person's legs and feet are missing.", + "The bottom edge of the person's body is cropped by the image boundary." + ], + "notes": "The image clearly shows the person holding the triangular object against a white background, but it fails the hard requirement for person subjects because it is not a full-body shot." 
+ } + }, + { + "attempt": 4, + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_04.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_triangular_object_attempt_04.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_triangular_object_attempt_04.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_04.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_triangular_object_attempt_04.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_triangular_object_attempt_04.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 292.0, + 35.0, + 741.0, + 1023.0 + ], + "mask_score": 2.531483, + "mask_area_ratio": 0.31384, + "elapsed_seconds": 7.9643 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's legs and feet are cropped out at the bottom boundary.", + "The image is an upper-to-mid body crop, not a full body reference." + ], + "notes": "The person is holding the triangular object against a white background, but the image cuts off the lower half of the person's body." 
+ } + }, + { + "attempt": 5, + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "candidate_ref_image": "references/candidate_ref_hands_holding_triangular_object_attempt_05.png", + "candidate_sam_mask": "references/candidate_sam_mask_hands_holding_triangular_object_attempt_05.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_hands_holding_triangular_object_attempt_05.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_hands_holding_triangular_object_attempt_05.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 371.0, + 176.0, + 647.0, + 904.0 + ], + "mask_score": 3.472167, + "mask_area_ratio": 0.075788, + "elapsed_seconds": 7.6612 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete full-body reference of a person isolated on a white background with sufficient margin and no cropping." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_face_in_red_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_face_in_red_light.json new file mode 100644 index 0000000000000000000000000000000000000000..fccdcd12f648c8eddaf618cdf33f48d8fb62451a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_face_in_red_light.json @@ -0,0 +1,46 @@ +{ + "name": "person_face_in_red_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_face_in_red_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_face_in_red_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_face_in_red_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_face_in_red_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 50.0, + 662.0, + 997.0 + ], + "mask_score": 3.468133, + "mask_area_ratio": 0.138924, + "elapsed_seconds": 7.09 + }, + "verify": { + 
"passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body of a person on a white background with adequate margins and no cropping." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_holding_blue_flashlight.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_holding_blue_flashlight.json new file mode 100644 index 0000000000000000000000000000000000000000..c8da8d89142591f8f7e4cf39cec280c91f168fa5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_holding_blue_flashlight.json @@ -0,0 +1,216 @@ +{ + "name": "person_holding_blue_flashlight", + "passed": true, + "accepted_attempt": 5, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_holding_blue_flashlight_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_holding_blue_flashlight_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_holding_blue_flashlight_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_holding_blue_flashlight_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 314.0, + 0.0, + 709.0, + 1023.0 + ], + "mask_score": 2.620105, + "mask_area_ratio": 0.116077, + "elapsed_seconds": 7.1478 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "missing body parts", + "severe image corruption or glitching on the subject" + ], + "notes": "Large sections of the person's body, including the head, arm, torso, and feet, are missing or heavily corrupted." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_person_holding_blue_flashlight_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_holding_blue_flashlight_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_holding_blue_flashlight_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_holding_blue_flashlight_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", 
+ "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 187.0, + 0.0, + 818.0, + 1023.0 + ], + "mask_score": 3.118471, + "mask_area_ratio": 0.335389, + "elapsed_seconds": 7.3178 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Image shows only a half-body crop", + "Legs and feet are not visible", + "Subject is cropped by the bottom boundary" + ], + "notes": "The image is a half-body shot of a person holding a flashlight. The lower half of the body is missing and cropped by the bottom edge." + } + }, + { + "attempt": 3, + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_03.png", + "candidate_ref_image": "references/candidate_ref_person_holding_blue_flashlight_attempt_03.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_holding_blue_flashlight_attempt_03.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_03.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_holding_blue_flashlight_attempt_03.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_holding_blue_flashlight_attempt_03.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 325.0, + 
7.0, + 704.0, + 1023.0 + ], + "mask_score": 1.757752, + "mask_area_ratio": 0.067009, + "elapsed_seconds": 7.1418 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's face, neck, and large portions of their clothing and body are missing or heavily distorted with white digital artifacts.", + "The image generation is corrupted, making it unusable as a reference." + ], + "notes": "The subject has full body extents within the frame, but the internal rendering is severely broken, with most of the body appearing as white voids." + } + }, + { + "attempt": 4, + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_04.png", + "candidate_ref_image": "references/candidate_ref_person_holding_blue_flashlight_attempt_04.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_holding_blue_flashlight_attempt_04.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_04.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_holding_blue_flashlight_attempt_04.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_holding_blue_flashlight_attempt_04.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 312.0, + 85.0, + 703.0, + 
960.0 + ], + "mask_score": 3.455609, + "mask_area_ratio": 0.146332, + "elapsed_seconds": 7.378 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's legs are cropped by the bottom edge of the image.", + "The full body down to the feet/shoes is not visible.", + "The image is only an upper-body/half-body crop." + ], + "notes": "The image shows the person holding the flashlight against a white background, but it fails the hard requirement of being a full-body shot as it is cut off at the knees." + } + }, + { + "attempt": 5, + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "candidate_ref_image": "references/candidate_ref_person_holding_blue_flashlight_attempt_05.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_holding_blue_flashlight_attempt_05.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_holding_blue_flashlight_attempt_05.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_holding_blue_flashlight_attempt_05.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 704.0, + 1017.0 + ], + "mask_score": 
3.354952, + "mask_area_ratio": 0.150028, + "elapsed_seconds": 7.2854 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body visible, well isolated on white background. Minor rendering artifacts on shoulder." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_lying_reading_blue_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_lying_reading_blue_light.json new file mode 100644 index 0000000000000000000000000000000000000000..799eb3feb5d7892b97ad9722796c96e70ddf0157 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_lying_reading_blue_light.json @@ -0,0 +1,88 @@ +{ + "name": "person_lying_reading_blue_light", + "passed": true, + "accepted_attempt": 2, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_lying_reading_blue_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_lying_reading_blue_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_lying_reading_blue_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_lying_reading_blue_light_attempt_01.png", + 
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 327.0, + 27.0, + 701.0, + 1023.0 + ], + "mask_score": 3.429948, + "mask_area_ratio": 0.207651, + "elapsed_seconds": 7.157 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The person's legs and feet are cropped by the bottom edge of the image.", + "Does not show the full body from head to toes." + ], + "notes": "The subject is standing and isolated on a white background, but the bottom of the image truncates the person below the knees." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_person_lying_reading_blue_light_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_lying_reading_blue_light_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_lying_reading_blue_light_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_lying_reading_blue_light_attempt_02.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 360.0, + 75.0, + 665.0, + 980.0 + ], + "mask_score": 3.501601, + "mask_area_ratio": 0.137436, + "elapsed_seconds": 7.1743 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The person is fully visible from head to toe with a white background and sufficient margin. The subject meets all hard requirements for a person reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_reading_red_light.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_reading_red_light.json new file mode 100644 index 0000000000000000000000000000000000000000..554e6d966dee91daa300118a0a26d326db131edd --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_reading_red_light.json @@ -0,0 +1,46 @@ +{ + "name": "person_reading_red_light", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_reading_red_light_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_reading_red_light_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_reading_red_light_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_reading_red_light_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 16.0, + 680.0, + 1023.0 + ], + "mask_score": 3.324489, + "mask_area_ratio": 0.157365, + "elapsed_seconds": 7.1471 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body is clearly visible, isolated on a white background with adequate margin. Some visual artifacts are present on the head and neck, but they do not violate the hard requirements for cropping or completeness." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_standing_background.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_standing_background.json new file mode 100644 index 0000000000000000000000000000000000000000..a64358d73f1da055537bf8b6dcedbc9e6e55383b --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_person_standing_background.json @@ -0,0 +1,46 @@ +{ + "name": "person_standing_background", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_standing_background_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_standing_background_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_person_standing_background_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_person_standing_background_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 337.0, + 25.0, + 678.0, + 1014.0 + ], + "mask_score": 3.31662, + "mask_area_ratio": 0.134835, + "elapsed_seconds": 7.1397 + 
}, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body of the person is clearly visible with sufficient margin on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_plastic_water_bottle.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_plastic_water_bottle.json new file mode 100644 index 0000000000000000000000000000000000000000..13399bb0b6c17e10d9c91389bdafcb9e53589a6e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/reference_verify_plastic_water_bottle.json @@ -0,0 +1,128 @@ +{ + "name": "plastic_water_bottle", + "passed": true, + "accepted_attempt": 3, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_plastic_water_bottle_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_plastic_water_bottle_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_plastic_water_bottle_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_plastic_water_bottle_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + 
"sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 344.0, + 320.0, + 678.0, + 545.0 + ], + "mask_score": 3.430306, + "mask_area_ratio": 0.095093, + "elapsed_seconds": 7.3282 + }, + "verify": { + "passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "Subject is severely truncated, missing the bottom half" + ], + "notes": "The image only shows the top half of the water bottle; the bottom section is completely cut off." + } + }, + { + "attempt": 2, + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_02.png", + "candidate_ref_image": "references/candidate_ref_plastic_water_bottle_attempt_02.png", + "candidate_sam_mask": "references/candidate_sam_mask_plastic_water_bottle_attempt_02.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_02.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_plastic_water_bottle_attempt_02.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_plastic_water_bottle_attempt_02.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 303.0, + 675.0, + 561.0 + ], + "mask_score": 3.470568, + "mask_area_ratio": 0.097877, + "elapsed_seconds": 7.2724 + }, + "verify": { + 
"passed": false, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [ + "The bottom half of the water bottle is missing/truncated." + ], + "notes": "The image shows a water bottle, but it is severely truncated at the bottom, making it an incomplete reference." + } + }, + { + "attempt": 3, + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", + "candidate_ref_image": "references/candidate_ref_plastic_water_bottle_attempt_03.png", + "candidate_sam_mask": "references/candidate_sam_mask_plastic_water_bottle_attempt_03.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_ref_plastic_water_bottle_attempt_03.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/candidate_sam_mask_plastic_water_bottle_attempt_03.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 72.0, + 666.0, + 982.0 + ], + "mask_score": 3.471713, + "mask_area_ratio": 0.17085, + "elapsed_seconds": 9.3022 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a single plastic water bottle on a white 
background, clearly capturing the entire object without cropping." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_gesturing.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_gesturing.png new file mode 100644 index 0000000000000000000000000000000000000000..bbbae5d58f1884bde924571b48d5dd24e758bed2 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_gesturing.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_small_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_small_object.png new file mode 100644 index 0000000000000000000000000000000000000000..ba17637fdce243a89a2e23cc2da865b9e3077037 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_small_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_triangular_object.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_triangular_object.png new file mode 100644 index 0000000000000000000000000000000000000000..f6a28122be165752a0627e46c03bde2b388e3ee0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_hands_holding_triangular_object.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_face_in_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_face_in_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..45226ba0cc995616f19a25c5de582b46ab460670 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_face_in_red_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_holding_blue_flashlight.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_holding_blue_flashlight.png new file mode 100644 index 0000000000000000000000000000000000000000..e4db84d6a9cefdcdfc66bcbb508372e88fcdbee8 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_holding_blue_flashlight.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_lying_reading_blue_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_lying_reading_blue_light.png new file mode 100644 index 0000000000000000000000000000000000000000..4301b394c37f64598c2ae1c102333d9adc89ba74 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_lying_reading_blue_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_reading_red_light.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_reading_red_light.png new file mode 100644 index 0000000000000000000000000000000000000000..8b28e7b50c66e055a3e58397f8accd1fee5dfaaf Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_reading_red_light.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_standing_background.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_standing_background.png new file mode 100644 index 0000000000000000000000000000000000000000..01067f60ed4bfa48eeb82671ebbfbdc403068195 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_person_standing_background.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_plastic_water_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_plastic_water_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..c53960d577178ae674d7ee8bfe7408bccae64bef Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/references/sam_mask_plastic_water_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/row.json new file mode 100644 index 0000000000000000000000000000000000000000..48e4b687b9101f7903bf4844dd791cd1b0554089 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/row.json @@ -0,0 +1,440 @@ +{ + "sample_id": "sample_000010", + "target_total": 9, + "target_people": 8, + "target_objects": 1, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 9, + "n_detected": 9, + "n_subjects": 9, + "subjects": [ + { + "name": "person_reading_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "source_name": "person", + "source_description": "A person is reading a book, partially visible in the red light, with only part of their arm and hand shown holding the book. Source dataset: Ego4D. Scene context: A person is reading a book illuminated by a red light in an otherwise dark room.", + "sub_caption": "person: A person partially visible in the shadows, holding and reading a book that is strongly illuminated by a red light.. 
Scene role: Sitting on the left side of the room, focused intently on reading their book in the red glow.", + "measured_bbox": [ + 0.0127, + 0.1501, + 0.4525, + 0.6235 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_reading_red_light.png", + "raw_ref_image": "references/raw_ref_person_reading_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_reading_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_reading_red_light_attempt_01.png", + "output": "references/ref_person_reading_red_light.png", + "mask": "references/sam_mask_person_reading_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 339.0, + 16.0, + 680.0, + 1023.0 + ], + "mask_score": 3.324489, + "mask_area_ratio": 0.157365, + "elapsed_seconds": 7.1471 + } + }, + { + "name": "person_lying_reading_blue_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "source_name": "person", + "source_description": "A person lying on a bed. Source dataset: Ego4D. Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light.", + "sub_caption": "person: A person lying down comfortably, visible in the dark while reading a book illuminated by a small, crisp blue light.. 
Scene role: Lying on a bed in the back right corner, quietly reading separate from the main group.", + "measured_bbox": [ + 0.6944, + 0.2802, + 0.9406, + 0.4237 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_lying_reading_blue_light.png", + "raw_ref_image": "references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "reference_verify": "references/reference_verify_person_lying_reading_blue_light.json", + "reference_verify_passed": true, + "reference_attempts": 2, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_lying_reading_blue_light_attempt_02.png", + "output": "references/ref_person_lying_reading_blue_light.png", + "mask": "references/sam_mask_person_lying_reading_blue_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 360.0, + 75.0, + 665.0, + 980.0 + ], + "mask_score": 3.501601, + "mask_area_ratio": 0.137436, + "elapsed_seconds": 7.1743 + } + }, + { + "name": "person_holding_blue_flashlight", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "source_name": "person", + "source_description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. Scene context: A person is holding a lit blue flashlight in a dark environment.", + "sub_caption": "person: An arm covered by a dark sleeve, with the hand firmly holding a bright blue flashlight that cuts through the darkness.. 
Scene role: Positioned in the lower foreground, pointing the blue flashlight into the room to provide illumination.", + "measured_bbox": [ + 0.222, + 0.574, + 0.493, + 0.981 + ], + "detection_confidence": 0.9, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_holding_blue_flashlight.png", + "raw_ref_image": "references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "reference_verify": "references/reference_verify_person_holding_blue_flashlight.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_holding_blue_flashlight_attempt_05.png", + "output": "references/ref_person_holding_blue_flashlight.png", + "mask": "references/sam_mask_person_holding_blue_flashlight.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 331.0, + 17.0, + 704.0, + 1017.0 + ], + "mask_score": 3.354952, + "mask_area_ratio": 0.150028, + "elapsed_seconds": 7.2854 + } + }, + { + "name": "person_standing_background", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "source_name": "person", + "source_description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall.", + "sub_caption": "person: A person seen from the torso down, standing in the dim room and holding a dark round object near a dimly lit blue wall.. 
Scene role: Standing quietly in the background, observing the room while holding a round object.", + "measured_bbox": [ + 0.4462, + 0.0267, + 0.5613, + 0.5921 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_standing_background.png", + "raw_ref_image": "references/raw_ref_person_standing_background_attempt_01.png", + "reference_verify": "references/reference_verify_person_standing_background.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_standing_background_attempt_01.png", + "output": "references/ref_person_standing_background.png", + "mask": "references/sam_mask_person_standing_background.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 337.0, + 25.0, + 678.0, + 1014.0 + ], + "mask_score": 3.31662, + "mask_area_ratio": 0.134835, + "elapsed_seconds": 7.1397 + } + }, + { + "name": "person_face_in_red_light", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "source_name": "person", + "source_description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. 
Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner.", + "sub_caption": "person: A person emerging from the darkness, with deep red light catching the skin on their hands, arms, and part of their face.. Scene role: Sitting near the center of the gathering, looking toward the others while bathed in red ambient light.", + "measured_bbox": [ + 0.5873, + 0.345, + 0.6506, + 0.4705 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_face_in_red_light.png", + "raw_ref_image": "references/raw_ref_person_face_in_red_light_attempt_01.png", + "reference_verify": "references/reference_verify_person_face_in_red_light.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_person_face_in_red_light_attempt_01.png", + "output": "references/ref_person_face_in_red_light.png", + "mask": "references/sam_mask_person_face_in_red_light.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 357.0, + 50.0, + 662.0, + 997.0 + ], + "mask_score": 3.468133, + "mask_area_ratio": 0.138924, + "elapsed_seconds": 7.09 + } + }, + { + "name": "hands_holding_small_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "source_name": "hands", + "source_description": "Visible hands, illuminated in reddish light, holding an object. 
Source dataset: Ego4D. Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background.", + "sub_caption": "hands: A pair of hands illuminated in a reddish light, holding a small object against the dark environment.. Scene role: Sitting within the group, holding a small item in the pool of red light.", + "measured_bbox": [ + 0.5539, + 0.5673, + 0.6564, + 0.6516 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_small_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_small_object_attempt_01.png", + "reference_verify": "references/reference_verify_hands_holding_small_object.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_small_object_attempt_01.png", + "output": "references/ref_hands_holding_small_object.png", + "mask": "references/sam_mask_hands_holding_small_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 347.0, + 66.0, + 678.0, + 1002.0 + ], + "mask_score": 3.446404, + "mask_area_ratio": 0.148472, + "elapsed_seconds": 7.2113 + } + }, + { + "name": "hands_holding_triangular_object", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "source_name": "person", + "source_description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. 
Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns.", + "sub_caption": "person: Hands clearly visible under a red light source, carefully holding a small triangular object with patterns.. Scene role: Showing the patterned triangular object to the group nearby.", + "measured_bbox": [ + 0.6869, + 0.6745, + 0.898, + 0.9021 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_holding_triangular_object.png", + "raw_ref_image": "references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "reference_verify": "references/reference_verify_hands_holding_triangular_object.json", + "reference_verify_passed": true, + "reference_attempts": 5, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_holding_triangular_object_attempt_05.png", + "output": "references/ref_hands_holding_triangular_object.png", + "mask": "references/sam_mask_hands_holding_triangular_object.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 371.0, + 176.0, + 647.0, + 904.0 + ], + "mask_score": 3.472167, + "mask_area_ratio": 0.075788, + "elapsed_seconds": 7.6612 + } + }, + { + "name": "hands_gesturing", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. 
Scene context: A person's hands are visible in a dark room illuminated by stage lights.", + "sub_caption": "person: A person's hands catching the red stage-like lighting, caught mid-gesture.. Scene role: Actively gesturing and conversing with the central group.", + "measured_bbox": [ + 0.233, + 0.5315, + 0.3583, + 0.6782 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_hands_gesturing.png", + "raw_ref_image": "references/raw_ref_hands_gesturing_attempt_01.png", + "reference_verify": "references/reference_verify_hands_gesturing.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_hands_gesturing_attempt_01.png", + "output": "references/ref_hands_gesturing.png", + "mask": "references/sam_mask_hands_gesturing.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 350.0, + 64.0, + 677.0, + 979.0 + ], + "mask_score": 3.460945, + "mask_area_ratio": 0.132824, + "elapsed_seconds": 7.2309 + } + }, + { + "name": "plastic_water_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_124:object:6", + "source_name": "water bottle", + "source_description": "A clear plastic bottle, likely containing water, with a blue and white label, standing on the counter to the right. Source dataset: EPIC-Kitchens. 
Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces.", + "sub_caption": "water bottle: A clear plastic bottle containing water, catching the colorful reflections of the scattered red and blue lights.. Scene role: Resting upright on the floor in the center of the group, reflecting the dramatic lighting.", + "measured_bbox": [ + 0.5155, + 0.7075, + 0.5653, + 0.9622 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_plastic_water_bottle.png", + "raw_ref_image": "references/raw_ref_plastic_water_bottle_attempt_03.png", + "reference_verify": "references/reference_verify_plastic_water_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 3, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000010/references/raw_ref_plastic_water_bottle_attempt_03.png", + "output": "references/ref_plastic_water_bottle.png", + "mask": "references/sam_mask_plastic_water_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 356.0, + 72.0, + 666.0, + 982.0 + ], + "mask_score": 3.471713, + "mask_area_ratio": 0.17085, + "elapsed_seconds": 9.3022 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/vocab_task.json new file mode 100644 index 
0000000000000000000000000000000000000000..b6a63bd96e83dce9c5b20179974b8b4955fd5d0d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000010/vocab_task.json @@ -0,0 +1,140 @@ +{ + "task_id": "sample_000010", + "sample_id": "sample_000010", + "sample_index": 10, + "target_total": 9, + "target_people": 8, + "target_objects": 1, + "people_candidates": [ + { + "candidate_index": 0, + "source_offset": 571, + "image_id": "Ego4D:ego4d_video/EGO_259461.npy:person:0", + "name": "person", + "description": "The person is mostly obscured by the darkness, but their hands are visible in the center of the frame, holding or manipulating a small, slightly lighter-colored object or piece of material. Source dataset: Ego4D. Scene context: A close-up view of a person's hands holding what appears to be a small object or part of a garment in a dark setting, possibly examining or manipulating it." + }, + { + "candidate_index": 1, + "source_offset": 1101, + "image_id": "Ego4D:ego4d_video/EGO_76363.npy:person:0", + "name": "person", + "description": "Only hands are visible, illuminated by a red light source, positioned towards the top right. Source dataset: Ego4D. Scene context: A very dark image showing hands illuminated by red light and a small triangular object with blue and white patterns." + }, + { + "candidate_index": 2, + "source_offset": 458, + "image_id": "Ego4D:ego4d_video/EGO_236885.npy:person:0", + "name": "person", + "description": "A person is reading a book, partially visible in the red light, with only part of their arm and hand shown holding the book. Source dataset: Ego4D. Scene context: A person is reading a book illuminated by a red light in an otherwise dark room." + }, + { + "candidate_index": 3, + "source_offset": 182, + "image_id": "Ego4D:ego4d_video/EGO_172608.npy:person:0", + "name": "person", + "description": "A person is visible only by a small portion of their leg or foot, partially obscured in the darkness. Source dataset: Ego4D. 
Scene context: A dark scene with a blurred object appearing to be a foot in motion against a textured, mostly dark background." + }, + { + "candidate_index": 4, + "source_offset": 254, + "image_id": "Ego4D:ego4d_video/EGO_192493.npy:person:0", + "name": "person", + "description": "A person standing in the room, seen from the torso down, holding a dark round object. Source dataset: Ego4D. Scene context: A person stands in a dimly lit room holding a round object near a blue wall." + }, + { + "candidate_index": 5, + "source_offset": 1068, + "image_id": "Ego4D:ego4d_video/EGO_76343.npy:person:0", + "name": "person holding object", + "description": "Only hands are visible, illuminated in deep red light, holding an object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding an object illuminated by red light, and another person's face faintly visible in the background." + }, + { + "candidate_index": 6, + "source_offset": 883, + "image_id": "Ego4D:ego4d_video/EGO_42340.npy:person:0", + "name": "person", + "description": "A person is visible in the lower right, holding a blue flashlight. Only their hand and part of their arm, covered by a dark sleeve, are visible. Source dataset: Ego4D. Scene context: A person is holding a lit blue flashlight in a dark environment." + }, + { + "candidate_index": 7, + "source_offset": 748, + "image_id": "Ego4D:ego4d_video/EGO_282746.npy:person:0", + "name": "person", + "description": "The person's hands are visible, lit by red light, holding something or gesturing. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark room illuminated by stage lights." + }, + { + "candidate_index": 8, + "source_offset": 162, + "image_id": "Ego4D:ego4d_video/EGO_165748.npy:person:0", + "name": "person", + "description": "The person's hands are visible, holding a dark object. Source dataset: Ego4D. 
Scene context: A close-up view of a person's hands holding what appears to be an electronic device or small object in a dark environment with some light reflection." + }, + { + "candidate_index": 9, + "source_offset": 690, + "image_id": "Ego4D:ego4d_video/EGO_274411.npy:person:0", + "name": "person silhouette", + "description": "A dark, silhouetted figure of a person on the left side of the image, holding an object. Source dataset: Ego4D. Scene context: A highly pixelated and dark image showing a silhouette of a person seemingly holding a long object, possibly a stick or a tool, against a lighter background." + }, + { + "candidate_index": 10, + "source_offset": 610, + "image_id": "Ego4D:ego4d_video/EGO_259526.npy:person:0", + "name": "hands", + "description": "Visible hands, illuminated in reddish light, holding an object. Source dataset: Ego4D. Scene context: A close-up view of hands holding something, with abstract colored shapes or fabrics in the dark background." + }, + { + "candidate_index": 11, + "source_offset": 1150, + "image_id": "Ego4D:ego4d_video/EGO_76449.npy:person:0", + "name": "person", + "description": "Partially visible due to very low lighting; red light illuminates skin on what looks like hands or arms, and possibly part of the face or shoulder. Source dataset: Ego4D. Scene context: A very dark scene with sparse red lighting illuminating what appears to be a person's hands and part of their face or shoulder in the center, and a glowing rectangular object in the upper right corner." + }, + { + "candidate_index": 12, + "source_offset": 311, + "image_id": "Ego4D:ego4d_video/EGO_202366.npy:person:0", + "name": "person", + "description": "Visible only by their hands, illuminated by a red light, holding and manipulating a smoking device. Source dataset: Ego4D. Scene context: A close-up view of hands holding a smoking device, illuminated by a red light in a very dark environment." 
+ }, + { + "candidate_index": 13, + "source_offset": 1219, + "image_id": "Ego4D:ego4d_video/EGO_97416.npy:person:0", + "name": "person", + "description": "A person lying on a bed. Source dataset: Ego4D. Scene context: An indoor scene featuring a person lying on a bed reading a book with a small blue light." + }, + { + "candidate_index": 14, + "source_offset": 1182, + "image_id": "Ego4D:ego4d_video/EGO_93019.npy:person:0", + "name": "person", + "description": "Visible only by a dark silhouette of an arm or hand holding a long object, positioned in the lower center foreground. Source dataset: Ego4D. Scene context: A dark, low-light view showing a person holding a long blade or stick, with crumpled blue fabric partially visible on the left." + }, + { + "candidate_index": 15, + "source_offset": 776, + "image_id": "Ego4D:ego4d_video/EGO_282818.npy:person:0", + "name": "person", + "description": "Only the person's hands are clearly visible in the foreground, illuminated by a reddish light. Source dataset: Ego4D. Scene context: A person's hands are visible in a dark room with some light sources in the background." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 264, + "image_id": "EPIC-Kitchens:P02_120:object:4", + "name": "cable", + "description": "A white electrical cable coiled loosely on the floor near the exercise ball. Source dataset: EPIC-Kitchens. Scene context: A view of a kitchen or utility room floor, showing a washing machine, cleaning supplies, an exercise ball, a vacuum cleaner, and a set of wooden drawers." + }, + { + "candidate_index": 1, + "source_offset": 2697, + "image_id": "EPIC-Kitchens:P26_124:object:6", + "name": "water bottle", + "description": "A clear plastic bottle, likely containing water, with a blue and white label, standing on the counter to the right. Source dataset: EPIC-Kitchens. 
Scene context: A close-up view of a stovetop with a pan cooking food, accompanied by various kitchen items on the adjacent counter spaces." + } + ], + "rng_seed": 1782975283, + "created_at": 1782259667.863426 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/bbox_overlay.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/bbox_overlay.png new file mode 100644 index 0000000000000000000000000000000000000000..f8a69ac6255472b9b2d6ad8417b5fa513fe70dc5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/bbox_overlay.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d98cb738a4320724d6c5cfbab45eaaa4dab8bdb3cc044a7ac0b2874f361f4a +size 1310980 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/compose_prompt.txt b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/compose_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..d6008f26a1ef12e8609dd121da04d2dfe7590d4a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/compose_prompt.txt @@ -0,0 +1,115 @@ +Render the following JSON scene specification as a photorealistic 1248x832 image using a true 3:2 canvas. Every listed person and object must appear visibly in the image. Keep normal proportions and the requested aspect ratio. The foreground must contain only subjects explicitly listed in the JSON scene specification. Do not add any unlisted foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects. Background context may include non-localizable scenery only when it does not introduce a distinct foreground subject. No text, no labels, no logos, no watermarks. 
+JSON scene specification: +{ + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, dimly lit kitchen at night, illuminated by contrasting harsh red and faint blue ambient lights.", + "activity": "A person's hands are working with a metal tool on the kitchen counter, surrounded by scattered kitchenware.", + "composition": "Close-to-medium wide shot focusing on the hands and countertop in the foreground, with the background cabinets and shelf receding into the shadows. High contrast lighting with a red cast on the hands and tool, and faint blue reflections on the textured mat.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_hands", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "description": "The person's hands are visible, illuminated by a strong red light, holding a small object or tool.", + "role_in_scene": "Working intently, holding the metal tool over the granite countertop." + } + ], + "objects": [ + { + "name": "textured_mat", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.", + "role_in_scene": "Lying flat on the granite countertop under the person's hands." 
+ }, + { + "name": "wall_shelf", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "description": "A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.", + "role_in_scene": "Attached to the wall in the shadowy background above the counter." + }, + { + "name": "striped_towel", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "description": "A black and white striped towel.", + "role_in_scene": "Hanging down from a lower cabinet handle just below the edge of the countertop." + }, + { + "name": "glass_bottle", + "source_index": 6, + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "description": "A clear glass bottle filled with amber liquid.", + "role_in_scene": "Standing on the granite countertop to the side, catching dim reflections." + }, + { + "name": "dirty_plate", + "source_index": 8, + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "description": "A partially visible plate containing food remnants.", + "role_in_scene": "Sitting on the counter near the bottle in the dim light." + }, + { + "name": "metal_tool", + "source_index": 9, + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "description": "A dark, silhouetted tool with a thin, elongated structure resembling pliers.", + "role_in_scene": "Held firmly by the person's hands under the red light." + }, + { + "name": "box_grater", + "source_index": 10, + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "description": "A metal box grater.", + "role_in_scene": "Resting on the counter towards the background left." 
+ }, + { + "name": "wooden_cabinets", + "source_index": 11, + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "description": "A row of light brown wooden cabinets with metal handles.", + "role_in_scene": "Lining the right side of the kitchen in the background shadows." + }, + { + "name": "granite_counter", + "source_index": 16, + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "description": "A grey, white, and black speckled stone countertop.", + "role_in_scene": "The main horizontal surface spanning the lower half of the frame, holding the scattered items." + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_box_grater.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_box_grater.png new file mode 100644 index 0000000000000000000000000000000000000000..f0e2ca94861cd17979bf9279cbd5929a2462d29a Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_box_grater.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_dirty_plate.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_dirty_plate.png new file mode 100644 index 0000000000000000000000000000000000000000..e04c882dc5396d8bc00cfce465c3d29d11dfa71b Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_dirty_plate.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_glass_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_glass_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..e07001ab17606e700b4eea820c82469d359bfda4 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_glass_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_granite_counter.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_granite_counter.png new file mode 100644 index 0000000000000000000000000000000000000000..2504a28bd11ab66b2b7fe12848ebc0d94adb0d2d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_granite_counter.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b23880089980b4ad24cadd4c21a2fe7b1be40ac996b84a06b1643cf2d61872 +size 618764 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_metal_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_metal_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..a50fd6132e6e2dc3067ee9509b93ca2d9b730879 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_metal_tool.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_person_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_person_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..2458aed079baa8b0446d39b9da71693cf3549dc2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_person_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7a6e67f371ce35be03e24da3759e954b18d66060a1bb65c7f461d47ebdcc5f +size 177966 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_striped_towel.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_striped_towel.png new file mode 100644 index 
0000000000000000000000000000000000000000..526d47dcb8be4d8f96a0d694d2269beb797687a0 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_striped_towel.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_textured_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_textured_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..94f73d741fb70529756e52bdaed90a83acfd2b3d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_textured_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446df662bc2e0430edf7a1ce233b985f153c9fe521ac2446f77cf8345c28dbd1 +size 262589 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wall_shelf.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wall_shelf.png new file mode 100644 index 0000000000000000000000000000000000000000..1f37c86e6d5d9fb1c22a51d5f503445c908e3c34 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wall_shelf.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wooden_cabinets.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wooden_cabinets.png new file mode 100644 index 0000000000000000000000000000000000000000..4bf409b6c92a9fa57384f07eaaebbb10741048da --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/detect_refine_wooden_cabinets.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6495e44e0159897033b43f6e5c184da3fc86aa336021b5502e2cc82724a725cd +size 198289 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_box_grater.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_box_grater.png new file mode 100644 index 0000000000000000000000000000000000000000..c29a91b674b6e7c13d7803a7e8e135603802a260 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_box_grater.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_dirty_plate.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_dirty_plate.png new file mode 100644 index 0000000000000000000000000000000000000000..9d260598831d5a326106d5a966b62183488bfbac Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_dirty_plate.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_glass_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_glass_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..513e58e7e1787d61ae723491c7b4ba8feacd13f3 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_glass_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_granite_counter.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_granite_counter.png new file mode 100644 index 0000000000000000000000000000000000000000..dc24f2f1680fc1a0800bf76cf60e7c4fed6de9c6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_granite_counter.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16ae5036dd09fc5c0c7ae44ff1b98332173d1aafa9e8325f894557b94277e9f +size 638316 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_metal_tool.png 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_metal_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..81c36ff99ee3d20a7690f9c9b29dd0c88bf35ce2 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_metal_tool.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_person_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_person_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..59170db4b4cd54ff25d87f10ce76b5d83c01b5f8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_person_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d77187500a4755bacc6458737bd01c85dd1862e1fe97217cfe9606cde3dae47 +size 452396 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_striped_towel.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_striped_towel.png new file mode 100644 index 0000000000000000000000000000000000000000..73c5e0cc5dd26318de3943f103b602038a599fa1 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_striped_towel.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_textured_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_textured_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..bbb0be92c649ee03f88bcf7e625c48685a23b211 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_textured_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14731e08cf4a93934a59118e04f6d3e32071da45c0e981ed6a874ab8428b0319 +size 335065 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wall_shelf.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wall_shelf.png new file mode 100644 index 0000000000000000000000000000000000000000..555c5e441edab2841bd59b2766713b96f04c7676 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wall_shelf.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6d4d67ae953817d1911a32b361559fe29f60b7dc6350840df52bf084d4132e2 +size 108420 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wooden_cabinets.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wooden_cabinets.png new file mode 100644 index 0000000000000000000000000000000000000000..6c9aa2d8847e69621476e03b0ead26cbcc9b182d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/crops/diversify_input_wooden_cabinets.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32b0308ab903ddf93963d046710304e85646571a3384f383c8107ca6f4916d1 +size 230536 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/detections.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/detections.json new file mode 100644 index 0000000000000000000000000000000000000000..1dc322e8c579864ded9ee63a788716ecdfd2b523 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/detections.json @@ -0,0 +1,192 @@ +[ + { + "name": "person_hands", + "present": true, + "bbox": [ + 0.1787, + 0.2332, + 0.4617, + 0.5792 + ], + "confidence": 0.95, + "notes": "Both hands of the person are visible, illuminated in red, holding a tool.", + "coarse_bbox": [ + 0.205, + 0.264, + 0.466, + 0.581 + ], + "refine_crop": "crops/detect_refine_person_hands.png" + }, + { + "name": "textured_mat", + "present": true, + "bbox": [ + 0.1138, + 0.5866, + 0.5947, + 0.7728 + 
], + "confidence": 0.99, + "notes": "Tight bounding box around the textured mat with raised bumps catching blue light reflections on the granite countertop.", + "coarse_bbox": [ + 0.115, + 0.591, + 0.594, + 0.774 + ], + "refine_crop": "crops/detect_refine_textured_mat.png" + }, + { + "name": "wall_shelf", + "present": true, + "bbox": [ + 0.5206, + 0.0681, + 0.6781, + 0.3789 + ], + "confidence": 0.95, + "notes": "The multi-tiered shelf and the items on it.", + "coarse_bbox": [ + 0.519, + 0.087, + 0.678, + 0.375 + ], + "refine_crop": "crops/detect_refine_wall_shelf.png" + }, + { + "name": "striped_towel", + "present": true, + "bbox": [ + 0.6082, + 0.7266, + 0.7242, + 0.9973 + ], + "confidence": 0.98, + "notes": "A black and white (appearing pinkish due to lighting) striped towel hanging over a cabinet handle.", + "coarse_bbox": [ + 0.609, + 0.723, + 0.723, + 1.0 + ], + "refine_crop": "crops/detect_refine_striped_towel.png" + }, + { + "name": "glass_bottle", + "present": true, + "bbox": [ + 0.2049, + 0.4778, + 0.2764, + 0.6209 + ], + "confidence": 0.95, + "notes": "Tight bounding box around the clear glass bottle containing amber liquid.", + "coarse_bbox": [ + 0.205, + 0.476, + 0.277, + 0.62 + ], + "refine_crop": "crops/detect_refine_glass_bottle.png" + }, + { + "name": "dirty_plate", + "present": true, + "bbox": [ + 0.003, + 0.5981, + 0.2, + 0.6793 + ], + "confidence": 0.95, + "notes": "Tight bounding box around the visible portion of the dirty plate with food remnants.", + "coarse_bbox": [ + 0.0, + 0.597, + 0.196, + 0.678 + ], + "refine_crop": "crops/detect_refine_dirty_plate.png" + }, + { + "name": "metal_tool", + "present": true, + "bbox": [ + 0.203, + 0.348, + 0.476, + 0.468 + ], + "confidence": 0.95, + "notes": "refine failed; using coarse bbox", + "coarse_bbox": [ + 0.203, + 0.348, + 0.476, + 0.468 + ], + "refine_crop": "crops/detect_refine_metal_tool.png" + }, + { + "name": "box_grater", + "present": true, + "bbox": [ + 0.4842, + 0.4749, + 0.5853, + 0.5496 + 
], + "confidence": 0.99, + "notes": "A metal box grater with a black top and bottom, angled with holes clearly visible on the main face.", + "coarse_bbox": [ + 0.482, + 0.473, + 0.585, + 0.55 + ], + "refine_crop": "crops/detect_refine_box_grater.png" + }, + { + "name": "wooden_cabinets", + "present": true, + "bbox": [ + 0.7319, + 0.5034, + 0.9945, + 0.9951 + ], + "confidence": 0.95, + "notes": "A row of light brown wooden cabinets with metal handles.", + "coarse_bbox": [ + 0.734, + 0.5, + 0.994, + 0.986 + ], + "refine_crop": "crops/detect_refine_wooden_cabinets.png" + }, + { + "name": "granite_counter", + "present": true, + "bbox": [ + 0.003, + 0.5275, + 0.6738, + 0.883 + ], + "confidence": 0.95, + "notes": "The granite countertop is visible in the lower half of the image.", + "coarse_bbox": [ + 0.003, + 0.534, + 0.673, + 0.981 + ], + "refine_crop": "crops/detect_refine_granite_counter.png" + } +] diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/main_image.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/main_image.png new file mode 100644 index 0000000000000000000000000000000000000000..48904a406a72722dd026169203fffd63bd76617a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/main_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc8339e4b919e2126de8bc37e974e6b17efbfba33f34734c0b1f57f7d234011 +size 1406237 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/plan.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/plan.json new file mode 100644 index 0000000000000000000000000000000000000000..1841793ba175ee1af559aa46490dcf775cadbd6d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/plan.json @@ -0,0 +1,236 @@ +{ + "sample_id": "sample_000011", + "target_total": 10, + "target_people": 1, + "target_objects": 9, + "compose_prompt": { + "format": "structured_json_prompt", + "canvas": { + "size": [ + 1248, + 
832 + ], + "aspect_ratio": "3:2", + "style": "photorealistic" + }, + "scene": { + "setting": "A dark, dimly lit kitchen at night, illuminated by contrasting harsh red and faint blue ambient lights.", + "activity": "A person's hands are working with a metal tool on the kitchen counter, surrounded by scattered kitchenware.", + "composition": "Close-to-medium wide shot focusing on the hands and countertop in the foreground, with the background cabinets and shelf receding into the shadows. High contrast lighting with a red cast on the hands and tool, and faint blue reflections on the textured mat.", + "constraints": [ + "no text", + "no labels", + "no watermarks", + "true 3:2 composition", + "final canvas size 1248x832", + "normal human and object proportions", + "no squeezed perspective", + "no anamorphic stretching", + "every listed person and object must be visibly present", + "the foreground may contain only the listed people and objects", + "no extra foreground people, animals, vehicles, props, tools, products, signs, readable text, logos, or other localizable subjects absent from this JSON" + ] + }, + "people": [ + { + "name": "person_hands", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "description": "The person's hands are visible, illuminated by a strong red light, holding a small object or tool.", + "role_in_scene": "Working intently, holding the metal tool over the granite countertop." + } + ], + "objects": [ + { + "name": "textured_mat", + "source_index": 0, + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.", + "role_in_scene": "Lying flat on the granite countertop under the person's hands." 
+ }, + { + "name": "wall_shelf", + "source_index": 3, + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "description": "A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.", + "role_in_scene": "Attached to the wall in the shadowy background above the counter." + }, + { + "name": "striped_towel", + "source_index": 5, + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "description": "A black and white striped towel.", + "role_in_scene": "Hanging down from a lower cabinet handle just below the edge of the countertop." + }, + { + "name": "glass_bottle", + "source_index": 6, + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "description": "A clear glass bottle filled with amber liquid.", + "role_in_scene": "Standing on the granite countertop to the side, catching dim reflections." + }, + { + "name": "dirty_plate", + "source_index": 8, + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "description": "A partially visible plate containing food remnants.", + "role_in_scene": "Sitting on the counter near the bottle in the dim light." + }, + { + "name": "metal_tool", + "source_index": 9, + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "description": "A dark, silhouetted tool with a thin, elongated structure resembling pliers.", + "role_in_scene": "Held firmly by the person's hands under the red light." + }, + { + "name": "box_grater", + "source_index": 10, + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "description": "A metal box grater.", + "role_in_scene": "Resting on the counter towards the background left." 
+ }, + { + "name": "wooden_cabinets", + "source_index": 11, + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "description": "A row of light brown wooden cabinets with metal handles.", + "role_in_scene": "Lining the right side of the kitchen in the background shadows." + }, + { + "name": "granite_counter", + "source_index": 16, + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "description": "A grey, white, and black speckled stone countertop.", + "role_in_scene": "The main horizontal surface spanning the lower half of the frame, holding the scattered items." + } + ] + }, + "expected_subjects": [ + { + "name": "person_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated by a red light, and they appear to be holding a small object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light.", + "sub_caption": "person: The person's hands are visible, illuminated by a strong red light, holding a small object or tool.. Scene role: Working intently, holding the metal tool over the granite countertop.", + "ref_style": "white_bg_full_body_front" + }, + { + "name": "textured_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. 
Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment.", + "sub_caption": "textured surface: A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.. Scene role: Lying flat on the granite countertop under the person's hands.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "wall_shelf", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "source_description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap.", + "sub_caption": "shelf: A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.. Scene role: Attached to the wall in the shadowy background above the counter.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "striped_towel", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "source_description": "A black and white striped towel hanging from the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring.", + "sub_caption": "towel: A black and white striped towel.. 
Scene role: Hanging down from a lower cabinet handle just below the edge of the countertop.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "glass_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "source_description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator.", + "sub_caption": "bottle: A clear glass bottle filled with amber liquid.. Scene role: Standing on the granite countertop to the side, catching dim reflections.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "dirty_plate", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "source_description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby.", + "sub_caption": "plate: A partially visible plate containing food remnants.. Scene role: Sitting on the counter near the bottle in the dim light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "metal_tool", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "source_description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. 
Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers.", + "sub_caption": "dark object: A dark, silhouetted tool with a thin, elongated structure resembling pliers.. Scene role: Held firmly by the person's hands under the red light.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "box_grater", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "source_description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items.", + "sub_caption": "grater: A metal box grater.. Scene role: Resting on the counter towards the background left.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "wooden_cabinets", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "source_description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides.", + "sub_caption": "kitchen cabinets (right): A row of light brown wooden cabinets with metal handles.. Scene role: Lining the right side of the kitchen in the background shadows.", + "ref_style": "white_bg_encyclopedia_photo" + }, + { + "name": "granite_counter", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "source_description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. 
Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right.", + "sub_caption": "granite countertop: A grey, white, and black speckled stone countertop.. Scene role: The main horizontal surface spanning the lower half of the frame, holding the scattered items.", + "ref_style": "white_bg_encyclopedia_photo" + } + ], + "vocab_task_path": "sample_000011/vocab_task.json", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references.json new file mode 100644 index 0000000000000000000000000000000000000000..9b9e0ebbdb781a11b496a251052fe8d0917a4c8a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references.json @@ -0,0 +1,325 @@ +{ + "references": [ + { + "name": "person_hands", + "ref_image": "references/ref_person_hands.png", + "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", + "diversify_input": "crops/diversify_input_person_hands.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", + "output": "references/ref_person_hands.png", + "mask": "references/sam_mask_person_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 36.0, + 704.0, + 1013.0 + ], + "mask_score": 3.415329, + "mask_area_ratio": 0.148867, + "elapsed_seconds": 7.2976 + }, + "reference_verify": 
"references/reference_verify_person_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "textured_mat", + "ref_image": "references/ref_textured_mat.png", + "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", + "diversify_input": "crops/diversify_input_textured_mat.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", + "output": "references/ref_textured_mat.png", + "mask": "references/sam_mask_textured_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 29.0, + 292.0, + 994.0, + 732.0 + ], + "mask_score": 3.388931, + "mask_area_ratio": 0.280238, + "elapsed_seconds": 7.3108 + }, + "reference_verify": "references/reference_verify_textured_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "wall_shelf", + "ref_image": "references/ref_wall_shelf.png", + "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", + "diversify_input": "crops/diversify_input_wall_shelf.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", + "output": "references/ref_wall_shelf.png", + "mask": "references/sam_mask_wall_shelf.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 
0.625, + "prompt_box_xyxy": [ + 176.0, + 42.0, + 861.0, + 940.0 + ], + "mask_score": 3.354082, + "mask_area_ratio": 0.233056, + "elapsed_seconds": 7.1907 + }, + "reference_verify": "references/reference_verify_wall_shelf.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "striped_towel", + "ref_image": "references/ref_striped_towel.png", + "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", + "diversify_input": "crops/diversify_input_striped_towel.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", + "output": "references/ref_striped_towel.png", + "mask": "references/sam_mask_striped_towel.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 125.0, + 53.0, + 897.0, + 971.0 + ], + "mask_score": 3.50075, + "mask_area_ratio": 0.517391, + "elapsed_seconds": 7.4653 + }, + "reference_verify": "references/reference_verify_striped_towel.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "glass_bottle", + "ref_image": "references/ref_glass_bottle.png", + "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", + "diversify_input": "crops/diversify_input_glass_bottle.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", + "output": "references/ref_glass_bottle.png", + "mask": "references/sam_mask_glass_bottle.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 198.0, + 0.0, + 820.0, + 1008.0 + ], + "mask_score": 3.251198, + "mask_area_ratio": 0.458004, + "elapsed_seconds": 7.2584 + }, + "reference_verify": "references/reference_verify_glass_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "dirty_plate", + "ref_image": "references/ref_dirty_plate.png", + "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", + "diversify_input": "crops/diversify_input_dirty_plate.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", + "output": "references/ref_dirty_plate.png", + "mask": "references/sam_mask_dirty_plate.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 334.0, + 963.0, + 685.0 + ], + "mask_score": 3.317592, + "mask_area_ratio": 0.170906, + "elapsed_seconds": 7.0879 + }, + "reference_verify": "references/reference_verify_dirty_plate.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "metal_tool", + "ref_image": "references/ref_metal_tool.png", + "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", + "diversify_input": "crops/diversify_input_metal_tool.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", + "output": "references/ref_metal_tool.png", + "mask": "references/sam_mask_metal_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 51.0, + 66.0, + 993.0, + 937.0 + ], + "mask_score": 0.918062, + "mask_area_ratio": 0.928193, + "elapsed_seconds": 7.1989 + }, + "reference_verify": "references/reference_verify_metal_tool.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "box_grater", + "ref_image": "references/ref_box_grater.png", + "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", + "diversify_input": "crops/diversify_input_box_grater.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", + "output": "references/ref_box_grater.png", + "mask": "references/sam_mask_box_grater.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 236.0, + 16.0, + 787.0, + 1015.0 + ], + "mask_score": 3.453517, + "mask_area_ratio": 0.346949, + "elapsed_seconds": 7.1571 + }, + "reference_verify": "references/reference_verify_box_grater.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "wooden_cabinets", + "ref_image": 
"references/ref_wooden_cabinets.png", + "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", + "diversify_input": "crops/diversify_input_wooden_cabinets.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", + "output": "references/ref_wooden_cabinets.png", + "mask": "references/sam_mask_wooden_cabinets.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 253.0, + 986.0, + 809.0 + ], + "mask_score": 3.454364, + "mask_area_ratio": 0.365833, + "elapsed_seconds": 7.226 + }, + "reference_verify": "references/reference_verify_wooden_cabinets.json", + "reference_verify_passed": true, + "reference_attempts": 1 + }, + { + "name": "granite_counter", + "ref_image": "references/ref_granite_counter.png", + "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", + "diversify_input": "crops/diversify_input_granite_counter.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", + "output": "references/ref_granite_counter.png", + "mask": "references/sam_mask_granite_counter.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 71.0, + 1023.0, + 940.0 + ], + "mask_score": 3.480751, + 
"mask_area_ratio": 0.58655, + "elapsed_seconds": 7.1983 + }, + "reference_verify": "references/reference_verify_granite_counter.json", + "reference_verify_passed": true, + "reference_attempts": 1 + } + ], + "reference_errors": {} +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_box_grater.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_box_grater.png new file mode 100644 index 0000000000000000000000000000000000000000..82fa5ad0a10c649ecbf74a2dd92f594eba0372c8 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_box_grater.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba6bb0ee2a473fda523b21e4aef4580e769eeff87f62a82574ae121de9cece8 +size 631695 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_dirty_plate.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_dirty_plate.png new file mode 100644 index 0000000000000000000000000000000000000000..95a353905e7ad94e7b011510d9165224a6dbbfdc --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_dirty_plate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314749f9231c92ac253e5a123e7c2af0b4e4a41919bc563e550424fae91c3323 +size 345837 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_glass_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_glass_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..a94dfd5a7bfabc39b478107c26b45fb7bae916ec --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_glass_bottle.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d43eca9426345f368252b7e2532cf79109a4af41d5376c7e202f884ccf3ea07 +size 710537 diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_granite_counter.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_granite_counter.png new file mode 100644 index 0000000000000000000000000000000000000000..5cf5c198b52b3281fab6560b6aef4426d7dca55c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_granite_counter.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29401dde7a0f8418942204db039727dc20f6407566c5be9f56b18b0a846c44b +size 1395387 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_metal_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_metal_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..631ac0e98e89e5d739bc1a3d53b11dee610f0c8a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_metal_tool.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c8c960e88c3d681dc1c1935677db2391be99d9bbaba529031da3230bd945b1 +size 633767 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_person_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_person_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..0d9f4c15fcdc0e7b23ae19ac546c4525a5be05c6 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_person_hands.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e796f62e3f06a7124891813af6f12af4d1830b540cc34b17ddd2708271aa00f8 +size 315161 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_striped_towel.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_striped_towel.png new file mode 100644 index 
0000000000000000000000000000000000000000..c9d662b29544457fbde2c813ffe70b4d7af597bf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_striped_towel.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18687af7cd9cb06689ddf445b9648d174d19659e4a1cb06f2dd531c4f78d167f +size 1211645 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_textured_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_textured_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..3f37a3b12216baa76dfaf740072d6b28a2a9c042 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_textured_mat.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ea37034be9d367d7c521e4a4e4e118c05d2d61e346755299db3bf87348a5de +size 540373 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wall_shelf.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wall_shelf.png new file mode 100644 index 0000000000000000000000000000000000000000..99436673aed4cc5922648635ba0551292c67fdef --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wall_shelf.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7cbab8ff2bc3ec5cd65add5b697ec30934629837b3107f345c84765dcce541d +size 481859 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wooden_cabinets.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wooden_cabinets.png new file mode 100644 index 0000000000000000000000000000000000000000..1ec005e1034b89a6503dcce7c2fbe552ede77227 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/ref_wooden_cabinets.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a246145af1ad254d105ca6732963a82dc7479c3da1f06fd29a583485317e7cd1 +size 570086 diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_box_grater.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_box_grater.json new file mode 100644 index 0000000000000000000000000000000000000000..fd88e45bba3e781e2eec002129bf246d5bf5d461 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_box_grater.json @@ -0,0 +1,46 @@ +{ + "name": "box_grater", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_box_grater_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_box_grater_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_box_grater_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_box_grater_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 236.0, + 16.0, + 787.0, + 1015.0 + ], + "mask_score": 3.453517, + "mask_area_ratio": 0.346949, + "elapsed_seconds": 7.1571 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": 
true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Excellent reference image of a box grater, completely isolated on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_dirty_plate.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_dirty_plate.json new file mode 100644 index 0000000000000000000000000000000000000000..38597b0e616c8b6dfb93de23728a22353ec2cc52 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_dirty_plate.json @@ -0,0 +1,46 @@ +{ + "name": "dirty_plate", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_dirty_plate_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_dirty_plate_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_dirty_plate_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_dirty_plate_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 334.0, + 963.0, + 685.0 + ], + 
"mask_score": 3.317592, + "mask_area_ratio": 0.170906, + "elapsed_seconds": 7.0879 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Dirty plate is fully visible. A fork on the plate has its handle abruptly cut off on the left edge." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_glass_bottle.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_glass_bottle.json new file mode 100644 index 0000000000000000000000000000000000000000..4476367c5087d3c3938c84d9759b38d859fb30c9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_glass_bottle.json @@ -0,0 +1,46 @@ +{ + "name": "glass_bottle", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_glass_bottle_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_glass_bottle_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_glass_bottle_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_glass_bottle_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", 
+ "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 198.0, + 0.0, + 820.0, + 1008.0 + ], + "mask_score": 3.251198, + "mask_area_ratio": 0.458004, + "elapsed_seconds": 7.2584 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": false, + "cropped_or_truncated": true, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The top of the bottle neck is cropped out, but the main body is clearly visible and useful as a reference." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_granite_counter.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_granite_counter.json new file mode 100644 index 0000000000000000000000000000000000000000..543f34565e6a8eb8a7eac9b712df0063f8cb926d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_granite_counter.json @@ -0,0 +1,46 @@ +{ + "name": "granite_counter", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_granite_counter_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_granite_counter_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_granite_counter_attempt_01.png", + "mask": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_granite_counter_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 71.0, + 1023.0, + 940.0 + ], + "mask_score": 3.480751, + "mask_area_ratio": 0.58655, + "elapsed_seconds": 7.1983 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The reference image clearly shows an isolated piece of granite countertop on a white background, which perfectly represents the subject." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_metal_tool.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_metal_tool.json new file mode 100644 index 0000000000000000000000000000000000000000..6eb3891cb821260eec2862d68c623658aacd760c --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_metal_tool.json @@ -0,0 +1,46 @@ +{ + "name": "metal_tool", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_metal_tool_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_metal_tool_attempt_01.png", + "sam_white_bg": { + "input": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_metal_tool_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_metal_tool_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 51.0, + 66.0, + 993.0, + 937.0 + ], + "mask_score": 0.918062, + "mask_area_ratio": 0.928193, + "elapsed_seconds": 7.1989 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The metal tool (pincers/pliers) is fully visible, isolated, and centered on a white background with no cropping." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_person_hands.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_person_hands.json new file mode 100644 index 0000000000000000000000000000000000000000..3542de00097d6b4061081a4001ca29eef32d0844 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_person_hands.json @@ -0,0 +1,46 @@ +{ + "name": "person_hands", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_person_hands_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_person_hands_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_person_hands_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_person_hands_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 36.0, + 704.0, + 1013.0 + ], + "mask_score": 3.415329, + "mask_area_ratio": 0.148867, + "elapsed_seconds": 7.2976 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + 
"single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Full body is clearly visible with adequate margins and no edge cropping. The red light effect on the hands matches the caption details." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_striped_towel.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_striped_towel.json new file mode 100644 index 0000000000000000000000000000000000000000..9e360dceb80693b617b521809f5042bc737f0e1e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_striped_towel.json @@ -0,0 +1,46 @@ +{ + "name": "striped_towel", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_striped_towel_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_striped_towel_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_striped_towel_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_striped_towel_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 125.0, + 53.0, + 
897.0, + 971.0 + ], + "mask_score": 3.50075, + "mask_area_ratio": 0.517391, + "elapsed_seconds": 7.4653 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "Image shows a black and pink striped towel on a white background. Identity difference in color is ignored per policy." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_textured_mat.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_textured_mat.json new file mode 100644 index 0000000000000000000000000000000000000000..bf9f83d6ff0781a2955593576c3ff3c99d7d54ff --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_textured_mat.json @@ -0,0 +1,46 @@ +{ + "name": "textured_mat", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_textured_mat_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_textured_mat_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_textured_mat_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_textured_mat_attempt_01.png", + "sam_checkpoint": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 29.0, + 292.0, + 994.0, + 732.0 + ], + "mask_score": 3.388931, + "mask_area_ratio": 0.280238, + "elapsed_seconds": 7.3108 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The subject is a black textured mat with repeating raised dots, visible in its entirety against a clean white background. It serves as an excellent reference image." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wall_shelf.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wall_shelf.json new file mode 100644 index 0000000000000000000000000000000000000000..f3288051b2a9b2e814596295417b0f245ef91e3e --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wall_shelf.json @@ -0,0 +1,46 @@ +{ + "name": "wall_shelf", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_wall_shelf_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_wall_shelf_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", + "output": 
"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_wall_shelf_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_wall_shelf_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 176.0, + 42.0, + 861.0, + 940.0 + ], + "mask_score": 3.354082, + "mask_area_ratio": 0.233056, + "elapsed_seconds": 7.1907 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + "cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The wall shelf is clearly visible, isolated on a white background, and fully within the frame. It holds various items as described." 
+ } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wooden_cabinets.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wooden_cabinets.json new file mode 100644 index 0000000000000000000000000000000000000000..d41e3d7814c65a0cbb7a5b9663c1bbba652318c2 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/reference_verify_wooden_cabinets.json @@ -0,0 +1,46 @@ +{ + "name": "wooden_cabinets", + "passed": true, + "accepted_attempt": 1, + "attempts": [ + { + "attempt": 1, + "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", + "candidate_ref_image": "references/candidate_ref_wooden_cabinets_attempt_01.png", + "candidate_sam_mask": "references/candidate_sam_mask_wooden_cabinets_attempt_01.png", + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", + "output": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_ref_wooden_cabinets_attempt_01.png", + "mask": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/candidate_sam_mask_wooden_cabinets_attempt_01.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 253.0, + 986.0, + 809.0 + ], + "mask_score": 3.454364, + "mask_area_ratio": 0.365833, + "elapsed_seconds": 7.226 + }, + "verify": { + "passed": true, + "subject_visible": true, + "complete_subject": true, + 
"cropped_or_truncated": false, + "single_main_subject": true, + "white_background": true, + "failure_reasons": [], + "notes": "The image shows a complete set of light brown wooden cabinets with metal handles isolated on a white background." + } + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_box_grater.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_box_grater.png new file mode 100644 index 0000000000000000000000000000000000000000..6f7b43a486751af40cfbcc287fb9b8eea38d8c82 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_box_grater.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_dirty_plate.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_dirty_plate.png new file mode 100644 index 0000000000000000000000000000000000000000..b587db550c3d97a756f49c4dec130ac41683a98e Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_dirty_plate.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_glass_bottle.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_glass_bottle.png new file mode 100644 index 0000000000000000000000000000000000000000..fde3c1ae93938ef875ea0e1e61a6781b06505770 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_glass_bottle.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_granite_counter.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_granite_counter.png new file mode 100644 index 0000000000000000000000000000000000000000..438f71aba2a5e6d90f162dad9d1e6d7bd02e4674 Binary files /dev/null and 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_granite_counter.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_metal_tool.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_metal_tool.png new file mode 100644 index 0000000000000000000000000000000000000000..a780d431ab9f0bc3d9784147856fa1cbe6d44ad6 Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_metal_tool.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_person_hands.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_person_hands.png new file mode 100644 index 0000000000000000000000000000000000000000..a261ff82012e4eb7d1f72e66b9a07fc295a6dede Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_person_hands.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_striped_towel.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_striped_towel.png new file mode 100644 index 0000000000000000000000000000000000000000..e8c707f79ecd10a3901007dfa00945046e50af7b Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_striped_towel.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_textured_mat.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_textured_mat.png new file mode 100644 index 0000000000000000000000000000000000000000..a4edbd6480d91501265136001d0e64fec1a4707d Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_textured_mat.png differ diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wall_shelf.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wall_shelf.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7b188b0a770c43a37ec340726abda1f6b08dad Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wall_shelf.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wooden_cabinets.png b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wooden_cabinets.png new file mode 100644 index 0000000000000000000000000000000000000000..9611d407479d4233a864b1ca75d08d9aa7b79edc Binary files /dev/null and b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/references/sam_mask_wooden_cabinets.png differ diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/row.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/row.json new file mode 100644 index 0000000000000000000000000000000000000000..80791d9550c21a5b75d9cff8607c4b17e8eccd78 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/row.json @@ -0,0 +1,486 @@ +{ + "sample_id": "sample_000011", + "target_total": 10, + "target_people": 1, + "target_objects": 9, + "canvas_size": [ + 1248, + 832 + ], + "canvas_aspect_ratio": "3:2", + "main_image": "main_image.png", + "bbox_overlay": "bbox_overlay.png", + "plan": "plan.json", + "detections": "detections.json", + "vocab_task": "vocab_task.json", + "n_planned": 10, + "n_detected": 10, + "n_subjects": 10, + "subjects": [ + { + "name": "person_hands", + "is_person": true, + "subject_type": "person", + "source_set": "people_set", + "source_image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "source_name": "person", + "source_description": "The person's hands are visible, illuminated by a red light, and they appear to be holding a small 
object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light.", + "sub_caption": "person: The person's hands are visible, illuminated by a strong red light, holding a small object or tool.. Scene role: Working intently, holding the metal tool over the granite countertop.", + "measured_bbox": [ + 0.1787, + 0.2332, + 0.4617, + 0.5792 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_full_body_front", + "ref_image": "references/ref_person_hands.png", + "raw_ref_image": "references/raw_ref_person_hands_attempt_01.png", + "reference_verify": "references/reference_verify_person_hands.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_person_hands_attempt_01.png", + "output": "references/ref_person_hands.png", + "mask": "references/sam_mask_person_hands.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 338.0, + 36.0, + 704.0, + 1013.0 + ], + "mask_score": 3.415329, + "mask_area_ratio": 0.148867, + "elapsed_seconds": 7.2976 + } + }, + { + "name": "textured_mat", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "source_name": "textured surface", + "source_description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. 
Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment.", + "sub_caption": "textured surface: A dark surface covered with a repeating pattern of small, raised bumps or dots, catching faint blue light reflections.. Scene role: Lying flat on the granite countertop under the person's hands.", + "measured_bbox": [ + 0.1138, + 0.5866, + 0.5947, + 0.7728 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_textured_mat.png", + "raw_ref_image": "references/raw_ref_textured_mat_attempt_01.png", + "reference_verify": "references/reference_verify_textured_mat.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_textured_mat_attempt_01.png", + "output": "references/ref_textured_mat.png", + "mask": "references/sam_mask_textured_mat.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 29.0, + 292.0, + 994.0, + 732.0 + ], + "mask_score": 3.388931, + "mask_area_ratio": 0.280238, + "elapsed_seconds": 7.3108 + } + }, + { + "name": "wall_shelf", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "source_name": "shelf", + "source_description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. 
Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap.", + "sub_caption": "shelf: A dark, multi-tiered shelf attached to the wall, holding various unidentifiable small items.. Scene role: Attached to the wall in the shadowy background above the counter.", + "measured_bbox": [ + 0.5206, + 0.0681, + 0.6781, + 0.3789 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wall_shelf.png", + "raw_ref_image": "references/raw_ref_wall_shelf_attempt_01.png", + "reference_verify": "references/reference_verify_wall_shelf.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wall_shelf_attempt_01.png", + "output": "references/ref_wall_shelf.png", + "mask": "references/sam_mask_wall_shelf.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 176.0, + 42.0, + 861.0, + 940.0 + ], + "mask_score": 3.354082, + "mask_area_ratio": 0.233056, + "elapsed_seconds": 7.1907 + } + }, + { + "name": "striped_towel", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P03_118:object:22", + "source_name": "towel", + "source_description": "A black and white striped towel hanging from the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring.", + "sub_caption": "towel: A black and white striped towel.. 
Scene role: Hanging down from a lower cabinet handle just below the edge of the countertop.", + "measured_bbox": [ + 0.6082, + 0.7266, + 0.7242, + 0.9973 + ], + "detection_confidence": 0.98, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_striped_towel.png", + "raw_ref_image": "references/raw_ref_striped_towel_attempt_01.png", + "reference_verify": "references/reference_verify_striped_towel.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_striped_towel_attempt_01.png", + "output": "references/ref_striped_towel.png", + "mask": "references/sam_mask_striped_towel.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 125.0, + 53.0, + 897.0, + 971.0 + ], + "mask_score": 3.50075, + "mask_area_ratio": 0.517391, + "elapsed_seconds": 7.4653 + } + }, + { + "name": "glass_bottle", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P30_113:object:5", + "source_name": "bottle", + "source_description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator.", + "sub_caption": "bottle: A clear glass bottle filled with amber liquid.. 
Scene role: Standing on the granite countertop to the side, catching dim reflections.", + "measured_bbox": [ + 0.2049, + 0.4778, + 0.2764, + 0.6209 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_glass_bottle.png", + "raw_ref_image": "references/raw_ref_glass_bottle_attempt_01.png", + "reference_verify": "references/reference_verify_glass_bottle.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_glass_bottle_attempt_01.png", + "output": "references/ref_glass_bottle.png", + "mask": "references/sam_mask_glass_bottle.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 198.0, + 0.0, + 820.0, + 1008.0 + ], + "mask_score": 3.251198, + "mask_area_ratio": 0.458004, + "elapsed_seconds": 7.2584 + } + }, + { + "name": "dirty_plate", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P26_102:object:5", + "source_name": "plate", + "source_description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby.", + "sub_caption": "plate: A partially visible plate containing food remnants.. 
Scene role: Sitting on the counter near the bottle in the dim light.", + "measured_bbox": [ + 0.003, + 0.5981, + 0.2, + 0.6793 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_dirty_plate.png", + "raw_ref_image": "references/raw_ref_dirty_plate_attempt_01.png", + "reference_verify": "references/reference_verify_dirty_plate.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_dirty_plate_attempt_01.png", + "output": "references/ref_dirty_plate.png", + "mask": "references/sam_mask_dirty_plate.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 33.0, + 334.0, + 963.0, + 685.0 + ], + "mask_score": 3.317592, + "mask_area_ratio": 0.170906, + "elapsed_seconds": 7.0879 + } + }, + { + "name": "metal_tool", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "source_name": "dark object", + "source_description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers.", + "sub_caption": "dark object: A dark, silhouetted tool with a thin, elongated structure resembling pliers.. 
Scene role: Held firmly by the person's hands under the red light.", + "measured_bbox": [ + 0.203, + 0.348, + 0.476, + 0.468 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_metal_tool.png", + "raw_ref_image": "references/raw_ref_metal_tool_attempt_01.png", + "reference_verify": "references/reference_verify_metal_tool.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_metal_tool_attempt_01.png", + "output": "references/ref_metal_tool.png", + "mask": "references/sam_mask_metal_tool.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 51.0, + 66.0, + 993.0, + 937.0 + ], + "mask_score": 0.918062, + "mask_area_ratio": 0.928193, + "elapsed_seconds": 7.1989 + } + }, + { + "name": "box_grater", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P34_111:object:34", + "source_name": "grater", + "source_description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items.", + "sub_caption": "grater: A metal box grater.. 
Scene role: Resting on the counter towards the background left.", + "measured_bbox": [ + 0.4842, + 0.4749, + 0.5853, + 0.5496 + ], + "detection_confidence": 0.99, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_box_grater.png", + "raw_ref_image": "references/raw_ref_box_grater_attempt_01.png", + "reference_verify": "references/reference_verify_box_grater.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_box_grater_attempt_01.png", + "output": "references/ref_box_grater.png", + "mask": "references/sam_mask_box_grater.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 236.0, + 16.0, + 787.0, + 1015.0 + ], + "mask_score": 3.453517, + "mask_area_ratio": 0.346949, + "elapsed_seconds": 7.1571 + } + }, + { + "name": "wooden_cabinets", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P28_106:object:8", + "source_name": "kitchen cabinets (right)", + "source_description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides.", + "sub_caption": "kitchen cabinets (right): A row of light brown wooden cabinets with metal handles.. 
Scene role: Lining the right side of the kitchen in the background shadows.", + "measured_bbox": [ + 0.7319, + 0.5034, + 0.9945, + 0.9951 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_wooden_cabinets.png", + "raw_ref_image": "references/raw_ref_wooden_cabinets_attempt_01.png", + "reference_verify": "references/reference_verify_wooden_cabinets.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_wooden_cabinets_attempt_01.png", + "output": "references/ref_wooden_cabinets.png", + "mask": "references/sam_mask_wooden_cabinets.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 36.0, + 253.0, + 986.0, + 809.0 + ], + "mask_score": 3.454364, + "mask_area_ratio": 0.365833, + "elapsed_seconds": 7.226 + } + }, + { + "name": "granite_counter", + "is_person": false, + "subject_type": "object", + "source_set": "obj_set", + "source_image_id": "EPIC-Kitchens:P04_108:object:19", + "source_name": "granite countertop", + "source_description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right.", + "sub_caption": "granite countertop: A grey, white, and black speckled stone countertop.. 
Scene role: The main horizontal surface spanning the lower half of the frame, holding the scattered items.", + "measured_bbox": [ + 0.003, + 0.5275, + 0.6738, + 0.883 + ], + "detection_confidence": 0.95, + "ref_style": "white_bg_encyclopedia_photo", + "ref_image": "references/ref_granite_counter.png", + "raw_ref_image": "references/raw_ref_granite_counter_attempt_01.png", + "reference_verify": "references/reference_verify_granite_counter.json", + "reference_verify_passed": true, + "reference_attempts": 1, + "sam_white_bg": { + "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/sample_000011/references/raw_ref_granite_counter_attempt_01.png", + "output": "references/ref_granite_counter.png", + "mask": "references/sam_mask_granite_counter.png", + "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth", + "sam_model_type": "vit_b", + "sam_device": "auto", + "sam_working_size": [ + 640, + 640 + ], + "sam_max_side": 640, + "sam_downscale": 0.625, + "prompt_box_xyxy": [ + 0.0, + 71.0, + 1023.0, + 940.0 + ], + "mask_score": 3.480751, + "mask_area_ratio": 0.58655, + "elapsed_seconds": 7.1983 + } + } + ], + "not_emitted": [], + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/vocab_task.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/vocab_task.json new file mode 100644 index 0000000000000000000000000000000000000000..fdd3d89254b580a0699a50f159641f11d2c69fd0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/sample_000011/vocab_task.json @@ -0,0 +1,154 @@ +{ + "task_id": "sample_000011", + "sample_id": "sample_000011", + "sample_index": 11, + "target_total": 10, + "target_people": 1, + "target_objects": 9, + "people_candidates": [ + { + 
"candidate_index": 0, + "source_offset": 1120, + "image_id": "Ego4D:ego4d_video/EGO_76422.npy:person:0", + "name": "person", + "description": "The person's hands are visible, illuminated by a red light, and they appear to be holding a small object. Source dataset: Ego4D. Scene context: A dark scene with a person's hands holding what appears to be a small device or tool illuminated by red light." + }, + { + "candidate_index": 1, + "source_offset": 704, + "image_id": "Ego4D:ego4d_video/EGO_282532.npy:person:0", + "name": "person 1", + "description": "Very faintly visible figure near the center top, mostly obscured by darkness. Source dataset: Ego4D. Scene context: A very dark scene, possibly outdoors at night, with barely visible people." + } + ], + "object_candidates": [ + { + "candidate_index": 0, + "source_offset": 8162, + "image_id": "Ego4D:ego4d_video/EGO_39154.npy:object:0", + "name": "textured surface", + "description": "A dark surface covered with a repeating pattern of small, raised bumps or dots, illuminated by a blue light. Source dataset: Ego4D. Scene context: A close-up view of a textured surface illuminated with blue light in a dark environment." + }, + { + "candidate_index": 1, + "source_offset": 7758, + "image_id": "Ego4D:ego4d_video/EGO_294065.npy:object:18", + "name": "unidentifiable object", + "description": "An object that lacks clear distinguishing features. Source dataset: Ego4D. Scene context: A very dark image showing some indistinguishable objects and shapes." + }, + { + "candidate_index": 2, + "source_offset": 6774, + "image_id": "Ego4D:ego4d_video/EGO_24997.npy:object:0", + "name": "blue light", + "description": "A small, faint, rectangular blue glow visible in the bottom right corner of the otherwise pitch-black image. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a small, dim blue light in the lower right corner." 
+ }, + { + "candidate_index": 3, + "source_offset": 7935, + "image_id": "Ego4D:ego4d_video/EGO_309243.npy:object:1", + "name": "shelf", + "description": "A dark, multi-tiered shelf attached to the wall on the right side, holding various unidentifiable small items. Source dataset: Ego4D. Scene context: A dimly lit room illuminated entirely by strong red light, where a person is sitting and holding a child in their lap." + }, + { + "candidate_index": 4, + "source_offset": 7368, + "image_id": "Ego4D:ego4d_video/EGO_274150.npy:object:1", + "name": "dashboard area", + "description": "The dark, mostly obscured area surrounding the illuminated screen, with some subtle highlights and reflections suggesting the interior structure, possibly a dashboard or console. Source dataset: Ego4D. Scene context: A dark, low-light interior view, likely inside a vehicle, showing a person's arms and a bright screen." + }, + { + "candidate_index": 5, + "source_offset": 807, + "image_id": "EPIC-Kitchens:P03_118:object:22", + "name": "towel", + "description": "A black and white striped towel hanging from the oven door handle. Source dataset: EPIC-Kitchens. Scene context: A narrow, slightly messy kitchen with dark grey countertops, white cabinets, and wooden flooring." + }, + { + "candidate_index": 6, + "source_offset": 3353, + "image_id": "EPIC-Kitchens:P30_113:object:5", + "name": "bottle", + "description": "A clear glass bottle with amber liquid standing on the counter near the cutting board. Source dataset: EPIC-Kitchens. Scene context: A person's hand is visible in the foreground of a kitchen with light wood cabinets, dark countertops, a white washing machine, and a white refrigerator." + }, + { + "candidate_index": 7, + "source_offset": 7729, + "image_id": "Ego4D:ego4d_video/EGO_294094.npy:object:1", + "name": "faint light spot", + "description": "A small, faint light spot located slightly above the center. Source dataset: Ego4D. 
Scene context: The scene is very dark, with a blurry green circle and a few small, faint light spots visible." + }, + { + "candidate_index": 8, + "source_offset": 2429, + "image_id": "EPIC-Kitchens:P26_102:object:5", + "name": "plate", + "description": "A partially visible plate or flat dish on the table, seemingly containing food or remnants. Source dataset: EPIC-Kitchens. Scene context: A dimly lit room with a table covered in a blue and green plaid tablecloth, holding various items like bottles and cans, and a dark chair nearby." + }, + { + "candidate_index": 9, + "source_offset": 4377, + "image_id": "Ego4D:ego4d_video/EGO_15996.npy:object:0", + "name": "dark object", + "description": "A dark, silhouetted object with a thin, elongated structure, appearing to be a tool like pliers or scissors, visible against a dimly lit reddish background. Source dataset: Ego4D. Scene context: A close-up view of what appears to be a tool or mechanical part in shadows, possibly a pair of pliers." + }, + { + "candidate_index": 10, + "source_offset": 3714, + "image_id": "EPIC-Kitchens:P34_111:object:34", + "name": "grater", + "description": "A metal box grater partially visible on the far left counter. Source dataset: EPIC-Kitchens. Scene context: A cluttered kitchen sink area with a window overlooking a patio, featuring various plants, cleaning supplies, and kitchen items." + }, + { + "candidate_index": 11, + "source_offset": 3021, + "image_id": "EPIC-Kitchens:P28_106:object:8", + "name": "kitchen cabinets (right)", + "description": "A row of light brown wooden cabinets with metal handles along the right side. Source dataset: EPIC-Kitchens. Scene context: An overhead view of a kitchen floor with cabinets, a sink, and a dishwasher on the sides." 
+ }, + { + "candidate_index": 12, + "source_offset": 7212, + "image_id": "Ego4D:ego4d_video/EGO_260700.npy:object:0", + "name": "blue lights", + "description": "a cluster of small, circular blue lights visible in the darkness Source dataset: Ego4D. Scene context: The image is almost entirely black, with only a few faint, scattered points of light visible." + }, + { + "candidate_index": 13, + "source_offset": 4960, + "image_id": "Ego4D:ego4d_video/EGO_165528.npy:object:1", + "name": "blue object 2", + "description": "A small blue light or object near the center. Source dataset: Ego4D. Scene context: A very dark scene with some blue light illuminating a few objects." + }, + { + "candidate_index": 14, + "source_offset": 6174, + "image_id": "Ego4D:ego4d_video/EGO_225421.npy:object:1", + "name": "trees", + "description": "Lush green trees located on the right side of the path. Source dataset: Ego4D. Scene context: A person driving a tractor down a path with trees and a building in the background." + }, + { + "candidate_index": 15, + "source_offset": 8667, + "image_id": "Ego4D:ego4d_video/EGO_49990.npy:object:0", + "name": "red highlights", + "description": "A few scattered, faint red highlights visible against the dark background. Source dataset: Ego4D. Scene context: The image is almost completely dark, showing only a few faint red highlights and a hint of a textured surface, possibly fabric or leather, in the upper left corner." + }, + { + "candidate_index": 16, + "source_offset": 1052, + "image_id": "EPIC-Kitchens:P04_108:object:19", + "name": "granite countertop", + "description": "A grey, white, and black speckled stone countertop surface visible in the foreground and near the sink/stove. Source dataset: EPIC-Kitchens. Scene context: A messy kitchen scene with items scattered on the counter, floor, and a large blue delivery bag visible on the right." 
+ }, + { + "candidate_index": 17, + "source_offset": 5724, + "image_id": "Ego4D:ego4d_video/EGO_201901.npy:object:0", + "name": "light reflection or streak", + "description": "A faint, blurry, blue-ish white streak or line of light against a black background. Source dataset: Ego4D. Scene context: A very dark image with a blurry, bright diagonal line, possibly a light source or reflection." + } + ], + "rng_seed": 1783080012, + "created_at": 1782259981.5724683 +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..f113c2b976000e4925eb7fd8b3222f3129f7ce02 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000001.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000001", + "plan_path": "sample_000001/plan.json", + "task_path": "sample_000001/vocab_task.json", + "main_image": "sample_000001/main_image.png", + "compose_prompt": "sample_000001/compose_prompt.txt", + "prompt_hash": "548d4b3954f834579e786c96b7c2b06634e1557b5c3696708844fcb349c0b112", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000001", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..5c83a0e3b2f89366bfd99282f0f0d1d01793261d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000003.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000003", + "plan_path": "sample_000003/plan.json", + "task_path": "sample_000003/vocab_task.json", + "main_image": "sample_000003/main_image.png", + 
"compose_prompt": "sample_000003/compose_prompt.txt", + "prompt_hash": "3ef5a4074c1bbefb292e218bb404a1b2a2604238308ed7314fe5242b50456fec", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000003", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..7f9fb6e6bcf7969d4240a8321a9211e38c011727 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000004.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000004", + "plan_path": "sample_000004/plan.json", + "task_path": "sample_000004/vocab_task.json", + "main_image": "sample_000004/main_image.png", + "compose_prompt": "sample_000004/compose_prompt.txt", + "prompt_hash": "eb898ac566c6b3f45e1fb840fe6f923919dd9e38fa153a52213e0845cc757b95", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000004", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000005.json new file mode 100644 index 0000000000000000000000000000000000000000..c1cc690dcd913cc32b2da4561ad7de5b076ca655 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000005.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000005", + "plan_path": "sample_000005/plan.json", + "task_path": "sample_000005/vocab_task.json", + "main_image": "sample_000005/main_image.png", + "compose_prompt": "sample_000005/compose_prompt.txt", + "prompt_hash": 
"7d6d6dc813df0f07bfd03f7d4502745e1a752f79eae85bdb00c5e5eb069a2de0", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000005", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a7d806a232becf098c7296bf04784db92446a5 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000006.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000006", + "plan_path": "sample_000006/plan.json", + "task_path": "sample_000006/vocab_task.json", + "main_image": "sample_000006/main_image.png", + "compose_prompt": "sample_000006/compose_prompt.txt", + "prompt_hash": "3b9be9bca0f2446c09254d05d0da799c57e27a9a2002bdcbca0a51ea75899776", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000006", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000007.json new file mode 100644 index 0000000000000000000000000000000000000000..74c598182bce896ef4693e8e9d9641feb38e39f9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000007.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000007", + "plan_path": "sample_000007/plan.json", + "task_path": "sample_000007/vocab_task.json", + "main_image": "sample_000007/main_image.png", + "compose_prompt": "sample_000007/compose_prompt.txt", + "prompt_hash": "28d87a336042bfb0a7b972a34e568278d49088aa1f2374c7b99be55226928fc1", + "model_ids": { + "chat_model": 
"gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000007", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..2bd9845dd98be12ee2c0e214369e27631b64e7da --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000008.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000008", + "plan_path": "sample_000008/plan.json", + "task_path": "sample_000008/vocab_task.json", + "main_image": "sample_000008/main_image.png", + "compose_prompt": "sample_000008/compose_prompt.txt", + "prompt_hash": "51c22435f380392540743deab0a6fa8eb2290039e4fc68b0f607feb018361bf0", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000008", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..f9067f44af8613543af93f652d65a577894dbafa --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000009.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000009", + "plan_path": "sample_000009/plan.json", + "task_path": "sample_000009/vocab_task.json", + "main_image": "sample_000009/main_image.png", + "compose_prompt": "sample_000009/compose_prompt.txt", + "prompt_hash": "7ffa42b4ea56bc7a96ee47ba8eebc3c5e3f2ea9d59db3102ad408c0456652b88", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": 
"sample_000009", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..750be8bd5638edcd72dc6ba9e3388db6b7f2af05 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000010.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000010", + "plan_path": "sample_000010/plan.json", + "task_path": "sample_000010/vocab_task.json", + "main_image": "sample_000010/main_image.png", + "compose_prompt": "sample_000010/compose_prompt.txt", + "prompt_hash": "82ee01e685cf26898f99a81471c9d8fd25b6d0044aabc83c491d98dbc9a97503", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000010", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000011.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..6197635c0ed0340b52130f3c3b6f9586bdbe5833 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/done/sample_000011.json @@ -0,0 +1,16 @@ +{ + "sample_id": "sample_000011", + "plan_path": "sample_000011/plan.json", + "task_path": "sample_000011/vocab_task.json", + "main_image": "sample_000011/main_image.png", + "compose_prompt": "sample_000011/compose_prompt.txt", + "prompt_hash": "467a2f191a2cb0fd70a3c5d69b70a9e8be068305eba2f4af1e4c5467e640e383", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000011", + "pool": "scene_pool", + "retry_count": 0, + "errors": [] +} diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/failed/sample_000002.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/failed/sample_000002.json new file mode 100644 index 0000000000000000000000000000000000000000..452ce2172a1d31752b19d7ddf451f4a6b118611d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/scene_pool/failed/sample_000002.json @@ -0,0 +1,37 @@ +{ + "sample_id": "sample_000002", + "plan_path": "sample_000002/plan.json", + "task_path": "sample_000002/vocab_task.json", + "main_image": "sample_000002/main_image.png", + "compose_prompt": "sample_000002/compose_prompt.txt", + "prompt_hash": "e3122c6ac3f1480e67eeb583008ada94abe0fcd9eb0082efc30b2e00dd60b0a7", + "model_ids": { + "chat_model": "gcp/google/gemini-3.1-pro-preview", + "image_model": "gcp/google/gemini-3-pro-image-preview" + }, + "item_id": "sample_000002", + "pool": "scene_pool", + "retry_count": 4, + "errors": [ + { + "time": 1782259802.4823346, + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_led", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1084, in handler\n detections = detect_all_subjects(sample_id, plan, main_image, subject_workers)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 839, in detect_all_subjects\n raise RuntimeError(\nRuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_led\n" + }, + { + "time": 1782259848.400114, + "error": "RuntimeError: detection 
incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1084, in handler\n detections = detect_all_subjects(sample_id, plan, main_image, subject_workers)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 839, in detect_all_subjects\n raise RuntimeError(\nRuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light\n" + }, + { + "time": 1782259903.115867, + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1084, in handler\n detections = detect_all_subjects(sample_id, plan, main_image, subject_workers)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 839, in detect_all_subjects\n raise RuntimeError(\nRuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light\n" + }, + { + "time": 1782259973.5969427, + "error": "RuntimeError: detection incomplete 
after 3 VLM attempts: missing 2/15 planned subjects: small_crumpled_object, green_light", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1084, in handler\n detections = detect_all_subjects(sample_id, plan, main_image, subject_workers)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 839, in detect_all_subjects\n raise RuntimeError(\nRuntimeError: detection incomplete after 3 VLM attempts: missing 2/15 planned subjects: small_crumpled_object, green_light\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/stats.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/stats.json new file mode 100644 index 0000000000000000000000000000000000000000..536c4d491d453abfcb61d2a03a7892997e3dcf15 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/stats.json @@ -0,0 +1,212 @@ +{ + "started_at": 1782259667.338901, + "elapsed_seconds": 2407.0, + "stage": { + "plan": { + "attempts": 12, + "successes": 10, + "errors": 0, + "permanent_failures": 0, + "last_error": "", + "attempts_per_hour": 17.948, + "successes_per_hour": 14.956, + "stage_error_rate": 0.0 + }, + "compose": { + "attempts": 6, + "successes": 6, + "errors": 0, + "permanent_failures": 0, + "last_error": "", + "attempts_per_hour": 8.974, + "successes_per_hour": 8.974, + "stage_error_rate": 0.0 + }, + "vocab_task_pool": { + "attempts": 0, + "successes": 0, + "errors": 2, + "permanent_failures": 0, + "last_error": "FileNotFoundError: [Errno 2] No such file or directory: 
'/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000002.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000002.json'", + "attempts_per_hour": 0.0, + "successes_per_hour": 0.0, + "stage_error_rate": 2.0 + }, + "detect": { + "attempts": 11, + "successes": 7, + "errors": 0, + "permanent_failures": 0, + "last_error": "", + "attempts_per_hour": 16.452, + "successes_per_hour": 10.47, + "stage_error_rate": 0.0 + }, + "reference": { + "attempts": 9, + "successes": 6, + "errors": 0, + "permanent_failures": 0, + "last_error": "", + "attempts_per_hour": 13.461, + "successes_per_hour": 8.974, + "stage_error_rate": 0.0 + }, + "scene_pool": { + "attempts": 0, + "successes": 0, + "errors": 4, + "permanent_failures": 1, + "last_error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 2/15 planned subjects: small_crumpled_object, green_light", + "attempts_per_hour": 0.0, + "successes_per_hour": 0.0, + "stage_error_rate": 4.0 + }, + "emit": { + "attempts": 5, + "successes": 5, + "errors": 0, + "permanent_failures": 0, + "last_error": "", + "attempts_per_hour": 7.478, + "successes_per_hour": 7.478, + "stage_error_rate": 0.0 + }, + "reference_subject": { + "attempts": 0, + "successes": 0, + "errors": 3, + "permanent_failures": 0, + "last_error": "RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.", + "attempts_per_hour": 0.0, + "successes_per_hour": 0.0, + "stage_error_rate": 3.0 + }, + "detection_pool": { + "attempts": 0, + "successes": 0, + "errors": 3, + "permanent_failures": 0, + "last_error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended 
subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.", + "attempts_per_hour": 0.0, + "successes_per_hour": 0.0, + "stage_error_rate": 3.0 + } + }, + "total_attempts": 43, + "total_errors": 12, + "error_rate": 0.2791, + "recent_events": [ + { + "time": 1782259710.280691, + "stage": "vocab_task_pool", + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000005.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000005.json'", + "permanent": false + }, + { + "time": 1782259718.4838197, + "stage": "vocab_task_pool", + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000002.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000002.json'", + "permanent": false + }, + { + "time": 1782259802.4826574, + "stage": "scene_pool", + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_led", + "permanent": false + }, + { + "time": 1782259848.4005313, + "stage": "scene_pool", + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light", + "permanent": false + }, + { + "time": 1782259903.1171935, + "stage": "scene_pool", + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 1/15 planned subjects: green_light", + "permanent": false + }, + { + "time": 1782259973.5972824, + "stage": "scene_pool", + "error": "RuntimeError: detection incomplete after 3 VLM attempts: missing 2/15 planned subjects: small_crumpled_object, 
green_light", + "permanent": true + }, + { + "time": 1782260885.2187245, + "stage": "reference_subject", + "error": "RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark area' is entirely absent; Image shows a cardboard box instead of the requested subject", + "permanent": false + }, + { + "time": 1782260885.231833, + "stage": "detection_pool", + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark area' is entirely absent; Image shows a cardboard box instead of the requested subject", + "permanent": false + }, + { + "time": 1782261302.74644, + "stage": "reference_subject", + "error": "RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject is absent; image shows a cardboard box instead of a dark shadowed area.", + "permanent": false + }, + { + "time": 1782261302.7614222, + "stage": "detection_pool", + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject is absent; image shows a cardboard box instead of a dark shadowed area.", + "permanent": false + }, + { + "time": 1782261717.33018, + "stage": "reference_subject", + "error": "RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.", + "permanent": false + }, + { + "time": 1782261717.3306336, + "stage": "detection_pool", + "error": "RuntimeError: reference generation or verification failed for dark_area: RuntimeError: reference verification failed for dark_area after 10 attempts: Intended subject 'dark_area' is absent.; Image shows a cardboard box instead of the requested subject.", + "permanent": false + } + ], + "row_count": 10, + "pool_counts": { + 
"vocab_task_pool": { + "pending": 0, + "in_progress": 0, + "done": 11, + "failed": 0 + }, + "plan_pool": { + "pending": 0, + "in_progress": 0, + "done": 11, + "failed": 0 + }, + "scene_pool": { + "pending": 0, + "in_progress": 0, + "done": 10, + "failed": 1 + }, + "detection_pool": { + "pending": 0, + "in_progress": 0, + "done": 10, + "failed": 0 + }, + "reference_pool": { + "pending": 0, + "in_progress": 0, + "done": 10, + "failed": 0 + }, + "emit_pool": { + "pending": 0, + "in_progress": 0, + "done": 10, + "failed": 0 + } + } +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000001.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000001.json new file mode 100644 index 0000000000000000000000000000000000000000..00d5c6d773706fefcfc0cf34ee37e223903297d4 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000001.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000001", + "task_path": "sample_000001/vocab_task.json", + "item_id": "sample_000001", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259691.9945705, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000001.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000001.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 
2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000001.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000001.json'\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000002.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000002.json new file mode 100644 index 0000000000000000000000000000000000000000..a017e37d87a1f64b7583d9dd942d7d65e1e2bf79 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000002.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000002", + "task_path": "sample_000002/vocab_task.json", + "item_id": "sample_000002", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259718.483596, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000002.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000002.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000002.json' -> 
'/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000002.json'\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000003.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000003.json new file mode 100644 index 0000000000000000000000000000000000000000..41bbd43ebf8e565e254a65e8aea167a6c75416c0 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000003.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000003", + "task_path": "sample_000003/vocab_task.json", + "item_id": "sample_000003", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259694.1740756, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000003.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000003.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000003.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000003.json'\n" + } + ] +} diff --git 
a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000004.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000004.json new file mode 100644 index 0000000000000000000000000000000000000000..0eacfce06683908af97b680b3f082d0c719db0c3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000004.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000004", + "task_path": "sample_000004/vocab_task.json", + "item_id": "sample_000004", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259709.730379, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000004.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000004.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000004.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000004.json'\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000005.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000005.json new file mode 
100644 index 0000000000000000000000000000000000000000..581ada9d5964b7654f0a210d2d0e16465f1826f9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000005.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000005", + "task_path": "sample_000005/vocab_task.json", + "item_id": "sample_000005", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259710.279673, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000005.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000005.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000005.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000005.json'\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000006.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000006.json new file mode 100644 index 0000000000000000000000000000000000000000..f18f803c0b9a814bcbd621713a279c1af830b2d7 --- /dev/null +++ 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000006.json @@ -0,0 +1,14 @@ +{ + "sample_id": "sample_000006", + "task_path": "sample_000006/vocab_task.json", + "item_id": "sample_000006", + "pool": "vocab_task_pool", + "retry_count": 1, + "errors": [ + { + "time": 1782259727.4824324, + "error": "FileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000006.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000006.json'", + "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1021, in worker_loop\n p.complete(claimed)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 278, in complete\n os.replace(claimed, destination)\nFileNotFoundError: [Errno 2] No such file or directory: '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/in_progress/sample_000006.json' -> '/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/chaos/samples/vocab_task_pool/done/sample_000006.json'\n" + } + ] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000007.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000007.json new file mode 100644 index 0000000000000000000000000000000000000000..bfcbb8a053b1c5624cea0ab51d2a6c7e6410e47d --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000007.json @@ -0,0 +1,8 @@ +{ + "sample_id": "sample_000007", + "task_path": "sample_000007/vocab_task.json", + 
"item_id": "sample_000007", + "pool": "vocab_task_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000008.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000008.json new file mode 100644 index 0000000000000000000000000000000000000000..9bfd3d2a2e9f585229217f20ef5a1cbf32cf771a --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000008.json @@ -0,0 +1,8 @@ +{ + "sample_id": "sample_000008", + "task_path": "sample_000008/vocab_task.json", + "item_id": "sample_000008", + "pool": "vocab_task_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000009.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000009.json new file mode 100644 index 0000000000000000000000000000000000000000..961f18edd93a499789dfc998e18e95ff951d47b3 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000009.json @@ -0,0 +1,8 @@ +{ + "sample_id": "sample_000009", + "task_path": "sample_000009/vocab_task.json", + "item_id": "sample_000009", + "pool": "vocab_task_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000010.json b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000010.json new file mode 100644 index 0000000000000000000000000000000000000000..0059f5f064cfc8ca21f7de4b2d22205efe9ca6cf --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000010.json @@ -0,0 +1,8 @@ +{ + "sample_id": "sample_000010", + "task_path": "sample_000010/vocab_task.json", + "item_id": "sample_000010", + "pool": "vocab_task_pool", + "retry_count": 0, + "errors": [] +} diff --git a/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000011.json 
b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000011.json new file mode 100644 index 0000000000000000000000000000000000000000..9d100735805b3b24b3183d7a949ff455b69916b9 --- /dev/null +++ b/samples_v8/chaos/EPIC-Kitchens_Ego4D_samples/vocab_task_pool/done/sample_000011.json @@ -0,0 +1,8 @@ +{ + "sample_id": "sample_000011", + "task_path": "sample_000011/vocab_task.json", + "item_id": "sample_000011", + "pool": "vocab_task_pool", + "retry_count": 0, + "errors": [] +}