diff --git a/.gitattributes b/.gitattributes index c4d9ebe8054a75d16bb6f43f9dc08381844d0b28..c882f8e375191bfe491e735687ac7a44cd0d0edd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -70,3 +70,33 @@ examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizatio examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_frustum.mp4 filter=lfs diff=lfs merge=lfs -text examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_overlay.png filter=lfs diff=lfs merge=lfs -text examples/inference_sample/raw/kid_coffee.png filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/clip.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/dynamic_masks.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/mask_sky.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_reference_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000001/clip.mp4 filter=lfs diff=lfs merge=lfs -text 
+examples/training_sample/processed/00000001/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000001/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000001/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000001/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/clip.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/dynamic_masks.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/mask_person.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_target_proj_fg_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed/00000002/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/processed_lmdb/shard_000.lmdb/data.mdb filter=lfs diff=lfs merge=lfs -text +examples/training_sample/raw/MIRA/4230740.0.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/raw/RealEstate10K/1259726fc1f8e966.mp4 filter=lfs diff=lfs merge=lfs -text +examples/training_sample/raw/SpatiaVID_HQ/0a2d11ab-57d7-516d-b873-c29555a41796.mp4 filter=lfs diff=lfs merge=lfs -text diff --git 
a/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml b/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml index f672d0d971bca3655f1d2c4ee03dc837b1974cf1..b9621ff626ad55a88792045322ffe44398aa2517 100644 --- a/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml +++ b/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml @@ -9,7 +9,7 @@ iter_input: from a perpendicular angle. The red-and-white striped canopy and colorful pennant banner are seen edge-on, with the coffee and books signage now aligned vertically in the frame. - fg_text: '' + fg_text: 'On the right, on a wooden bench under the wall, sits a lovely corgi dog, staying steadily on the bench and resting.' '1': scene_text: The scene now presents a side profile of the stall, with the red-and-white striped canopy and colorful pennant banner visible overhead. The white tablecloth diff --git a/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml b/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml index 12faad11651d35c8485d22338899756a5ea90d1b..0484ab20ae326c37b663618ff24463802b88fd22 100644 --- a/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml +++ b/examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml @@ -10,7 +10,7 @@ iter_input: The white tablecloth covers the counter, with chalkboard signs for "COFFEE" and "BOOKS" now oriented sideways, and stacks of books, a jar of pencils, and potted flowers still arranged on the right side. - fg_text: '' + fg_text: 'On the right, on a wooden bench under the wall, sits a lovely corgi dog, staying steadily on the bench and resting.' 
'1': scene_text: The scene is viewed from a side angle, showing the full length of the stall’s white-clothed counter with chalkboard signs for “COFFEE” and “BOOKS,” diff --git a/examples/training_sample/.DS_Store b/examples/training_sample/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3190f5452eefc6208e333580388bfa1cb4687474 Binary files /dev/null and b/examples/training_sample/.DS_Store differ diff --git a/examples/training_sample/processed/00000000/clip.mp4 b/examples/training_sample/processed/00000000/clip.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3b8f9d004bc006f16569138bfc93660fc1153d8c --- /dev/null +++ b/examples/training_sample/processed/00000000/clip.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7933df8527f851b0dd18600242720f9edf8f0f54114f40317860d7e163c6e943 +size 3029650 diff --git a/examples/training_sample/processed/00000000/dynamic_masks.mp4 b/examples/training_sample/processed/00000000/dynamic_masks.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..cf5b0f56821403666fdf1025e14220644f38d547 --- /dev/null +++ b/examples/training_sample/processed/00000000/dynamic_masks.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135723756707066e4b1b7edecf0f55f3474496ffb120d52dc71871dbb8a389ea +size 339662 diff --git a/examples/training_sample/processed/00000000/dynamic_prompts.json b/examples/training_sample/processed/00000000/dynamic_prompts.json new file mode 100644 index 0000000000000000000000000000000000000000..87e4efa0319bd856285af37a61d1cc1a20f13c40 --- /dev/null +++ b/examples/training_sample/processed/00000000/dynamic_prompts.json @@ -0,0 +1,14 @@ +{ + "raw": "1) person\n2) car\n3) sky", + "entities": [ + "person", + "car", + "sky" + ], + "prompts": [ + "person", + "car", + "sky" + ], + "status": "ok" +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000000/geometry.npz 
b/examples/training_sample/processed/00000000/geometry.npz new file mode 100644 index 0000000000000000000000000000000000000000..f75687b679de2e49997bd1cc5636dca148a2e21c --- /dev/null +++ b/examples/training_sample/processed/00000000/geometry.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff89760739ce6a0ffbcdfbdb1f1883ed5971e10c9dd82973fcf6c8c8206e657 +size 165477281 diff --git a/examples/training_sample/processed/00000000/latents.pt b/examples/training_sample/processed/00000000/latents.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cdd116d681d01036f53ce5cded1e8ca6f661692 --- /dev/null +++ b/examples/training_sample/processed/00000000/latents.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c6d9d6301d71c9c4717ef468d447a2e2ba18f5510af051458f382f93f40e6a +size 27363108 diff --git a/examples/training_sample/processed/00000000/mask_car.mp4 b/examples/training_sample/processed/00000000/mask_car.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e64584bcfdc2da1247fc5ac13fbe64d539dd4f8c Binary files /dev/null and b/examples/training_sample/processed/00000000/mask_car.mp4 differ diff --git a/examples/training_sample/processed/00000000/mask_person.mp4 b/examples/training_sample/processed/00000000/mask_person.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..aec82af7f811748132b07a384c82a48291748879 Binary files /dev/null and b/examples/training_sample/processed/00000000/mask_person.mp4 differ diff --git a/examples/training_sample/processed/00000000/mask_sky.mp4 b/examples/training_sample/processed/00000000/mask_sky.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d6b723dee14f76dd5fe8ec81c7ebc2a5aaa0f4b1 --- /dev/null +++ b/examples/training_sample/processed/00000000/mask_sky.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600e63d9faf910365cf5f71290869bfa05f646f644415696da0a1a476f4674ed +size 283869 
diff --git a/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_1.mp4 b/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5921bfd7fc8d7125fc1529aacd1a777fcde44268 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_9.mp4 b/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9de748b61f114654ddb9741c4d30d9356777cbb4 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_rgb_1.mp4 b/examples/training_sample/processed/00000000/train_preceding_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d3e8865cccfdeba224277cf35f51e0fb3cae9727 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 b/examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c42cff157990e184ea37f59082704ee83d0763b8 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a806dc60b82aa5f24a74cf7584fa33a4febfd733c39ca623e29a56a5a78716bd +size 121834 diff --git a/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 b/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..03cfdf6923b10a61363cc4f64cf270a29a33faf3 Binary files /dev/null 
and b/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 b/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c439afb12adac005fc3897a144555e234482526a Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_1.mp4 b/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bbd74dbd082bf1c6f1b9d19bea573572c6e0b2a7 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_9.mp4 b/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e528b829d12dc878b120a6d97276af7dc9a36a5c Binary files /dev/null and b/examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instance_00.mp4 b/examples/training_sample/processed/00000000/train_reference_instance_00.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..47d360b82f9b13e1db8bc0f604e63d30eea2ecd5 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_reference_instance_00.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instance_01.mp4 b/examples/training_sample/processed/00000000/train_reference_instance_01.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..ac26d4e54154b2e87d45ab5fd773a026cacf53bc Binary files /dev/null and b/examples/training_sample/processed/00000000/train_reference_instance_01.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instance_02.mp4 b/examples/training_sample/processed/00000000/train_reference_instance_02.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d95ca00ad3a95a7888643b35c6f6f1cd266e004a Binary files /dev/null and b/examples/training_sample/processed/00000000/train_reference_instance_02.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instance_03.mp4 b/examples/training_sample/processed/00000000/train_reference_instance_03.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dd59560a0ef0c551935d4af700c30b5cf45efedf Binary files /dev/null and b/examples/training_sample/processed/00000000/train_reference_instance_03.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instance_04.mp4 b/examples/training_sample/processed/00000000/train_reference_instance_04.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..77868c74ca0456ea88433c47d9974a9f27a3b7c8 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_reference_instance_04.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_reference_instances.json b/examples/training_sample/processed/00000000/train_reference_instances.json new file mode 100644 index 0000000000000000000000000000000000000000..f8d53cd182e1c737382c4205459fa4dd1b17f079 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_reference_instances.json @@ -0,0 +1,59 @@ +{ + "target_video": "train_target_rgb.mp4", + "prompts": [ + "person", + "car" + ], + "instances": [ + { + "rank": 0, + "obj_id": 9, + "max_area_ratio": 0.030376101762820514, + "frame_indices": [ + 42, + 53, + 60 + ] + }, + { + "rank": 1, + 
"obj_id": 8, + "max_area_ratio": 0.005681590544871795, + "frame_indices": [ + 1, + 42, + 62 + ] + }, + { + "rank": 2, + "obj_id": 7, + "max_area_ratio": 0.002423878205128205, + "frame_indices": [ + 11, + 29, + 54 + ] + }, + { + "rank": 3, + "obj_id": 6, + "max_area_ratio": 0.0023337339743589743, + "frame_indices": [ + 60, + 62, + 63 + ] + }, + { + "rank": 4, + "obj_id": 5, + "max_area_ratio": 0.001970653044871795, + "frame_indices": [ + 55, + 59, + 60 + ] + } + ] +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000000/train_reference_rgb.mp4 b/examples/training_sample/processed/00000000/train_reference_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d69f8802c6e62dfa1fdd48a9049480d382260be6 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_reference_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9198ec28b716cdd629680b654c2db416f2056136a6500df4f5db614381825cde +size 144828 diff --git a/examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 b/examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b34d6a8274f0e537acc06d30bc65e25fea35649f --- /dev/null +++ b/examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43933ec73381a1adee7be5ce33194b3568922f0c496ae3657bc155d0b14cbb41 +size 138324 diff --git a/examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 b/examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..909d77c1c695e455dd0af1592abc69ff963132e2 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b5228a8f111598e17ac0fc46376070444536b3ea7541aac5051ff919c40470d5 +size 143767 diff --git a/examples/training_sample/processed/00000000/train_sample.json b/examples/training_sample/processed/00000000/train_sample.json new file mode 100644 index 0000000000000000000000000000000000000000..3c8f8f0acfe282d2c7c5f66bc0501621cf824592 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_sample.json @@ -0,0 +1,171 @@ +{ + "t0": 26, + "P9_idx": [ + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25 + ], + "P1_idx": [ + 25 + ], + "T_idx": [ + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90 + ], + "C_idx": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120 + ], + "scene_idx": 115, + "R_idx": [ + 92, + 14, + 12, + 94, + 10, + 96, + 98 + ], + "R_iou": [ + 0.07454549403766372, + 0.0694113967938869, + 0.06155005990764799, + 0.06073820098027936, + 0.06025912646675358, + 0.05896005194930133, + 0.05594802497313094 + ], + "R_stats": { + "threshold": 0.04, + "max_refs": 7, + "stride": 2, + "voxel_size": 0.01, + "num_targets": 65, + "num_candidates": 23, + "best_iou": 0.07454549403766372, + "avg_iou": 0.040257401322028034 + }, + "projection_channels": [ + "rgb" + ], + "output_size": [ + 480, + 832 + ], + "add_fg_to_projection": true, + "has_fg": true, + "src_aug_applied": false, + "naming": "figure" +} \ No newline at end of file diff --git 
a/examples/training_sample/processed/00000000/train_target_fg_rgb.txt b/examples/training_sample/processed/00000000/train_target_fg_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..83e6fa5105113a759fbf025b21192f2c3bed94de --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_fg_rgb.txt @@ -0,0 +1 @@ +A white van drives forward, followed by an orange and white bus. A large truck passes on the right. A yellow taxi speeds past the bus. diff --git a/examples/training_sample/processed/00000000/train_target_proj_fg_rgb.mp4 b/examples/training_sample/processed/00000000/train_target_proj_fg_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2b56bcf478c478698f4ceba41799f99557633596 Binary files /dev/null and b/examples/training_sample/processed/00000000/train_target_proj_fg_rgb.mp4 differ diff --git a/examples/training_sample/processed/00000000/train_target_rgb.mp4 b/examples/training_sample/processed/00000000/train_target_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..332317f7cf20db6bed72bffbaeffd5eb2893f5bf --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629b6e0fb5de1a0a2b0ede81d5ce8a48c044559b8f2b35bbd740ac09257757a7 +size 808291 diff --git a/examples/training_sample/processed/00000000/train_target_rgb.txt b/examples/training_sample/processed/00000000/train_target_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa0b29f05b53c0b94b3a25489839bf4dd8d27d81 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_rgb.txt @@ -0,0 +1 @@ +The vehicle glides forward along a wide urban avenue, flanked by bare trees and modern buildings under an overcast sky. Traffic flows steadily, including a prominent orange-and-white bus and a large truck passing by. 
Pedestrians stroll along sidewalks beside metal railings, while streetlights and traffic signals punctuate the scene. The camera moves smoothly ahead, capturing the rhythm of city life with muted tones and quiet motion. diff --git a/examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 b/examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..62415f12d1a958fa7f493e4b7f2f34e0543bf784 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f26e2972c6b80172049df7774a6cfc6e74fdf14766439947e5139a63d8ee47 +size 832132 diff --git a/examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 b/examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e7f4f119e2db41c0877b7156a8dc8f77faa3f934 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08b45fbdfecea9614eb673ebc91a794728fe281cc848d53d462a109a8e36764 +size 820568 diff --git a/examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 b/examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e7f4f119e2db41c0877b7156a8dc8f77faa3f934 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08b45fbdfecea9614eb673ebc91a794728fe281cc848d53d462a109a8e36764 +size 820568 diff --git a/examples/training_sample/processed/00000000/train_target_scene_rgb.txt b/examples/training_sample/processed/00000000/train_target_scene_rgb.txt new 
file mode 100644 index 0000000000000000000000000000000000000000..1e681b62356d52ce36463a1e5e7262dae6ea4206 --- /dev/null +++ b/examples/training_sample/processed/00000000/train_target_scene_rgb.txt @@ -0,0 +1 @@ +Urban street flanked by bare trees and modern buildings under overcast skies. Concrete sidewalks and metal railings line the road. Streetlights and traffic signals punctuate the scene. Buildings display varied architectural styles. The atmosphere is muted, with diffused lighting enhancing the city’s quiet, subdued ambiance. Camera moves steadily forward. diff --git a/examples/training_sample/processed/00000001/clip.mp4 b/examples/training_sample/processed/00000001/clip.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5a5b825b5e71a17fa72b266c94b9316cd7042e3d --- /dev/null +++ b/examples/training_sample/processed/00000001/clip.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192530efced1bb9553bff5bb56af57874d412e82a77f321ad22ccc4ba4cc923c +size 1694156 diff --git a/examples/training_sample/processed/00000001/dynamic_masks.mp4 b/examples/training_sample/processed/00000001/dynamic_masks.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..67ea39ff206911c665daa39b0b325c0fa89b0f44 Binary files /dev/null and b/examples/training_sample/processed/00000001/dynamic_masks.mp4 differ diff --git a/examples/training_sample/processed/00000001/dynamic_prompts.json b/examples/training_sample/processed/00000001/dynamic_prompts.json new file mode 100644 index 0000000000000000000000000000000000000000..831f166ff9062c94e708909293326e569bed70e8 --- /dev/null +++ b/examples/training_sample/processed/00000001/dynamic_prompts.json @@ -0,0 +1,6 @@ +{ + "raw": "Nothing", + "entities": [], + "prompts": [], + "status": "nothing" +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000001/geometry.npz b/examples/training_sample/processed/00000001/geometry.npz new file mode 100644 index 
0000000000000000000000000000000000000000..e6cb002bd69f9d72ab3b8622495f8c901dbbf8ca --- /dev/null +++ b/examples/training_sample/processed/00000001/geometry.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ca3cd74738693835fee9a4b51afcfadf6e0ad8bd26b599d2d48e3113efc1f7 +size 162947301 diff --git a/examples/training_sample/processed/00000001/latents.pt b/examples/training_sample/processed/00000001/latents.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cf02f88a645fdcb08843d1fc7ed8620999e9b8d --- /dev/null +++ b/examples/training_sample/processed/00000001/latents.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09aec048eccdf9ebd9252776729df42ff301af7fb22709502ac372ef5c16420e +size 17376130 diff --git a/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_1.mp4 b/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b6077f5a7a0a6925a8af15e4cfb5c52a7337b09f Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_9.mp4 b/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4237996272163e0096e87b02b3f3df5648e8405d Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_rgb_1.mp4 b/examples/training_sample/processed/00000001/train_preceding_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..759371ed7925b667304a419c6910fce8ba76ecfa Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_rgb_1.mp4 differ diff --git 
a/examples/training_sample/processed/00000001/train_preceding_rgb_9.mp4 b/examples/training_sample/processed/00000001/train_preceding_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..81a466a9806fc273c58dd722a80cd144d3208265 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 b/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0e0cdcd2a9fdf0e1e79c5a570f1dbdec4aee0312 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 b/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b74d275a29821435b57437438139ad08be4d2e35 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_1.mp4 b/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0e0cdcd2a9fdf0e1e79c5a570f1dbdec4aee0312 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_9.mp4 b/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b74d275a29821435b57437438139ad08be4d2e35 Binary files /dev/null and 
b/examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_reference_rgb.mp4 b/examples/training_sample/processed/00000001/train_reference_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8c6b64f58e45464803c0f4c866e7471856be9011 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_reference_rgb.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_sample.json b/examples/training_sample/processed/00000001/train_sample.json new file mode 100644 index 0000000000000000000000000000000000000000..d6b94caafaf4088b0ca77f11aa3134cb4dbdb4df --- /dev/null +++ b/examples/training_sample/processed/00000001/train_sample.json @@ -0,0 +1,171 @@ +{ + "t0": 18, + "P9_idx": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17 + ], + "P1_idx": [ + 17 + ], + "T_idx": [ + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82 + ], + "C_idx": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120 + ], + "scene_idx": 114, + "R_idx": [ + 84, + 86, + 88, + 90, + 92, + 6, + 94 + ], + "R_iou": [ + 0.5075392736220058, + 0.3585663437196642, + 0.2754447755802402, + 0.20114208819267132, + 0.17367741069481032, + 0.16608006730701816, + 0.13594338545828102 + ], + "R_stats": { + "threshold": 0.04, + "max_refs": 7, + "stride": 2, + "voxel_size": 0.01, + "num_targets": 65, + 
"num_candidates": 23, + "best_iou": 0.5075392736220058, + "avg_iou": 0.15195567723155867 + }, + "projection_channels": [ + "rgb" + ], + "output_size": [ + 480, + 832 + ], + "add_fg_to_projection": true, + "has_fg": false, + "src_aug_applied": false, + "naming": "figure" +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000001/train_target_proj_fg_rgb.mp4 b/examples/training_sample/processed/00000001/train_target_proj_fg_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..65ad3473e8f43982a5cc48aa764da1377d31e9d7 Binary files /dev/null and b/examples/training_sample/processed/00000001/train_target_proj_fg_rgb.mp4 differ diff --git a/examples/training_sample/processed/00000001/train_target_rgb.mp4 b/examples/training_sample/processed/00000001/train_target_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..17cd7975bb00ee974f824d7e3ae66ff3470792eb --- /dev/null +++ b/examples/training_sample/processed/00000001/train_target_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fb1edb55c61bb5cf82f8ff583bf5e96de9222c1a6786b0fc362c5fc7c00616 +size 485462 diff --git a/examples/training_sample/processed/00000001/train_target_rgb.txt b/examples/training_sample/processed/00000001/train_target_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..0085472b82885d641f9734c8e3a08deef877fd42 --- /dev/null +++ b/examples/training_sample/processed/00000001/train_target_rgb.txt @@ -0,0 +1 @@ +The camera glides forward through a sunlit, spacious home, revealing hardwood floors, a sleek white kitchen with stainless steel appliances, and a wooden dining set under a warm pendant light. Large windows flood the room with natural light, while a lush potted plant adds greenery near the glass doors. The open-concept layout flows seamlessly from living to dining to kitchen, exuding modern elegance and comfort. 
Soft beige walls and elegant columns enhance the airy, inviting atmosphere. diff --git a/examples/training_sample/processed/00000001/train_target_scene_proj_fg_overlay_rgb.mp4 b/examples/training_sample/processed/00000001/train_target_scene_proj_fg_overlay_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b004cc26655239f19604fcbf5b489b2fdcdae2ea --- /dev/null +++ b/examples/training_sample/processed/00000001/train_target_scene_proj_fg_overlay_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d99849576f05a0f14b6600deff06d8b94f06b638ca9c8d1cad27e2c59a13ac5 +size 485084 diff --git a/examples/training_sample/processed/00000001/train_target_scene_proj_rgb.mp4 b/examples/training_sample/processed/00000001/train_target_scene_proj_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b004cc26655239f19604fcbf5b489b2fdcdae2ea --- /dev/null +++ b/examples/training_sample/processed/00000001/train_target_scene_proj_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d99849576f05a0f14b6600deff06d8b94f06b638ca9c8d1cad27e2c59a13ac5 +size 485084 diff --git a/examples/training_sample/processed/00000001/train_target_scene_proj_rgb_orig.mp4 b/examples/training_sample/processed/00000001/train_target_scene_proj_rgb_orig.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b004cc26655239f19604fcbf5b489b2fdcdae2ea --- /dev/null +++ b/examples/training_sample/processed/00000001/train_target_scene_proj_rgb_orig.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d99849576f05a0f14b6600deff06d8b94f06b638ca9c8d1cad27e2c59a13ac5 +size 485084 diff --git a/examples/training_sample/processed/00000001/train_target_scene_rgb.txt b/examples/training_sample/processed/00000001/train_target_scene_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca1c24827223b3728efb3efb5790de9fe7099e43 --- /dev/null +++ 
b/examples/training_sample/processed/00000001/train_target_scene_rgb.txt @@ -0,0 +1 @@ +The camera glides through a spacious, sunlit home with hardwood floors and beige walls. Architectural columns frame the open-plan living and dining areas, illuminated by natural light and a warm pendant fixture. Large windows offer a view of greenery, enhancing the airy, inviting atmosphere. diff --git a/examples/training_sample/processed/00000002/clip.mp4 b/examples/training_sample/processed/00000002/clip.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e896f2edf52402ce20c994101a2275d9c0697071 --- /dev/null +++ b/examples/training_sample/processed/00000002/clip.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f82eced49893850609b488f7c4b2ab9e37e7e9a9ae451fcf7bb46533601b8fa +size 1568398 diff --git a/examples/training_sample/processed/00000002/dynamic_masks.mp4 b/examples/training_sample/processed/00000002/dynamic_masks.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0975521032d50c32d4fd9dec691bb49a275dee3f --- /dev/null +++ b/examples/training_sample/processed/00000002/dynamic_masks.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98d3783887317ba0bc6e73286c524ac45a8894f8f80f0259821b4c086f0d72e +size 687645 diff --git a/examples/training_sample/processed/00000002/dynamic_prompts.json b/examples/training_sample/processed/00000002/dynamic_prompts.json new file mode 100644 index 0000000000000000000000000000000000000000..24e4f9dd7cd827cecca37c36583b28b1d9a658c3 --- /dev/null +++ b/examples/training_sample/processed/00000002/dynamic_prompts.json @@ -0,0 +1,16 @@ +{ + "raw": "1) person\n2) radio\n3) chessboard\n4) food box", + "entities": [ + "person", + "radio", + "chessboard", + "food box" + ], + "prompts": [ + "person", + "radio", + "chessboard", + "food box" + ], + "status": "ok" +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000002/geometry.npz 
b/examples/training_sample/processed/00000002/geometry.npz new file mode 100644 index 0000000000000000000000000000000000000000..fa1dd219f77fd319f65efe403dad83be6f6f75ef --- /dev/null +++ b/examples/training_sample/processed/00000002/geometry.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c078513c054c67022e5bc7ac04322a2c537247a4ebd662d1610fcc63c74b3368 +size 159181349 diff --git a/examples/training_sample/processed/00000002/latents.pt b/examples/training_sample/processed/00000002/latents.pt new file mode 100644 index 0000000000000000000000000000000000000000..74a20d0ce2667ae1ecbd6e1e336ea0a5eb5260aa --- /dev/null +++ b/examples/training_sample/processed/00000002/latents.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c15bfb37a4b7ba79ba175f911ef26e5dbbe148309a375798343733fc7d3b205 +size 27363108 diff --git a/examples/training_sample/processed/00000002/mask_chessboard.mp4 b/examples/training_sample/processed/00000002/mask_chessboard.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f60552b9afaa06006f933547893818c4a6064d18 Binary files /dev/null and b/examples/training_sample/processed/00000002/mask_chessboard.mp4 differ diff --git a/examples/training_sample/processed/00000002/mask_person.mp4 b/examples/training_sample/processed/00000002/mask_person.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a94cc940dcd317429035cc77e1ceadc0d5ee1519 --- /dev/null +++ b/examples/training_sample/processed/00000002/mask_person.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa84cfc2cfa81a5da14b05eaf7ea1f4d3f3e9c28a24a6042e9e37b9080ded0e +size 604896 diff --git a/examples/training_sample/processed/00000002/mask_radio.mp4 b/examples/training_sample/processed/00000002/mask_radio.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ef4a876a46bf836b5a8c95c7b01221a10d684071 Binary files /dev/null and 
b/examples/training_sample/processed/00000002/mask_radio.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_1.mp4 b/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c2978f9fe9a77f6ac19ce3eef81fa96a26e2990a Binary files /dev/null and b/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_9.mp4 b/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6b0e0aca5835a33151a544a9cadbb4826cfc662f Binary files /dev/null and b/examples/training_sample/processed/00000002/train_preceding_proj_fg_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_rgb_1.mp4 b/examples/training_sample/processed/00000002/train_preceding_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4cd083a6b55fe52037a75e2036b5bceb39a064b6 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_preceding_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_rgb_9.mp4 b/examples/training_sample/processed/00000002/train_preceding_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..afe0c9eb378b74a779488ca059722f244594f84d Binary files /dev/null and b/examples/training_sample/processed/00000002/train_preceding_rgb_9.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 b/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8902bf45778f7a2f2c1049ebcf366ba155970134 Binary files /dev/null and 
b/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 b/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2c6f3d35049a0df5e03f91aa6ee45309fb04b560 --- /dev/null +++ b/examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9a439500ae841ef6e76bfb3bcd23aafa86fa5ddcdf4218992d9b52b5966c0e +size 131214 diff --git a/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_1.mp4 b/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b2c27100c322a913a12dbe8604526be5dfc98710 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_1.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_9.mp4 b/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4a9e0806715cb8bf702d18d485b65a37a375faab --- /dev/null +++ b/examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_9.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7946fc91d06bc3d3d1fe31c455f4a2e01352418e18018615df0790f932dbe9b4 +size 137625 diff --git a/examples/training_sample/processed/00000002/train_reference_instance_00.mp4 b/examples/training_sample/processed/00000002/train_reference_instance_00.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5b19296b640d156094b162d7a38943082e2097ee Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_instance_00.mp4 
differ diff --git a/examples/training_sample/processed/00000002/train_reference_instance_01.mp4 b/examples/training_sample/processed/00000002/train_reference_instance_01.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..88ee3300031bb1a7152ef63ee0de87c3bfddb15f Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_instance_01.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_instance_02.mp4 b/examples/training_sample/processed/00000002/train_reference_instance_02.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..873ed7aeb17a0acf68da6bea4fea07f402a04fa2 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_instance_02.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_instance_03.mp4 b/examples/training_sample/processed/00000002/train_reference_instance_03.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..188c288f58dcd5cd69c3c808597973b804a4ddfa Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_instance_03.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_instance_04.mp4 b/examples/training_sample/processed/00000002/train_reference_instance_04.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..51324426c2c83c6509299b8ee85d3221b85e3324 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_instance_04.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_instances.json b/examples/training_sample/processed/00000002/train_reference_instances.json new file mode 100644 index 0000000000000000000000000000000000000000..17d8828cb7147bff39d6ba9260a6aa29b052f5e3 --- /dev/null +++ b/examples/training_sample/processed/00000002/train_reference_instances.json @@ -0,0 +1,61 @@ +{ + "target_video": "train_target_rgb.mp4", + 
"prompts": [ + "person", + "radio", + "chessboard", + "food box" + ], + "instances": [ + { + "rank": 0, + "obj_id": 2, + "max_area_ratio": 0.21596304086538462, + "frame_indices": [ + 7, + 47, + 48 + ] + }, + { + "rank": 1, + "obj_id": 1, + "max_area_ratio": 0.17424128605769232, + "frame_indices": [ + 4, + 32, + 53 + ] + }, + { + "rank": 2, + "obj_id": 3, + "max_area_ratio": 0.13515875400641025, + "frame_indices": [ + 16, + 22, + 55 + ] + }, + { + "rank": 3, + "obj_id": 4, + "max_area_ratio": 0.13495843349358974, + "frame_indices": [ + 2, + 10, + 53 + ] + }, + { + "rank": 4, + "obj_id": 5, + "max_area_ratio": 0.029286858974358974, + "frame_indices": [ + 44, + 45, + 64 + ] + } + ] +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000002/train_reference_rgb.mp4 b/examples/training_sample/processed/00000002/train_reference_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..cb4cd54d016fff05b92a10cc1f7335ee52828017 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_rgb.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_scene_rgb.mp4 b/examples/training_sample/processed/00000002/train_reference_scene_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c094cda51bb786ae57a0f841b0b03aa7e1c0910a Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_scene_rgb.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_reference_scene_rgb_orig.mp4 b/examples/training_sample/processed/00000002/train_reference_scene_rgb_orig.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0e588219756e8328af0300ce20c5295f4452c751 Binary files /dev/null and b/examples/training_sample/processed/00000002/train_reference_scene_rgb_orig.mp4 differ diff --git a/examples/training_sample/processed/00000002/train_sample.json 
b/examples/training_sample/processed/00000002/train_sample.json new file mode 100644 index 0000000000000000000000000000000000000000..b6a664901a63e67b3d03139bbb198b4d72d5ebee --- /dev/null +++ b/examples/training_sample/processed/00000002/train_sample.json @@ -0,0 +1,172 @@ +{ + "t0": 9, + "P9_idx": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ], + "P1_idx": [ + 8 + ], + "T_idx": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73 + ], + "C_idx": [ + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120 + ], + "scene_idx": 105, + "R_idx": [ + 75, + 77, + 81, + 79, + 83, + 85, + 87 + ], + "R_iou": [ + 0.6607234699125665, + 0.5335322757970787, + 0.4243924392439244, + 0.41892492225677475, + 0.35327022375215145, + 0.30494350282485877, + 0.29902030384779216 + ], + "R_stats": { + "threshold": 0.04, + "max_refs": 7, + "stride": 2, + "voxel_size": 0.01, + "num_targets": 65, + "num_candidates": 23, + "best_iou": 0.6607234699125665, + "avg_iou": 0.23296677640349647 + }, + "projection_channels": [ + "rgb" + ], + "output_size": [ + 480, + 832 + ], + "add_fg_to_projection": true, + "has_fg": true, + "src_aug_applied": false, + "naming": "figure" +} \ No newline at end of file diff --git a/examples/training_sample/processed/00000002/train_target_fg_rgb.txt b/examples/training_sample/processed/00000002/train_target_fg_rgb.txt new file mode 100644 index 
0000000000000000000000000000000000000000..fcaf6ead9d97048ac9b491d11b1e292ddd5ce4d0 --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_fg_rgb.txt @@ -0,0 +1 @@ +Four children in scout uniforms crouch, one adjusting a vintage radio, another eating, while a chessboard lies nearby on dry grass. diff --git a/examples/training_sample/processed/00000002/train_target_proj_fg_rgb.mp4 b/examples/training_sample/processed/00000002/train_target_proj_fg_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0f518e074596a8c26976fd2c6d6665aa7662c826 --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_proj_fg_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897a8b966c6809e02dc63fb01b55076c86aec6647348e235ebf5ebbd624e9971 +size 388937 diff --git a/examples/training_sample/processed/00000002/train_target_rgb.mp4 b/examples/training_sample/processed/00000002/train_target_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e4d0c55f8cccf351dfd37d80280f83419d60733c --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790e09dc7a0369cd7737fd98d03e1b9b8f163255fca9963aa37a015019aeb08e +size 371700 diff --git a/examples/training_sample/processed/00000002/train_target_rgb.txt b/examples/training_sample/processed/00000002/train_target_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..201e54dfdbae1052e5522d3d47eda2195f58ab4c --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_rgb.txt @@ -0,0 +1 @@ +Children in scout uniforms crouch in dry grass, focused on a vintage radio. One adjusts dials while others eat sandwiches, a chessboard nearby. Warm sunlight bathes the scene, casting soft shadows. The camera slowly pans right, revealing more of the grassy field and the quiet camaraderie. 
A sense of peaceful exploration lingers in the golden afternoon air. diff --git a/examples/training_sample/processed/00000002/train_target_scene_proj_fg_overlay_rgb.mp4 b/examples/training_sample/processed/00000002/train_target_scene_proj_fg_overlay_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..cf5bd346ffe25d9b8713cae5fadb608aa84080e6 --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_scene_proj_fg_overlay_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0125ae4b359b952631d175d10158fd3e0a6442fe3bf5fc914efb3690d288b80 +size 1714073 diff --git a/examples/training_sample/processed/00000002/train_target_scene_proj_rgb.mp4 b/examples/training_sample/processed/00000002/train_target_scene_proj_rgb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f99972e116ce90a3f60aba86c52ba663e2514f5d --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_scene_proj_rgb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c44ee871bd32fe73f637f56a1dd889895eb892c52ada65bdebe24252768b7e +size 1671581 diff --git a/examples/training_sample/processed/00000002/train_target_scene_proj_rgb_orig.mp4 b/examples/training_sample/processed/00000002/train_target_scene_proj_rgb_orig.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f99972e116ce90a3f60aba86c52ba663e2514f5d --- /dev/null +++ b/examples/training_sample/processed/00000002/train_target_scene_proj_rgb_orig.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c44ee871bd32fe73f637f56a1dd889895eb892c52ada65bdebe24252768b7e +size 1671581 diff --git a/examples/training_sample/processed/00000002/train_target_scene_rgb.txt b/examples/training_sample/processed/00000002/train_target_scene_rgb.txt new file mode 100644 index 0000000000000000000000000000000000000000..3265d1dc3b0d1b7fdbf8afd09abe3e08a050d86a --- /dev/null +++ 
b/examples/training_sample/processed/00000002/train_target_scene_rgb.txt @@ -0,0 +1 @@ +Dry grass sways gently under golden afternoon sun, casting long shadows across an open field. Sparse trees dot the horizon, their leaves rustling softly. The air feels warm and still, with a quiet, natural stillness enveloping the landscape. No structures or man-made features break the serene, untouched wilderness. diff --git a/examples/training_sample/processed_lmdb/shard_000.lmdb/data.mdb b/examples/training_sample/processed_lmdb/shard_000.lmdb/data.mdb new file mode 100644 index 0000000000000000000000000000000000000000..9c9b2ae6386b68808bb913f8f4cf2beb9b2c5d5e --- /dev/null +++ b/examples/training_sample/processed_lmdb/shard_000.lmdb/data.mdb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2558e9ff5cf07bab388f8571f7a841c87205abf6918700d88d8b99f40d37cb5 +size 73129984 diff --git a/examples/training_sample/processed_lmdb/shard_000.lmdb/lock.mdb b/examples/training_sample/processed_lmdb/shard_000.lmdb/lock.mdb new file mode 100644 index 0000000000000000000000000000000000000000..64ee83c96b070bea3afac2a87f4afb0fe5285e06 Binary files /dev/null and b/examples/training_sample/processed_lmdb/shard_000.lmdb/lock.mdb differ diff --git a/examples/training_sample/processed_lmdb/sharded_keys_cache.pkl b/examples/training_sample/processed_lmdb/sharded_keys_cache.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fd998e59dcb77f4921045a7ca3a7250fd78c5651 --- /dev/null +++ b/examples/training_sample/processed_lmdb/sharded_keys_cache.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dcd496aca603f694d5ff00cca1e44a01fb140625cfe99e6a0d98064bcd24ef +size 180 diff --git a/examples/training_sample/raw/.DS_Store b/examples/training_sample/raw/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7dcd2570fd01204c01da220b384d76654443d070 Binary files /dev/null and b/examples/training_sample/raw/.DS_Store 
differ diff --git a/examples/training_sample/raw/MIRA/4230740.0.mp4 b/examples/training_sample/raw/MIRA/4230740.0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f7d190761630eb4cc85578197a352011c5d1f9e6 --- /dev/null +++ b/examples/training_sample/raw/MIRA/4230740.0.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff4c4f7fc5b7fc0f1a2f9a390d12fe0acb16df0b6caffd0d993f54a56bdd25aa +size 12010782 diff --git a/examples/training_sample/raw/RealEstate10K/.DS_Store b/examples/training_sample/raw/RealEstate10K/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/examples/training_sample/raw/RealEstate10K/.DS_Store differ diff --git a/examples/training_sample/raw/RealEstate10K/1259726fc1f8e966.mp4 b/examples/training_sample/raw/RealEstate10K/1259726fc1f8e966.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9fcc54a5c40b09b7c121f1bb4e9ea2f04fc6a812 --- /dev/null +++ b/examples/training_sample/raw/RealEstate10K/1259726fc1f8e966.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dab2c18247fa4e43bbc69fcef790666243a3cb99e398cf044728f6917a535e0 +size 4617996 diff --git a/examples/training_sample/raw/SpatiaVID_HQ/.DS_Store b/examples/training_sample/raw/SpatiaVID_HQ/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/examples/training_sample/raw/SpatiaVID_HQ/.DS_Store differ diff --git a/examples/training_sample/raw/SpatiaVID_HQ/0a2d11ab-57d7-516d-b873-c29555a41796.mp4 b/examples/training_sample/raw/SpatiaVID_HQ/0a2d11ab-57d7-516d-b873-c29555a41796.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e3a33fb0753149ebbec84b4295e4c683d03e55b3 --- /dev/null +++ b/examples/training_sample/raw/SpatiaVID_HQ/0a2d11ab-57d7-516d-b873-c29555a41796.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7a973c0fe0e6c018e09d0680410899780bb0d98b2bef63955508473b534ec8c4 +size 4098899