Generates point cloud dataset from HoHo25k.
Creates a script to generate a point cloud dataset from the HoHo25k dataset.
The script reads COLMAP data, ground truth vertices, and connections from the dataset and saves them as pickle files.
This will allow for training models on the generated point cloud data.
- generate_pcloud_dataset.py +57 -0
generate_pcloud_dataset.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datasets import load_dataset
|
| 2 |
+
from hoho2025.viz3d import *
|
| 3 |
+
import os
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pickle
|
| 6 |
+
|
| 7 |
+
from utils import read_colmap_rec
|
| 8 |
+
|
| 9 |
+
from tqdm import tqdm
|
| 10 |
+
|
| 11 |
+
ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
|
| 12 |
+
#ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
|
| 13 |
+
ds = ds.shuffle()
|
| 14 |
+
|
| 15 |
+
# Create output directory
|
| 16 |
+
output_dir = "/home/skvrnjan/personal/hoho_fully/"
|
| 17 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
counter = 0
|
| 20 |
+
for a in tqdm(ds['train'], desc="Processing dataset"):
|
| 21 |
+
colmap = read_colmap_rec(a['colmap_binary'])
|
| 22 |
+
order_id = a['order_id']
|
| 23 |
+
|
| 24 |
+
# Save as pickle file
|
| 25 |
+
output_file = os.path.join(output_dir, f'sample_{order_id}.pkl')
|
| 26 |
+
if os.path.exists(output_file):
|
| 27 |
+
continue
|
| 28 |
+
|
| 29 |
+
# Extract point cloud from COLMAP
|
| 30 |
+
points3d = colmap.points3D
|
| 31 |
+
if len(points3d) == 0:
|
| 32 |
+
continue
|
| 33 |
+
|
| 34 |
+
# Convert to numpy arrays
|
| 35 |
+
point_coords = np.array([point.xyz for point in points3d.values()])
|
| 36 |
+
point_colors = np.array([point.color for point in points3d.values()])
|
| 37 |
+
|
| 38 |
+
# Get ground truth data
|
| 39 |
+
gt_vertices = np.array(a['wf_vertices'])
|
| 40 |
+
gt_connections = np.array(a['wf_edges'])
|
| 41 |
+
|
| 42 |
+
# Save the data
|
| 43 |
+
sample_data = {
|
| 44 |
+
'point_cloud': point_coords,
|
| 45 |
+
'point_colors': point_colors,
|
| 46 |
+
'gt_vertices': gt_vertices,
|
| 47 |
+
'gt_connections': gt_connections,
|
| 48 |
+
'sample_id': order_id
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
with open(output_file, 'wb') as f:
|
| 52 |
+
pickle.dump(sample_data, f)
|
| 53 |
+
|
| 54 |
+
counter += 1
|
| 55 |
+
|
| 56 |
+
print(f"Generated {counter} samples in {output_dir}")
|
| 57 |
+
|