jskvrna commited on
Commit
01b1bb1
·
1 Parent(s): a4196d4

Generates a point cloud dataset from HoHo25k.

Browse files

Creates a script to generate a point cloud dataset from the HoHo25k dataset.

The script reads COLMAP data, ground truth vertices, and connections from the dataset and saves them as pickle files.
This will allow for training models on the generated point cloud data.

Files changed (1) hide show
  1. generate_pcloud_dataset.py +57 -0
generate_pcloud_dataset.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from datasets import load_dataset
from hoho2025.viz3d import *  # noqa: F401,F403 -- no name from it is used below; kept in case import has side effects. TODO confirm and drop.
import os
import numpy as np
import pickle

from utils import read_colmap_rec

from tqdm import tqdm

# Machine-specific paths; switch the cache dir when running on the cluster.
DATASET_CACHE_DIR = "/media/skvrnjan/sd/hoho25k/"
# DATASET_CACHE_DIR = "/mnt/personal/skvrnjan/hoho25k/"
OUTPUT_DIR = "/home/skvrnjan/personal/hoho_fully/"


def extract_point_cloud(points3d):
    """Convert a COLMAP ``points3D`` mapping into coordinate and color arrays.

    Parameters
    ----------
    points3d : mapping of point id -> point record exposing ``.xyz`` and ``.color``

    Returns
    -------
    (coords, colors) : two numpy arrays of equal length, one row per 3D point.
    """
    # Materialize once so both arrays are built from the same iteration order.
    points = list(points3d.values())
    coords = np.array([p.xyz for p in points])
    colors = np.array([p.color for p in points])
    return coords, colors


def main():
    """Read HoHo25k samples and pickle each one's COLMAP point cloud + GT wireframe.

    Each output file ``sample_<order_id>.pkl`` holds a dict with keys
    ``point_cloud``, ``point_colors``, ``gt_vertices``, ``gt_connections``,
    ``sample_id``. Existing files are skipped, so an interrupted run can resume.
    """
    ds = load_dataset("usm3d/hoho25k", cache_dir=DATASET_CACHE_DIR, trust_remote_code=True)
    # NOTE: unseeded shuffle -- processing order differs between runs, but the
    # per-sample output files are order-independent.
    ds = ds.shuffle()

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    counter = 0  # counts samples written by THIS run (skipped ones excluded)
    for sample in tqdm(ds['train'], desc="Processing dataset"):
        order_id = sample['order_id']

        # Resume support: skip before doing any expensive work. (The original
        # parsed the COLMAP binary before this check, wasting time on resume.)
        output_file = os.path.join(OUTPUT_DIR, f'sample_{order_id}.pkl')
        if os.path.exists(output_file):
            continue

        colmap = read_colmap_rec(sample['colmap_binary'])

        # Samples with no reconstructed 3D points carry no training signal.
        points3d = colmap.points3D
        if len(points3d) == 0:
            continue

        point_coords, point_colors = extract_point_cloud(points3d)

        # Ground-truth wireframe: vertex coordinates and vertex-index edges.
        gt_vertices = np.array(sample['wf_vertices'])
        gt_connections = np.array(sample['wf_edges'])

        sample_data = {
            'point_cloud': point_coords,
            'point_colors': point_colors,
            'gt_vertices': gt_vertices,
            'gt_connections': gt_connections,
            'sample_id': order_id
        }

        with open(output_file, 'wb') as f:
            pickle.dump(sample_data, f)

        counter += 1

    print(f"Generated {counter} samples in {OUTPUT_DIR}")


if __name__ == "__main__":
    main()