bulkobubulko committed
Commit 9632ddb · verified · 1 Parent(s): d60b992

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,20 +1,4 @@
- ---
- license: apache-2.0
- ---
- # Handcrafted solution example for the S23DR competition
-
- This repo provides a minimalistic example of a wireframe estimation submission to S23DR competition.
- We recommend you take a look at [this example](https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py), for detailed code of this submission. It also provides useful I/O and visualization functions.
-
- This example seeks to simply provide minimal code which succeeds at reading the dataset and producing a solution (in this case two vertices at the origin and edge of zero length connecting them).
-
- `script.py` - is the main file which is run by the competition space. It should produce `submission.parquet` as the result of the run. Please see the additional comments in the `script.py` file.
-
-
- # How to submit
-
- Use the notebook [example_notebook.ipynb](example_notebook.ipynb)
-
-
+ # Handcrafted Submission 2025-1
+
+ This repo contains a submission to the [S23DR Challenge](https://huggingface.co/spaces/usm3d/S23DR) (part of the [USM3D](https://usm3d.github.io/) workshop at CVPR2025). It was prepared by [bulkobubulko](https://huggingface.co/bulkobubulko).
+
script.py CHANGED
@@ -15,7 +15,7 @@ from joblib import Parallel, delayed
  import os
  import json
  import gc
- from hoho2025.example_solutions import predict_wireframe
+ from tools2025.hoho2025.hoho2025.example_solutions import predict_wireframe
  # check the https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py for the example solution

  def empty_solution():
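The only change to `script.py` is the import path, which now points at the copy of `hoho2025` vendored under `tools2025/` (presumably so the competition runner resolves it without a pip install). A minimal, hypothetical alternative, assuming the same two module locations as above, would prefer an installed package and fall back to the vendored copy:

```python
# Hypothetical fallback: prefer an installed hoho2025, else the vendored copy.
try:
    from hoho2025.example_solutions import predict_wireframe
except ImportError:
    from tools2025.hoho2025.hoho2025.example_solutions import predict_wireframe
```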
tools2025/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
tools2025/.gitignore ADDED
@@ -0,0 +1,3 @@
+ .DS_Store
+ __pycache__
+ hoho.egg-info/
tools2025/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
+ Copyright 2024 Jack Langerman & Dmytro Mishkin
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
tools2025/README.md ADDED
@@ -0,0 +1,81 @@
+ ---
+ license: apache-2.0
+ ---
+ # HoHo2025 Tools
+
+ Tools and utilities for the [S23DR-2025 competition](https://huggingface.co/spaces/usm3d/S23DR2025) and the [HoHo25k Dataset](https://huggingface.co/datasets/usm3d/hoho25k).
+
+ ## Installation
+
+ ```bash
+ pip install hoho2025
+ ```
+
+ ### pip install over HTTP
+ ```bash
+ pip install git+http://hf.co/usm3d/tools2025.git
+ ```
+
+ or install an editable copy:
+ ```bash
+ git clone http://hf.co/usm3d/tools2025
+ cd tools2025
+ pip install -e .
+ ```
+
+ ### Usage example
+
+ ```python
+ from datasets import load_dataset
+ from hoho2025.vis import plot_all_modalities
+ from hoho2025.viz3d import *
+
+ def read_colmap_rec(colmap_data):
+     import pycolmap
+     import tempfile, zipfile
+     import io
+     with tempfile.TemporaryDirectory() as tmpdir:
+         with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
+             zf.extractall(tmpdir)  # unpacks cameras.txt, images.txt, etc. to tmpdir
+         # Now parse with pycolmap
+         rec = pycolmap.Reconstruction(tmpdir)
+     return rec
+
+ ds = load_dataset("usm3d/hoho25k", streaming=True, trust_remote_code=True)
+ for a in ds['train']:
+     break
+
+ fig, ax = plot_all_modalities(a)
+
+ ## Now 3d
+
+ fig3d = init_figure()
+ plot_reconstruction(fig3d, read_colmap_rec(a['colmap_binary']))
+ plot_wireframe(fig3d, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
+ plot_bpo_cameras_from_entry(fig3d, a)
+ fig3d
+ ```
+
+ ## Example wireframe estimation
+
+ Look in [hoho2025/example_solutions.py](hoho2025/example_solutions.py)
+
+ ```python
+ from hoho2025.example_solutions import predict_wireframe
+ pred_vertices, pred_connections = predict_wireframe(a)
+
+ fig3d = init_figure()
+ plot_reconstruction(fig3d, read_colmap_rec(a['colmap_binary']))
+ plot_wireframe(fig3d, pred_vertices, pred_connections, color='rgb(0, 0, 255)')
+ fig3d
+ ```
+
+
+ And to compute the metric:
+
+ ```python
+ from hoho2025.metric_helper import hss
+
+ score = hss(pred_vertices, pred_connections, a['wf_vertices'], a['wf_edges'], vert_thresh=0.5, edge_thresh=0.5)
+ print(score)
+ ```
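A quick sanity check against the metric above, assuming the same entry `a` as in the usage example: scoring the trivial baseline returned by `empty_solution()` (two vertices at the origin, one zero-length edge) gives the floor the handcrafted solution must beat.

```python
from hoho2025.example_solutions import empty_solution
from hoho2025.metric_helper import hss

base_vertices, base_edges = empty_solution()  # np.zeros((2, 3)), [(0, 1)]
base_score = hss(base_vertices, base_edges, a['wf_vertices'], a['wf_edges'],
                 vert_thresh=0.5, edge_thresh=0.5)
print(base_score)
```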
tools2025/hoho2025/__init__.py ADDED
@@ -0,0 +1,25 @@
+ from .hoho import *
+ from . import vis
+
+ import importlib
+ import sys
+
+ class LazyLoadModule:
+     def __init__(self, module_name):
+         self.module_name = module_name
+         self.module = None
+
+     def __getattribute__(self, attr):
+         if attr == 'module_name' or attr == 'module':
+             return super().__getattribute__(attr)
+
+         if self.module is None:
+             self.module = importlib.import_module(f'hoho2025.{self.module_name}')
+             sys.modules[self.module_name] = self.module
+
+         return getattr(self.module, attr)
+
+ try:
+     import viz3d
+ except ImportError:
+     viz3d = LazyLoadModule('viz3d')
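`LazyLoadModule` defers importing `viz3d` (and its plotting dependencies) until the first attribute access. A minimal sketch of the intended behavior, assuming `init_figure` lives in `hoho2025.viz3d` as the tools README suggests:

```python
from hoho2025 import viz3d

# Nothing has been imported for viz3d yet; this first attribute
# access triggers the real import via LazyLoadModule.__getattribute__.
fig = viz3d.init_figure()
```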
tools2025/hoho2025/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.58 kB)
 
tools2025/hoho2025/__pycache__/hoho.cpython-311.pyc ADDED
Binary file (19.2 kB)
 
tools2025/hoho2025/color_mappings.py ADDED
@@ -0,0 +1,209 @@
+ gestalt_color_mapping = {
+     "unclassified": (215, 62, 138),
+     "apex": (235, 88, 48),
+     "eave_end_point": (248, 130, 228),
+     "flashing_end_point": (71, 11, 161),
+     "ridge": (214, 251, 248),
+     "rake": (13, 94, 47),
+     "eave": (54, 243, 63),
+     "post": (187, 123, 236),
+     "ground_line": (136, 206, 14),
+     "flashing": (162, 162, 32),
+     "step_flashing": (169, 255, 219),
+     "hip": (8, 89, 52),
+     "valley": (85, 27, 65),
+     "roof": (215, 232, 179),
+     "door": (110, 52, 23),
+     "garage": (50, 233, 171),
+     "window": (230, 249, 40),
+     "shutter": (122, 4, 233),
+     "fascia": (95, 230, 240),
+     "soffit": (2, 102, 197),
+     "horizontal_siding": (131, 88, 59),
+     "vertical_siding": (110, 187, 198),
+     "brick": (171, 252, 7),
+     "concrete": (32, 47, 246),
+     "other_wall": (112, 61, 240),
+     "trim": (151, 206, 58),
+     "unknown": (127, 127, 127),
+     "transition_line": (0, 0, 0),
+ }
+
+ ade20k_color_mapping = {
+     'wall': (120, 120, 120),
+     'building;edifice': (180, 120, 120),
+     'sky': (6, 230, 230),
+     'floor;flooring': (80, 50, 50),
+     'tree': (4, 200, 3),
+     'ceiling': (120, 120, 80),
+     'road;route': (140, 140, 140),
+     'bed': (204, 5, 255),
+     'windowpane;window': (230, 230, 230),
+     'grass': (4, 250, 7),
+     'cabinet': (224, 5, 255),
+     'sidewalk;pavement': (235, 255, 7),
+     'person;individual;someone;somebody;mortal;soul': (150, 5, 61),
+     'earth;ground': (120, 120, 70),
+     'door;double;door': (8, 255, 51),
+     'table': (255, 6, 82),
+     'mountain;mount': (143, 255, 140),
+     'plant;flora;plant;life': (204, 255, 4),
+     'curtain;drape;drapery;mantle;pall': (255, 51, 7),
+     'chair': (204, 70, 3),
+     'car;auto;automobile;machine;motorcar': (0, 102, 200),
+     'water': (61, 230, 250),
+     'painting;picture': (255, 6, 51),
+     'sofa;couch;lounge': (11, 102, 255),
+     'shelf': (255, 7, 71),
+     'house': (255, 9, 224),
+     'sea': (9, 7, 230),
+     'mirror': (220, 220, 220),
+     'rug;carpet;carpeting': (255, 9, 92),
+     'field': (112, 9, 255),
+     'armchair': (8, 255, 214),
+     'seat': (7, 255, 224),
+     'fence;fencing': (255, 184, 6),
+     'desk': (10, 255, 71),
+     'rock;stone': (255, 41, 10),
+     'wardrobe;closet;press': (7, 255, 255),
+     'lamp': (224, 255, 8),
+     'bathtub;bathing;tub;bath;tub': (102, 8, 255),
+     'railing;rail': (255, 61, 6),
+     'cushion': (255, 194, 7),
+     'base;pedestal;stand': (255, 122, 8),
+     'box': (0, 255, 20),
+     'column;pillar': (255, 8, 41),
+     'signboard;sign': (255, 5, 153),
+     'chest;of;drawers;chest;bureau;dresser': (6, 51, 255),
+     'counter': (235, 12, 255),
+     'sand': (160, 150, 20),
+     'sink': (0, 163, 255),
+     'skyscraper': (140, 140, 140),
+     'fireplace;hearth;open;fireplace': (250, 10, 15),
+     'refrigerator;icebox': (20, 255, 0),
+     'grandstand;covered;stand': (31, 255, 0),
+     'path': (255, 31, 0),
+     'stairs;steps': (255, 224, 0),
+     'runway': (153, 255, 0),
+     'case;display;case;showcase;vitrine': (0, 0, 255),
+     'pool;table;billiard;table;snooker;table': (255, 71, 0),
+     'pillow': (0, 235, 255),
+     'screen;door;screen': (0, 173, 255),
+     'stairway;staircase': (31, 0, 255),
+     'river': (11, 200, 200),
+     'bridge;span': (255, 82, 0),
+     'bookcase': (0, 255, 245),
+     'blind;screen': (0, 61, 255),
+     'coffee;table;cocktail;table': (0, 255, 112),
+     'toilet;can;commode;crapper;pot;potty;stool;throne': (0, 255, 133),
+     'flower': (255, 0, 0),
+     'book': (255, 163, 0),
+     'hill': (255, 102, 0),
+     'bench': (194, 255, 0),
+     'countertop': (0, 143, 255),
+     'stove;kitchen;stove;range;kitchen;range;cooking;stove': (51, 255, 0),
+     'palm;palm;tree': (0, 82, 255),
+     'kitchen;island': (0, 255, 41),
+     'computer;computing;machine;computing;device;data;processor;electronic;computer;information;processing;system': (0, 255, 173),
+     'swivel;chair': (10, 0, 255),
+     'boat': (173, 255, 0),
+     'bar': (0, 255, 153),
+     'arcade;machine': (255, 92, 0),
+     'hovel;hut;hutch;shack;shanty': (255, 0, 255),
+     'bus;autobus;coach;charabanc;double-decker;jitney;motorbus;motorcoach;omnibus;passenger;vehicle': (255, 0, 245),
+     'towel': (255, 0, 102),
+     'light;light;source': (255, 173, 0),
+     'truck;motortruck': (255, 0, 20),
+     'tower': (255, 184, 184),
+     'chandelier;pendant;pendent': (0, 31, 255),
+     'awning;sunshade;sunblind': (0, 255, 61),
+     'streetlight;street;lamp': (0, 71, 255),
+     'booth;cubicle;stall;kiosk': (255, 0, 204),
+     'television;television;receiver;television;set;tv;tv;set;idiot;box;boob;tube;telly;goggle;box': (0, 255, 194),
+     'airplane;aeroplane;plane': (0, 255, 82),
+     'dirt;track': (0, 10, 255),
+     'apparel;wearing;apparel;dress;clothes': (0, 112, 255),
+     'pole': (51, 0, 255),
+     'land;ground;soil': (0, 194, 255),
+     'bannister;banister;balustrade;balusters;handrail': (0, 122, 255),
+     'escalator;moving;staircase;moving;stairway': (0, 255, 163),
+     'ottoman;pouf;pouffe;puff;hassock': (255, 153, 0),
+     'bottle': (0, 255, 10),
+     'buffet;counter;sideboard': (255, 112, 0),
+     'poster;posting;placard;notice;bill;card': (143, 255, 0),
+     'stage': (82, 0, 255),
+     'van': (163, 255, 0),
+     'ship': (255, 235, 0),
+     'fountain': (8, 184, 170),
+     'conveyer;belt;conveyor;belt;conveyer;conveyor;transporter': (133, 0, 255),
+     'canopy': (0, 255, 92),
+     'washer;automatic;washer;washing;machine': (184, 0, 255),
+     'plaything;toy': (255, 0, 31),
+     'swimming;pool;swimming;bath;natatorium': (0, 184, 255),
+     'stool': (0, 214, 255),
+     'barrel;cask': (255, 0, 112),
+     'basket;handbasket': (92, 255, 0),
+     'waterfall;falls': (0, 224, 255),
+     'tent;collapsible;shelter': (112, 224, 255),
+     'bag': (70, 184, 160),
+     'minibike;motorbike': (163, 0, 255),
+     'cradle': (153, 0, 255),
+     'oven': (71, 255, 0),
+     'ball': (255, 0, 163),
+     'food;solid;food': (255, 204, 0),
+     'step;stair': (255, 0, 143),
+     'tank;storage;tank': (0, 255, 235),
+     'trade;name;brand;name;brand;marque': (133, 255, 0),
+     'microwave;microwave;oven': (255, 0, 235),
+     'pot;flowerpot': (245, 0, 255),
+     'animal;animate;being;beast;brute;creature;fauna': (255, 0, 122),
+     'bicycle;bike;wheel;cycle': (255, 245, 0),
+     'lake': (10, 190, 212),
+     'dishwasher;dish;washer;dishwashing;machine': (214, 255, 0),
+     'screen;silver;screen;projection;screen': (0, 204, 255),
+     'blanket;cover': (20, 0, 255),
+     'sculpture': (255, 255, 0),
+     'hood;exhaust;hood': (0, 153, 255),
+     'sconce': (0, 41, 255),
+     'vase': (0, 255, 204),
+     'traffic;light;traffic;signal;stoplight': (41, 0, 255),
+     'tray': (41, 255, 0),
+     'ashcan;trash;can;garbage;can;wastebin;ash;bin;ash-bin;ashbin;dustbin;trash;barrel;trash;bin': (173, 0, 255),
+     'fan': (0, 245, 255),
+     'pier;wharf;wharfage;dock': (71, 0, 255),
+     'crt;screen': (122, 0, 255),
+     'plate': (0, 255, 184),
+     'monitor;monitoring;device': (0, 92, 255),
+     'bulletin;board;notice;board': (184, 255, 0),
+     'shower': (0, 133, 255),
+     'radiator': (255, 214, 0),
+     'glass;drinking;glass': (25, 194, 194),
+     'clock': (102, 255, 0),
+     'flag': (92, 0, 255),
+ }
+
+
+ EDGE_CLASSES = {
+     'cornice_return': 0,
+     'cornice_strip': 1,
+     'eave': 2,
+     'flashing': 3,
+     'hip': 4,
+     'rake': 5,
+     'ridge': 6,
+     'step_flashing': 7,
+     'transition_line': 8,
+     'valley': 9,
+ }
+ EDGE_CLASSES_BY_ID = {v: k for k, v in EDGE_CLASSES.items()}
+
+ edge_color_mapping = {
+     'cornice_return': (215, 62, 138),
+     'cornice_strip': (235, 88, 48),
+     'eave': (54, 243, 63),
+     'flashing': (162, 162, 32),
+     'hip': (8, 89, 52),
+     'rake': (13, 94, 47),
+     'ridge': (214, 251, 248),
+     'step_flashing': (169, 255, 219),
+     'transition_line': (200, 0, 50),
+     'valley': (85, 27, 65),
+ }
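A minimal sketch of how these tables tie together, assuming hypothetical per-edge class ids `edge_ids` (e.g. from a classifier): `EDGE_CLASSES_BY_ID` recovers the class names, and `edge_color_mapping` supplies RGB tuples for plotting.

```python
from hoho2025.color_mappings import EDGE_CLASSES_BY_ID, edge_color_mapping

edge_ids = [2, 6, 4]  # hypothetical predictions
names = [EDGE_CLASSES_BY_ID[i] for i in edge_ids]   # ['eave', 'ridge', 'hip']
colors = [edge_color_mapping[n] for n in names]     # RGB tuples for plotting
```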
tools2025/hoho2025/example_solutions.py ADDED
@@ -0,0 +1,715 @@
+ # Description: This file contains the handcrafted solution for the task of wireframe reconstruction
+ import io
+ import tempfile
+ import zipfile
+ from collections import defaultdict
+ from typing import Tuple, List
+ import cv2
+ import numpy as np
+ import pycolmap
+ from PIL import Image as PImage
+ from scipy.spatial.distance import cdist
+
+ from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
+
+
+ def empty_solution():
+     '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
+     return np.zeros((2, 3)), [(0, 1)]
+
+
+ def read_colmap_rec(colmap_data):
+     with tempfile.TemporaryDirectory() as tmpdir:
+         with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
+             zf.extractall(tmpdir)  # unpacks cameras.txt, images.txt, etc. to tmpdir
+         # Now parse with pycolmap
+         rec = pycolmap.Reconstruction(tmpdir)
+     return rec
+
+ def convert_entry_to_human_readable(entry):
+     out = {}
+     for k, v in entry.items():
+         if 'colmap' in k:
+             out[k] = read_colmap_rec(v)
+         elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't', 'depth']:
+             out[k] = np.array(v)
+         else:
+             out[k] = v
+     out['__key__'] = entry['order_id']
+     return out
+
+
+ def get_house_mask(ade20k_seg):
+     """
+     Get a mask of the house in the ADE20K segmentation map.
+     """
+     house_classes_ade20k = [
+         'wall',
+         'house',
+         'building;edifice',
+         'door;double;door',
+         'windowpane;window',
+     ]
+     np_seg = np.array(ade20k_seg)
+     full_mask = np.zeros(np_seg.shape[:2], dtype=np.uint8)
+     for c in house_classes_ade20k:
+         color = np.array(ade20k_color_mapping[c])
+         mask = cv2.inRange(np_seg, color - 0.5, color + 0.5)
+         full_mask = np.logical_or(full_mask, mask)
+     return full_mask
+
+
+ def point_to_segment_dist(pt, seg_p1, seg_p2):
+     """
+     Computes the Euclidean distance from pt to the line segment p1->p2.
+     pt, seg_p1, seg_p2: (x, y) as np.ndarray
+     """
+     # If both endpoints are the same, just return distance to one of them
+     if np.allclose(seg_p1, seg_p2):
+         return np.linalg.norm(pt - seg_p1)
+     seg_vec = seg_p2 - seg_p1
+     pt_vec = pt - seg_p1
+     seg_len2 = seg_vec.dot(seg_vec)
+     t = max(0, min(1, pt_vec.dot(seg_vec) / seg_len2))
+     proj = seg_p1 + t * seg_vec
+     return np.linalg.norm(pt - proj)
+
+
+ def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
+     """
+     Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
+     For each connected component, we do a line fit with cv2.fitLine, then measure
+     segment endpoints more robustly. We then associate apex points that are within
+     'edge_th' of the line segment. We record those apex–apex connections for edges
+     if at least 2 apexes lie near the same component line.
+     """
+     #--------------------------------------------------------------------------------
+     # Step A: Collect apex and eave_end vertices
+     #--------------------------------------------------------------------------------
+     if not isinstance(gest_seg_np, np.ndarray):
+         gest_seg_np = np.array(gest_seg_np)
+     vertices = []
+     # Apex
+     apex_color = np.array(gestalt_color_mapping['apex'])
+     apex_mask = cv2.inRange(gest_seg_np, apex_color - 0.5, apex_color + 0.5)
+     if apex_mask.sum() > 0:
+         output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
+         (numLabels, labels, stats, centroids) = output
+         stats, centroids = stats[1:], centroids[1:]  # skip background
+         for i in range(numLabels - 1):
+             vert = {"xy": centroids[i], "type": "apex"}
+             vertices.append(vert)
+
+     # Eave end
+     eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
+     eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color - 0.5, eave_end_color + 0.5)
+     if eave_end_mask.sum() > 0:
+         output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
+         (numLabels, labels, stats, centroids) = output
+         stats, centroids = stats[1:], centroids[1:]
+         for i in range(numLabels - 1):
+             vert = {"xy": centroids[i], "type": "eave_end_point"}
+             vertices.append(vert)
+
+     flashing_end_color = np.array(gestalt_color_mapping['flashing_end_point'])
+     flashing_end_mask = cv2.inRange(gest_seg_np, flashing_end_color - 0.5, flashing_end_color + 0.5)
+     if flashing_end_mask.sum() > 0:
+         output = cv2.connectedComponentsWithStats(flashing_end_mask, 8, cv2.CV_32S)
+         (numLabels, labels, stats, centroids) = output
+         if numLabels > 1:
+             stats_fl, centroids_fl = stats[1:], centroids[1:]
+             for i in range(numLabels - 1):
+                 vert = {"xy": centroids_fl[i], "type": "flashing_end_point"}
+                 vertices.append(vert)
+
+     # Consolidate apex points as array:
+     apex_pts = []
+     apex_idx_map = []  # keep track of index in 'vertices'
+     for idx, v in enumerate(vertices):
+         apex_pts.append(v['xy'])
+         apex_idx_map.append(idx)
+     apex_pts = np.array(apex_pts)
+
+     connections = []
+     edge_classes = ['eave', 'ridge', 'rake', 'valley', 'flashing', 'hip', 'step_flashing', 'transition_line']
+     for edge_class in edge_classes:
+         edge_color = np.array(gestalt_color_mapping[edge_class])
+         mask_raw = cv2.inRange(gest_seg_np, edge_color - 0.5, edge_color + 0.5)
+         # Possibly do morphological open/close to avoid merges or small holes
+         # open/close makes results worse
+         kernel = np.ones((5, 5), np.uint8)  # smaller kernel to reduce over-merge
+         mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, kernel)
+         if mask.sum() == 0:
+             continue
+
+         # Connected components
+         output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
+         (numLabels, labels, stats, centroids) = output
+         # skip the background
+         stats, centroids = stats[1:], centroids[1:]
+         label_indices = range(1, numLabels)
+
+         # For each connected component, do a line fit
+         for lbl in label_indices:
+             ys, xs = np.where(labels == lbl)
+             if len(xs) < 2:
+                 continue
+             # Fit a line using cv2.fitLine
+             pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
+             # (vx, vy, x0, y0) = direction + a point on the line
+             line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
+                                       param=0, reps=0.01, aeps=0.01)
+             vx, vy, x0, y0 = line_params.ravel()
+             # We'll approximate endpoints by projecting (xs, ys) onto the line,
+             # then taking min and max in the 1D param along the line.
+
+             # param along the line = ( (x - x0)*vx + (y - y0)*vy )
+             proj = ((xs - x0) * vx + (ys - y0) * vy)
+             proj_min, proj_max = proj.min(), proj.max()
+             p1 = np.array([x0 + proj_min * vx, y0 + proj_min * vy])
+             p2 = np.array([x0 + proj_max * vx, y0 + proj_max * vy])
+
+             #--------------------------------------------------------------------------------
+             # Step C: If apex points are within 'edge_th' of segment, they are connected
+             #--------------------------------------------------------------------------------
+             if len(apex_pts) < 2:
+                 continue
+
+             # Distance from each apex to the line segment
+             dists = np.array([
+                 point_to_segment_dist(apex_pts[i], p1, p2)
+                 for i in range(len(apex_pts))
+             ])
+
+             # Indices of apex points that are near
+             near_mask = (dists <= edge_th)
+             near_indices = np.where(near_mask)[0]
+             if len(near_indices) < 2:
+                 continue
+
+             # Connect each pair among these near apex points
+             for i in range(len(near_indices)):
+                 for j in range(i + 1, len(near_indices)):
+                     a_idx = near_indices[i]
+                     b_idx = near_indices[j]
+                     # 'a_idx' and 'b_idx' are indices in apex_pts / apex_idx_map
+                     vA = apex_idx_map[a_idx]
+                     vB = apex_idx_map[b_idx]
+                     # Store the connection using sorted indexing
+                     conn = tuple(sorted((vA, vB)))
+                     connections.append(conn)
+
+     return vertices, connections
+
+
+ def get_uv_depth(vertices: List[dict],
+                  depth_fitted: np.ndarray,
+                  sparse_depth: np.ndarray,
+                  search_radius: int = 10) -> Tuple[np.ndarray, np.ndarray]:
+     """
+     For each vertex, returns a 2D array of (u,v) and a matching 1D array of depths.
+
+     We attempt to use the sparse_depth if available in a local neighborhood:
+       1. For each vertex coordinate (x, y), define a local window in sparse_depth
+          of size (2*search_radius + 1).
+       2. Collect all valid (nonzero) values in that window.
+       3. If any exist, we take the *closest* valid pixel's depth.
+       4. Otherwise, we use depth_fitted[y, x].
+
+     Parameters
+     ----------
+     vertices : List[dict]
+         Each dict must have "xy" at least, e.g. {"xy": (x, y), ...}
+     depth_fitted : np.ndarray
+         A 2D array (H, W), the dense (or corrected) depth for fallback.
+     sparse_depth : np.ndarray
+         A 2D array (H, W), mostly zeros except where accurate data is available.
+     search_radius : int
+         Pixel radius around the vertex in which to look for sparse depth values.
+
+     Returns
+     -------
+     uv : np.ndarray of shape (N, 2)
+         2D float coordinates of each vertex (x, y).
+     vertex_depth : np.ndarray of shape (N,)
+         Depth value chosen for each vertex.
+     """
+
+     # Collect each vertex's (x, y)
+     uv = np.array([vert['xy'] for vert in vertices], dtype=np.float32)
+
+     # Convert to integer pixel coordinates (round or floor)
+     uv_int = np.round(uv).astype(np.int32)
+     H, W = depth_fitted.shape[:2]
+
+     # Clip coordinates to stay within image bounds
+     uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
+     uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
+
+     # Prepare output array of depths
+     vertex_depth = np.zeros(len(vertices), dtype=np.float32)
+     dense_count = 0
+
+     for i, (x_i, y_i) in enumerate(uv_int):
+         # Local region in [x_i - search_radius, x_i + search_radius]
+         x0 = max(0, x_i - search_radius)
+         x1 = min(W, x_i + search_radius + 1)
+         y0 = max(0, y_i - search_radius)
+         y1 = min(H, y_i + search_radius + 1)
+
+         # Crop out the local window in sparse_depth
+         region = sparse_depth[y0:y1, x0:x1]
+
+         # Find all valid (non-zero) depths
+         valid_mask = (region > 0)
+         valid_y, valid_x = np.where(valid_mask)
+
+         if valid_y.size > 0:
+             # Compute global coordinates for each valid pixel
+             global_x = x0 + valid_x
+             global_y = y0 + valid_y
+
+             # Compute squared distance to center (x_i, y_i)
+             dist_sq = (global_x - x_i)**2 + (global_y - y_i)**2
+
+             # Find the nearest valid pixel
+             min_idx = np.argmin(dist_sq)
+             nearest_depth = region[valid_y[min_idx], valid_x[min_idx]]
+             vertex_depth[i] = nearest_depth
+         else:
+             # Fallback to the dense depth
+             vertex_depth[i] = depth_fitted[y_i, x_i]
+             dense_count += 1
+     return uv, vertex_depth
+
+
+
+ def project_vertices_to_3d(uv: np.ndarray, depth_vert: np.ndarray, col_img: pycolmap.Image) -> np.ndarray:
+     """
+     Projects 2D vertex coordinates with associated depths to 3D world coordinates.
+
+     Parameters
+     ----------
+     uv : np.ndarray
+         (N, 2) array of 2D vertex coordinates (u, v).
+     depth_vert : np.ndarray
+         (N,) array of depth values for each vertex.
+     col_img : pycolmap.Image
+
+     Returns
+     -------
+     vertices_3d : np.ndarray
+         (N, 3) array of vertex coordinates in 3D world space.
+     """
+     # Backproject to 3D local camera coordinates
+     xy_local = np.ones((len(uv), 3))
+     K = col_img.camera.calibration_matrix()
+     xy_local[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
+     xy_local[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
+     # Get the 3D vertices
+     vertices_3d_local = xy_local * depth_vert[..., None]
+
+     # Create camera-to-world transformation matrix
+     world_to_cam = np.eye(4)
+     world_to_cam[:3] = col_img.cam_from_world.matrix()
+     cam_to_world = np.linalg.inv(world_to_cam)
+
+     # Transform local 3D points to world coordinates
+     vertices_3d_homogeneous = cv2.convertPointsToHomogeneous(vertices_3d_local)
+     vertices_3d = cv2.transform(vertices_3d_homogeneous, cam_to_world)
+     vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
+     return vertices_3d
+
+
+ def create_3d_wireframe_single_image(vertices: List[dict],
+                                      connections: List[Tuple[int, int]],
+                                      depth: PImage,
+                                      colmap_rec: pycolmap.Reconstruction,
+                                      img_id: str,
+                                      ade_seg: PImage) -> np.ndarray:
+     """
+     Processes a single image view to generate 3D vertex coordinates from existing 2D vertices/edges.
+
+     Parameters
+     ----------
+     vertices : List[dict]
+         List of 2D vertex dictionaries (e.g., {"xy": (x, y), "type": ...}).
+     connections : List[Tuple[int, int]]
+         List of 2D edge connections (indices into the vertices list).
+     depth : PIL.Image
+         Initial dense depth map as a PIL Image.
+     colmap_rec : pycolmap.Reconstruction
+         COLMAP reconstruction data.
+     img_id : str
+         Identifier for the current image within the COLMAP reconstruction.
+     ade_seg : PIL.Image
+         ADE20k segmentation map for the image.
+
+     Returns
+     -------
+     vertices_3d : np.ndarray
+         (N, 3) array of vertex coordinates in 3D world space.
+         Returns an empty array if processing fails (e.g., missing sparse depth).
+     """
+     # Check if initial vertices/connections are valid
+     if (len(vertices) < 2) or (len(connections) < 1):
+         # This case should ideally be handled before calling, but good to double check.
+         print(f'Warning: create_3d_wireframe_single_image called with insufficient vertices/connections for image {img_id}')
+         return np.empty((0, 3))
+
+     # Get fitted dense depth and sparse depth
+     depth_fitted, depth_sparse, found_sparse, col_img = get_fitted_dense_depth(
+         depth, colmap_rec, img_id, ade_seg
+     )
+
+     # Get UV coordinates and depth for each vertex
+     uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, search_radius=15)  # default=10, 15*
+
+     # Backproject to 3D
+     vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img)
+
+     return vertices_3d
+
+
+ def merge_vertices_3d(vert_edge_per_image, th=0.5):
+     '''Merge vertices that are close to each other in 3D space and are of same types'''
+     # Initialize structures to collect vertices and connections from all images
+     all_3d_vertices = []
+     connections_3d = []
+     all_indexes = []
+     cur_start = 0
+     types = []
+
+     # Combine vertices and update connection indices across all images
+     for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
+         types += [int(v['type'] == 'apex') for v in vertices]
+         all_3d_vertices.append(vertices_3d)
+         connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
+         cur_start += len(vertices_3d)
+     all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
+
+     # Calculate distance matrix between all vertices
+     distmat = cdist(all_3d_vertices, all_3d_vertices)
+     types = np.array(types).reshape(-1, 1)
+     same_types = cdist(types, types)
+
+     # Create mask for vertices that should be merged (close in space and same type)
+     mask_to_merge = (distmat <= th) & (same_types == 0)
+     new_vertices = []
+     new_connections = []
+
+     # Extract vertex indices to merge based on the mask
+     to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
+
+     # Build groups of vertices to merge (transitive grouping)
+     to_merge_final = defaultdict(list)
+     for i in range(len(all_3d_vertices)):
+         for j in to_merge:
+             if i in j:
+                 to_merge_final[i] += j
+
+     # Remove duplicates in each group
+     for k, v in to_merge_final.items():
+         to_merge_final[k] = list(set(v))
+
+     # Create final merge groups without duplicates
+     already_there = set()
+     merged = []
+     for k, v in to_merge_final.items():
+         if k in already_there:
+             continue
+         merged.append(v)
+         for vv in v:
+             already_there.add(vv)
+
+     # Calculate new vertex positions (average of merged groups)
+     old_idx_to_new = {}
+     count = 0
+     for idxs in merged:
+         new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
+         for idx in idxs:
+             old_idx_to_new[idx] = count
+         count += 1
+     new_vertices = np.array(new_vertices)
+
+     # Update connections to use new vertex indices
+     for conn in connections_3d:
+         new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
+         if new_con[0] == new_con[1]:
+             continue
+         if new_con not in new_connections:
+             new_connections.append(new_con)
+     return new_vertices, new_connections
+
+
+ def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
+     """
+     Prune vertices not connected to anything. If keep_largest=True, also
+     keep only the largest connected component in the graph.
+     """
+     if len(all_3d_vertices) == 0:
+         return np.array([]), []
+
+     # adjacency
+     adj = defaultdict(set)
+     for (i, j) in connections_3d:
+         adj[i].add(j)
+         adj[j].add(i)
+
+     # keep only vertices that appear in at least one edge
+     used_idxs = set()
+     for (i, j) in connections_3d:
+         used_idxs.add(i)
+         used_idxs.add(j)
+
+     if not used_idxs:
+         return np.empty((0, 3)), []
+
+     # If we only want to remove truly isolated points, but keep multiple subgraphs:
+     if not keep_largest:
+         new_map = {}
+         used_list = sorted(list(used_idxs))
+         for new_id, old_id in enumerate(used_list):
+             new_map[old_id] = new_id
+         new_vertices = np.array([all_3d_vertices[old_id] for old_id in used_list])
+         new_conns = []
+         for (i, j) in connections_3d:
+             if i in used_idxs and j in used_idxs:
+                 new_conns.append((new_map[i], new_map[j]))
+         return new_vertices, new_conns
+
+     # Otherwise find the largest connected component:
+     visited = set()
+     def bfs(start):
+         queue = [start]
+         comp = []
+         visited.add(start)
+         while queue:
+             cur = queue.pop()
+             comp.append(cur)
+             for neigh in adj[cur]:
+                 if neigh not in visited:
+                     visited.add(neigh)
+                     queue.append(neigh)
+         return comp
+
+     # Collect all subgraphs
+     comps = []
+     for idx in used_idxs:
+         if idx not in visited:
+             c = bfs(idx)
+             comps.append(c)
+
+     # pick largest
+     comps.sort(key=lambda c: len(c), reverse=True)
+     largest = comps[0] if len(comps) > 0 else []
+
+     # Remap
+     new_map = {}
+     for new_id, old_id in enumerate(largest):
+         new_map[old_id] = new_id
+
+     new_vertices = np.array([all_3d_vertices[old_id] for old_id in largest])
+     new_conns = []
+     for (i, j) in connections_3d:
+         if i in largest and j in largest:
+             new_conns.append((new_map[i], new_map[j]))
+
+     # remove duplicates
+     new_conns = list(set([tuple(sorted(c)) for c in new_conns]))
+     return new_vertices, new_conns
+
+ def get_sparse_depth(colmap_rec, img_id_substring, depth):
+     """
+     Return a sparse depth map for the COLMAP image whose name contains
+     `img_id_substring`. The output is an array of the same (H, W) shape as
+     `depth`, where only the projected 3D points get a depth > 0, else 0.
+     """
+     H, W = depth.shape
+
+     # 1) Find the matching COLMAP image
+     found_img = None
+     for img_id_c, col_img in colmap_rec.images.items():
+         if img_id_substring in col_img.name:
+             found_img = col_img
+             break
+     if found_img is None:
+         print(f"Image substring {img_id_substring} not found in COLMAP.")
+         return np.zeros((H, W), dtype=np.float32), False, None
+
+     # 2) Gather 3D points that this image sees
+     points_xyz = []
+     for pid, p3D in colmap_rec.points3D.items():
+         if found_img.has_point3D(pid):
+             points_xyz.append(p3D.xyz)  # world coords
+     if not points_xyz:
+         print(f"No 3D points associated with {found_img.name}.")
+         return np.zeros((H, W), dtype=np.float32), False, found_img
+
+     points_xyz = np.array(points_xyz)  # (N, 3)
+
+     # 3) For each point, project via col_img.project_point()
+     uv = []
+     z_vals = []
+     for xyz in points_xyz:
+         proj = found_img.project_point(xyz)  # returns (u, v) in image coords or None
+         if proj is not None:
+             u_i, v_i = proj
+             u_i = int(round(u_i))
+             v_i = int(round(v_i))
+             # Check in-bounds
+             if 0 <= u_i < W and 0 <= v_i < H:
+                 uv.append((u_i, v_i))
+                 # We'll compute depth as Z in camera coords
+                 # from the world->cam transform col_img holds
+                 mat4x4 = np.eye(4)
+                 mat4x4[:3, :4] = found_img.cam_from_world.matrix()
+                 p_cam = mat4x4 @ np.array([xyz[0], xyz[1], xyz[2], 1.0])
+                 z_vals.append(p_cam[2] / p_cam[3])
+
+     uv = np.array(uv, dtype=int)  # shape (M, 2)
+     z_vals = np.array(z_vals)     # shape (M,)
+
+     depth_out = np.zeros((H, W), dtype=np.float32)
+     depth_out[uv[:, 1], uv[:, 0]] = z_vals  # Note: uv = (u, v), so row = v, col = u
+
+     return depth_out, True, found_img
+
+
+ def fit_scale_robust_median(depth, sparse_depth, validity_mask=None):
+     """
+     Fit a scale factor to the depth map using the median of the ratio of sparse to dense depth.
+     """
+     if validity_mask is None:
+         mask = (sparse_depth != 0)
+     else:
+         mask = (sparse_depth != 0) & validity_mask
+     mask = mask & (depth < 50) & (sparse_depth < 50)
+     X = depth[mask]
+     Y = sparse_depth[mask]
+     alpha = np.median(Y / X)
+     depth_fitted = alpha * depth
+     return alpha, depth_fitted
+
+
+ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
+     """
+     Gets sparse depth from COLMAP, computes a house mask, fits dense depth to sparse
+     depth within the mask, and returns the fitted dense depth.
+
+     Parameters
+     ----------
+     depth : np.ndarray
+         Initial dense depth map (H, W).
+     colmap_rec : pycolmap.Reconstruction
+         COLMAP reconstruction data.
+     img_id : str
+         Identifier for the current image within the COLMAP reconstruction.
+     ade20k_seg : PIL.Image
+         ADE20k segmentation map for the image.
+
+     Returns
+     -------
+     depth_fitted : np.ndarray
+         Dense depth map scaled to align with sparse depth within the house mask (H, W).
+     depth_sparse : np.ndarray
+         The sparse depth map obtained from COLMAP (H, W).
+     found_sparse : bool
+         True if sparse depth points were found for this image, False otherwise.
+     col_img : pycolmap.Image or None
+         The matching COLMAP image, or None if not found.
+     """
+     depth_np = np.array(depth) / 1000.  # Convert mm to meters if needed
+     depth_sparse, found_sparse, col_img = get_sparse_depth(colmap_rec, img_id, depth_np)
+
+     if not found_sparse:
+         print(f'No sparse depth found for image {img_id}')
+         # Return original (meter-scaled) depth if no sparse data
+         return depth_np, np.zeros_like(depth_np), False, None
+
+     # Get house mask to focus fitting on relevant areas
+     house_mask = get_house_mask(ade20k_seg)
+
+     # Fit dense depth to sparse depth (scale only), using only points within the house mask
+     k, depth_fitted = fit_scale_robust_median(depth_np, depth_sparse, validity_mask=house_mask)
+     print(f"Fitted depth scale k={k:.4f} for image {img_id}")
+     return depth_fitted, depth_sparse, True, col_img
+
+
+ def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th=3.0):
+     """
+     Prune vertices that are too far from the sparse point cloud.
+     """
+     xyz_sfm = []
+     for k, v in colmap_rec.points3D.items():
+         xyz_sfm.append(v.xyz)
+     xyz_sfm = np.array(xyz_sfm)
+     distmat = cdist(all_3d_vertices, xyz_sfm)
+     mindist = distmat.min(axis=1)
+     mask = mindist <= th
+     all_3d_vertices_new = all_3d_vertices[mask]
+     old_idx_survived = np.arange(len(all_3d_vertices))[mask]
+     new_idxs = np.arange(len(all_3d_vertices_new))
+     old_to_new_idx = dict(zip(old_idx_survived, new_idxs))
+     connections_3d_new = [(old_to_new_idx[conn[0]], old_to_new_idx[conn[1]])
+                           for conn in connections_3d if mask[conn[0]] and mask[conn[1]]]
+     return all_3d_vertices_new, connections_3d_new
+
+
+ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
+     """
+     Predict 3D wireframe from a dataset entry.
+     """
+     good_entry = convert_entry_to_human_readable(entry)
+     vert_edge_per_image = {}
+     for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
+                                                                     good_entry['depth'],
+                                                                     good_entry['K'],
+                                                                     good_entry['R'],
+                                                                     good_entry['t'],
+                                                                     good_entry['image_ids'],
+                                                                     good_entry['ade']  # ADE20k segmentation
+                                                                     )):
+         colmap_rec = good_entry['colmap_binary']
+         K = np.array(K)
+         R = np.array(R)
+         t = np.array(t)
+         # Resize gestalt segmentation to match depth map size
+         depth_size = (np.array(depth).shape[1], np.array(depth).shape[0])  # W, H
+         gest_seg = gest.resize(depth_size)
+         gest_seg_np = np.array(gest_seg).astype(np.uint8)
+
+         # Get 2D vertices and edges first
+         vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=15.)  # default 10, 15*
+
+         # Check if we have enough to proceed
+         if (len(vertices) < 2) or (len(connections) < 1):
+             print(f'Not enough vertices or connections found in image {i}, skipping.')
+             vert_edge_per_image[i] = [], [], np.empty((0, 3))
+             continue
+
+         # Call the refactored function to get 3D points
+         vertices_3d = create_3d_wireframe_single_image(
+             vertices, connections, depth, colmap_rec, img_id, ade_seg
+         )
+         # Store original 2D vertices, connections, and computed 3D points
+         vert_edge_per_image[i] = vertices, connections, vertices_3d
+
+     # Merge vertices from all images (tighten the 3D merge radius)
+     all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, th=0.4)  # default=0.5, 0.4*
+
+     all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d, keep_largest=False)
+     all_3d_vertices_clean, connections_3d_clean = prune_too_far(all_3d_vertices_clean, connections_3d_clean, colmap_rec, th=4.0)  # default=4.0*
+
+     if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
+         print('Not enough vertices or connections in the 3D vertices')
+         return empty_solution()
+
+     return all_3d_vertices_clean, connections_3d_clean
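A minimal per-image sketch of the pipeline order implemented by `predict_wireframe` above, assuming one entry's gestalt segmentation `gest_seg_np`, depth map `depth`, COLMAP reconstruction `colmap_rec`, image id `img_id`, and ADE20k segmentation `ade_seg` are already in hand:

```python
# 2D step: vertices and edges from the gestalt segmentation colors.
vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=15.)

# 3D step: depth fitting + backprojection (only if the 2D step found enough).
if len(vertices) >= 2 and len(connections) >= 1:
    vertices_3d = create_3d_wireframe_single_image(
        vertices, connections, depth, colmap_rec, img_id, ade_seg)
```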
tools2025/hoho2025/hoho.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import shutil
4
+ from pathlib import Path
5
+ from typing import Dict
6
+ import warnings
7
+ import contextlib
8
+ import tempfile
9
+ from PIL import Image
10
+ import io
11
+ import webdataset as wds
12
+ import numpy as np
13
+ import importlib
14
+ import subprocess
15
+
16
+
17
+ from PIL import ImageFile
18
+
19
+ from huggingface_hub.utils._headers import build_hf_headers # note: using _headers
20
+
21
+
22
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
23
+
24
+ LOCAL_DATADIR = None
25
+
26
+ def setup(local_dir='./data/usm-training-data/data'):
27
+
28
+ # If we are in the test environment, we need to link the data directory to the correct location
29
+ tmp_datadir = Path('/tmp/data/data')
30
+ local_test_datadir = Path('./data/usm-test-data-x/data')
31
+ local_val_datadir = Path(local_dir)
32
+
33
+ os.system('pwd')
34
+ os.system('ls -lahtr .')
35
+
36
+ if tmp_datadir.exists() and not local_test_datadir.exists():
37
+ global LOCAL_DATADIR
38
+ LOCAL_DATADIR = local_test_datadir
39
+ # shutil.move(datadir, './usm-test-data-x/data')
40
+ print(f"Linking {tmp_datadir} to {LOCAL_DATADIR} (we are in the test environment)")
41
+ LOCAL_DATADIR.parent.mkdir(parents=True, exist_ok=True)
42
+ LOCAL_DATADIR.symlink_to(tmp_datadir)
43
+ else:
44
+ LOCAL_DATADIR = local_val_datadir
45
+ print(f"Using {LOCAL_DATADIR} as the data directory (we are running locally)")
46
+
47
+ if not LOCAL_DATADIR.exists():
48
+ warnings.warn(f"Data directory {LOCAL_DATADIR} does not exist: creating it...")
49
+ LOCAL_DATADIR.mkdir(parents=True)
50
+
51
+ return LOCAL_DATADIR
52
+
53
+
54
+ def download_package(package_name, path_to_save='packages'):
55
+ """
56
+ Downloads a package using pip and saves it to a specified directory.
57
+
58
+ Parameters:
59
+ package_name (str): The name of the package to download.
60
+ path_to_save (str): The path to the directory where the package will be saved.
61
+ """
62
+ try:
63
+ # pip download webdataset -d packages/webdataset --platform manylinux1_x86_64 --python-version 38 --only-binary=:all:
64
+ subprocess.check_call([subprocess.sys.executable, "-m", "pip", "download", package_name,
65
+ "-d", str(Path(path_to_save)/package_name), # Download the package to the specified directory
66
+ "--platform", "manylinux1_x86_64", # Specify the platform
67
+ "--python-version", "38", # Specify the Python version
68
+ "--only-binary=:all:"]) # Download only binary packages
69
+ print(f'Package "{package_name}" downloaded successfully')
70
+ except subprocess.CalledProcessError as e:
71
+ print(f'Failed to downloaded package "{package_name}". Error: {e}')
72
+
73
+
74
+ def install_package_from_local_file(package_name, folder='packages'):
75
+ """
76
+ Installs a package from a local .whl file or a directory containing .whl files using pip.
77
+
78
+ Parameters:
79
+ path_to_file_or_directory (str): The path to the .whl file or the directory containing .whl files.
80
+ """
81
+ try:
82
+ pth = str(Path(folder) / package_name)
83
+ subprocess.check_call([subprocess.sys.executable, "-m", "pip", "install",
84
+ "--no-index", # Do not use package index
85
+ "--find-links", pth, # Look for packages in the specified directory or at the file
86
+ package_name]) # Specify the package to install
87
+ print(f"Package installed successfully from {pth}")
88
+ except subprocess.CalledProcessError as e:
89
+ print(f"Failed to install package from {pth}. Error: {e}")
90
+
91
+
92
+ def importt(module_name, as_name=None):
93
+ """
94
+ Imports a module and returns it.
95
+
96
+ Parameters:
97
+ module_name (str): The name of the module to import.
98
+ as_name (str): The name to use for the imported module. If None, the original module name will be used.
99
+
100
+ Returns:
101
+ The imported module.
102
+ """
103
+ for _ in range(2):
104
+ try:
105
+ if as_name is None:
106
+ print(f'imported {module_name}')
107
+ return importlib.import_module(module_name)
108
+ else:
109
+ print(f'imported {module_name} as {as_name}')
110
+ return importlib.import_module(module_name, as_name)
111
+ except ModuleNotFoundError as e:
112
+ install_package_from_local_file(module_name)
113
+ print(f"Failed to import module {module_name}. Error: {e}")
114
+
115
+
116
+ def prepare_submission():
117
+ # Download packages from requirements.txt
118
+ if Path('requirements.txt').exists():
119
+ print('downloading packages from requirements.txt')
120
+ Path('packages').mkdir(exist_ok=True)
121
+ with open('requirements.txt') as f:
122
+ packages = f.readlines()
123
+ for p in packages:
124
+ download_package(p.strip())
125
+
126
+ print('all packages downloaded. Don\'t foget to include the packages in the submission by adding them with git lfs.')
127
+
128
+
129
+ def Rt_to_eye_target(im, K, R, t):
130
+ height = im.height
131
+ focal_length = K[0,0]
132
+ fov = 2.0 * np.arctan2((0.5 * height), focal_length) / (np.pi / 180.0)
133
+
134
+ x_axis, y_axis, z_axis = R
135
+
136
+ eye = -(R.T @ t).squeeze()
137
+ z_axis = z_axis.squeeze()
138
+ target = eye + z_axis
139
+ up = -y_axis
140
+
141
+ return eye, target, up, fov
142
+
143
+
144
+ ########## general utilities ##########
145
+
146
+
147
+ @contextlib.contextmanager
148
+ def working_directory(path):
149
+ """Changes working directory and returns to previous on exit."""
150
+ prev_cwd = Path.cwd()
151
+ os.chdir(path)
152
+ try:
153
+ yield
154
+ finally:
155
+ os.chdir(prev_cwd)
156
+
157
+ @contextlib.contextmanager
158
+ def temp_working_directory():
159
+ with tempfile.TemporaryDirectory(dir='.') as D:
160
+ with working_directory(D):
161
+ yield
162
+
163
+
164
+ ############# Dataset #############
165
+ def proc(row, split='train'):
166
+ out = {}
167
+ out['__key__'] = None
168
+ out['__imagekey__'] = []
169
+ for k, v in row.items():
170
+ key_parts = k.split('.')
171
+ colname = key_parts[0]
172
+ if colname == 'ade20k':
173
+ out['__imagekey__'].append(key_parts[1])
174
+ if colname in {'ade20k', 'depthcm', 'gestalt'}:
175
+ if colname in out:
176
+ out[colname].append(v)
177
+ else:
178
+ out[colname] = [v]
179
+ elif colname in {'wireframe', 'mesh'}:
180
+ out.update({a: b for a,b in v.items()})
181
+ elif colname in 'kr':
182
+ out[colname.upper()] = v
183
+ else:
184
+ out[colname] = v
185
+ return Sample(out)
186
+
187
+
188
+
189
+ def decode_colmap(s):
190
+ import hoho2025.read_write_colmap as read_write_colmap
191
+ with temp_working_directory():
192
+
193
+ with open('points3D.bin', 'wb') as stream:
194
+ stream.write(s['points3d'])
195
+
196
+
197
+ with open('cameras.bin', 'wb') as stream:
198
+ stream.write(s['cameras'])
199
+
200
+
201
+ with open('images.bin', 'wb') as stream:
202
+ stream.write(s['images'])
203
+
204
+
205
+ cameras, images, points3D = read_write_colmap.read_model(
206
+ path='.', ext='.bin'
207
+ )
208
+ return cameras, images, points3D
209
+
210
+
211
+ def decode(row):
212
+ cameras, images, points3D = decode_colmap(row)
213
+
214
+ out = {}
215
+
216
+ for k, v in row.items():
217
+ # colname = k.split('.')[0]
218
+ if k in {'ade20k', 'depthcm', 'gestalt'}:
219
+ # print(k, len(v), type(v))
220
+ v = [Image.open(io.BytesIO(im)) for im in v]
221
+ if k in out:
222
+ out[k].extend(v)
223
+ else:
224
+ out[k] = v
225
+ elif k in {'wireframe', 'mesh'}:
226
+             # out.update({a: b.tolist() for a,b in v.items()})
+             v = dict(np.load(io.BytesIO(v)))
+             out.update({a: b for a, b in v.items()})
+         elif k in 'kr':
+             out[k.upper()] = v
+         elif k == 'cameras':
+             out[k] = cameras
+         elif k == 'images':
+             out[k] = images
+         elif k == 'points3d':
+             out[k] = points3D
+         else:
+             out[k] = v
+
+     return Sample(out)
+
+
+ class Sample(Dict):
+     def __repr__(self):
+         return str({k: v.shape if hasattr(v, 'shape') else [type(v[0])] if isinstance(v, list) else type(v) for k, v in self.items()})
+
+
+ def get_params():
+     example_param_dict = {
+         "competition_id": "usm3d/S23DR",
+         "competition_type": "script",
+         "metric": "custom",
+         "token": "hf_**********************************",
+         "team_id": "local-test-team_id",
+         "submission_id": "local-test-submission_id",
+         "submission_id_col": "__key__",
+         "submission_cols": [
+             "__key__",
+             "wf_edges",
+             "wf_vertices",
+             "edge_semantics"
+         ],
+         "submission_rows": 180,
+         "output_path": ".",
+         "submission_repo": "<THE HF MODEL ID OF THIS REPO>",
+         "time_limit": 7200,
+         "dataset": "usm3d/usm-test-data-x",
+         "submission_filenames": [
+             "submission.parquet"
+         ]
+     }
+
+     param_path = Path('params.json')
+
+     if not param_path.exists():
+         print('params.json not found (this means we probably aren\'t in the test env). Using example params.')
+         params = example_param_dict
+     else:
+         print('found params.json (this means we are probably in the test env). Using params from file.')
+         with param_path.open() as f:
+             params = json.load(f)
+         print(params)
+     return params
+
+
+ SHARD_IDS = {'train': (0, 25), 'val': (25, 26), 'public': (26, 27), 'private': (27, 32)}
+ def get_dataset(decode='pil', proc=proc, split='train', dataset_type='webdataset', stream=True):
+     if LOCAL_DATADIR is None:
+         raise ValueError('LOCAL_DATADIR is not set. Please run setup() first.')
+
+     local_dir = Path(LOCAL_DATADIR)
+     if split != 'all':
+         local_dir = local_dir / split
+
+     paths = [str(p) for p in local_dir.rglob('*.tar.gz')]
+     msg = f'no tarfiles found in {local_dir}.'
+     if len(paths) == 0:
+         if stream:
+             if split == 'all':
+                 split = 'train'
+                 warnings.warn('streaming isn\'t supported with \'all\': changing `split` to \'train\'')
+             warnings.warn(msg)
+             if split == 'val':
+                 names = [f'data/val/inputs/hoho_v3_{i:03}-of-032.tar.gz' for i in range(*SHARD_IDS[split])]
+             elif split == 'train':
+                 names = [f'data/train/hoho_v3_{i:03}-of-032.tar.gz' for i in range(*SHARD_IDS[split])]
+
+             auth = build_hf_headers()['authorization']
+             paths = [f"pipe:curl -L -s https://huggingface.co/datasets/usm3d/hoho-train-set/resolve/main/{name} -H 'Authorization: {auth}'" for name in names]
+         else:
+             raise FileNotFoundError(msg)
+
+     dataset = wds.WebDataset(paths)
+
+     if decode is not None:
+         dataset = dataset.decode(decode)
+     else:
+         dataset = dataset.decode()
+
+     dataset = dataset.map(proc)
+
+     if dataset_type == 'webdataset':
+         return dataset
+
+     if dataset_type == 'hf':
+         import datasets
+         from datasets import Features, Value, Sequence, Image, Array2D
+
+         if split in ('train', 'val'):
+             return datasets.IterableDataset.from_generator(lambda: dataset.iterator())
+         else:
+             raise NotImplementedError('only train and val are implemented as hf datasets')
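A minimal usage sketch for the loader above (not part of the commit; it assumes the `hoho2025` module is importable and that `setup()` has already populated `LOCAL_DATADIR`, or that a valid HF token is available so the shards can be streamed):

    ds = get_dataset(decode='pil', split='val', dataset_type='webdataset', stream=True)
    for sample in ds:
        print(sample)  # Sample.__repr__ shows array shapes / value types per key
        break
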
tools2025/hoho2025/metric_helper.py ADDED
@@ -0,0 +1,167 @@
+ import numpy as np
+ from scipy.spatial.distance import cdist
+ from scipy.optimize import linear_sum_assignment
+ import torch
+ import trimesh
+ from time import time
+
+ MAX_SCORE = 1.0
+
+ def get_one_primitive(p1, p2, c=(255, 0, 0), radius=25, primitive_type='cylinder', sections=6):
+     if len(c) == 1:
+         c = [c[0]] * 4
+     elif len(c) == 3:
+         c = [*c, 255]
+     elif len(c) != 4:
+         raise ValueError(f'{c} is not a valid color (must have 1, 3, or 4 elements).')
+
+     p1, p2 = np.asarray(p1), np.asarray(p2)
+     l = np.linalg.norm(p2 - p1)
+
+     # Skip degenerate (zero-length) edges
+     if l < 1e-6:
+         return None
+
+     direction = (p2 - p1) / l
+
+     T = np.eye(4)
+     T[:3, 2] = direction
+     T[:3, 3] = (p1 + p2) / 2
+
+     b0, b1 = T[:3, 0], T[:3, 1]
+     if np.abs(np.dot(b0, direction)) < np.abs(np.dot(b1, direction)):
+         T[:3, 1] = -np.cross(b0, direction)
+     else:
+         T[:3, 0] = np.cross(b1, direction)
+
+     if primitive_type == 'capsule':
+         mesh = trimesh.primitives.Capsule(radius=radius, height=l, transform=T, sections=sections)
+     elif primitive_type == 'cylinder':
+         mesh = trimesh.primitives.Cylinder(radius=radius, height=l, transform=T, sections=sections)
+     else:
+         raise ValueError("Unknown primitive!")
+
+     # Make sure vertex colors are initialized before overwriting them
+     if not hasattr(mesh.visual, 'vertex_colors') or mesh.visual.vertex_colors is None:
+         mesh.visual.vertex_colors = np.ones((len(mesh.vertices), 4)) * 255
+
+     mesh.visual.vertex_colors = np.ones_like(mesh.visual.vertex_colors) * c
+     return mesh
+
+ def get_primitives(vertices, edges, radius=25, c=[255, 0, 0]):
+     # Convert vertices to a NumPy array
+     if isinstance(vertices, torch.Tensor):
+         vertices = vertices.detach().cpu().numpy()
+     else:
+         vertices = np.asarray(vertices)
+
+     # Convert edges to a NumPy array of integers
+     if isinstance(edges, torch.Tensor):
+         edges = edges.detach().cpu().numpy().astype(np.int64)
+     else:
+         edges = np.asarray(edges, dtype=np.int64)
+
+     primitives = []
+     for e in edges:
+         # Skip edges that reference out-of-range vertex indices
+         if e[0] >= len(vertices) or e[1] >= len(vertices):
+             continue
+         primitive = get_one_primitive(vertices[e[0]], vertices[e[1]], radius=radius, c=c)
+         if primitive is not None:
+             primitives.append(primitive)
+     return primitives
+
+
+ def compute_mesh_iou_VOLUME(pd_vertices, pd_edges, gt_vertices, gt_edges, radius=20, engine='manifold'):
+     # Check for empty wireframes
+     if len(pd_edges) == 0 or len(gt_edges) == 0:
+         return 0.0
+
+     pd_vertices = pd_vertices.detach().cpu() if isinstance(pd_vertices, torch.Tensor) else pd_vertices
+     pd_edges = pd_edges.detach().cpu() if isinstance(pd_edges, torch.Tensor) else pd_edges
+     gt_vertices = gt_vertices.detach().cpu() if isinstance(gt_vertices, torch.Tensor) else gt_vertices
+     gt_edges = gt_edges.detach().cpu() if isinstance(gt_edges, torch.Tensor) else gt_edges
+
+     pd_primitives = get_primitives(pd_vertices, pd_edges, radius=radius, c=[0, 255, 0])
+     gt_primitives = get_primitives(gt_vertices, gt_edges, radius=radius, c=[255, 0, 0])
+     # Check for empty primitive lists
+     if not pd_primitives or not gt_primitives:
+         return 0.0
+
+     # Bounding-box check to detect non-overlapping cases quickly
+     pd_bounds = np.array([p.bounds for p in pd_primitives])
+     gt_bounds = np.array([p.bounds for p in gt_primitives])
+
+     pd_min, pd_max = np.min(pd_bounds[:, 0], axis=0), np.max(pd_bounds[:, 1], axis=0)
+     gt_min, gt_max = np.min(gt_bounds[:, 0], axis=0), np.max(gt_bounds[:, 1], axis=0)
+
+     # If the bounding boxes don't overlap, the IoU is zero
+     if np.any(pd_max < gt_min) or np.any(pd_min > gt_max):
+         return 0.0
+     t = time()
+     mesh_pred = trimesh.boolean.union(pd_primitives, engine=engine)
+     # print(f"mesh_pred union: {time() - t} {mesh_pred.is_volume}")
+     t = time()
+     mesh_gt = trimesh.boolean.union(gt_primitives, engine=engine)
+     # print(f"mesh_gt union: {time() - t} {mesh_gt.is_volume}")
+
+     if mesh_pred.is_volume and mesh_gt.is_volume:
+         t = time()
+         inter_volume = trimesh.boolean.intersection([mesh_pred, mesh_gt], engine=engine).volume
+         # print(f"inter_volume: {time() - t}")
+     else:
+         all_inter = []
+         t = time()
+         for pd_prim in pd_primitives:
+             pd_min, pd_max = pd_prim.bounds
+             for gt_prim in gt_primitives:
+                 # Skip the intersection computation if the bounding boxes don't overlap
+                 gt_min, gt_max = gt_prim.bounds
+                 if np.any(pd_max < gt_min) or np.any(pd_min > gt_max):
+                     continue
+                 inter = trimesh.boolean.intersection([pd_prim, gt_prim], engine=engine)
+                 if inter.is_volume and inter.volume > 0:
+                     all_inter.append(inter)
+         inter_volume = trimesh.boolean.union(all_inter, engine=engine).volume if all_inter else 0
+         # print(f"all_inter: {time() - t}")
+     union_volume = mesh_pred.volume + mesh_gt.volume - inter_volume
+
+     return inter_volume / union_volume if union_volume > 0 else 0.0
+
+
+ # ----------------- Corner F1 -----------------
+ def compute_ap_metrics(pd_vertices, gt_vertices, thresh=25):
+     if len(pd_vertices) == 0 or len(gt_vertices) == 0:
+         return 0.0
+
+     dists = cdist(pd_vertices, gt_vertices)
+     row_ind, col_ind = linear_sum_assignment(dists)
+
+     tp = (dists[row_ind, col_ind] <= thresh).sum()
+     precision = tp / len(pd_vertices) if len(pd_vertices) > 0 else 0
+     recall = tp / len(gt_vertices) if len(gt_vertices) > 0 else 0
+     denom = precision + recall
+     f1 = (2 * precision * recall / denom) if denom > 0 else 0.0
+     return f1
+
+ def batch_corner_f1(X, Y, distance_thresh=25):
+     results = []
+     for (pd_v, _), (gt_v, _) in zip(X, Y):
+         results.append(compute_ap_metrics(pd_v, gt_v, thresh=distance_thresh))
+     return np.array(results)
+
+ # ----------------- HSS Metric -----------------
+ from collections import namedtuple
+ HSSReturnType = namedtuple('HSSReturnType', ['hss', 'f1', 'iou'])
+ def hss(y_hat_v, y_hat_e, y_v, y_e, vert_thresh=0.5, edge_thresh=0.5):
+     X = [(y_hat_v, y_hat_e)]
+     Y = [(y_v, y_e)]
+     t = time()
+     f1 = np.clip(batch_corner_f1(X, Y, distance_thresh=vert_thresh)[0], 0, 1)
+     # print(f"f1 {f1}: in {time() - t:.2f} sec")
+     t = time()
+     IoU = np.clip(compute_mesh_iou_VOLUME(y_hat_v, y_hat_e, y_v, y_e, radius=edge_thresh), 0, 1)
+     # print(f"IoU: {IoU} in {time() - t:.2f} sec")
+     score = 2 * f1 * IoU / (f1 + IoU) if (f1 + IoU) > 0 else 0.0
+     return HSSReturnType(hss=score, f1=f1, iou=IoU)
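A hedged example (not part of the commit) of calling the metric above on toy data; it assumes the package is installed and that `manifold3d` is available as the trimesh boolean engine, and the coordinates and thresholds are made up (roughly centimeter scale):

    import numpy as np
    from hoho2025.metric_helper import hss

    pred_v = np.array([[0., 0., 0.], [0., 0., 100.]])
    pred_e = np.array([[0, 1]])
    gt_v = np.array([[0., 0., 5.], [0., 0., 95.]])
    gt_e = np.array([[0, 1]])

    res = hss(pred_v, pred_e, gt_v, gt_e, vert_thresh=25, edge_thresh=20)
    print(res.hss, res.f1, res.iou)  # HSS is the harmonic mean of corner F1 and mesh IoU
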
tools2025/hoho2025/read_write_colmap.py ADDED
@@ -0,0 +1,488 @@
+ # Modified to read from bytes-like object by Dmytro Mishkin.
+ # The original license is below:
+ # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #
+ #     * Redistributions of source code must retain the above copyright
+ #       notice, this list of conditions and the following disclaimer.
+ #
+ #     * Redistributions in binary form must reproduce the above copyright
+ #       notice, this list of conditions and the following disclaimer in the
+ #       documentation and/or other materials provided with the distribution.
+ #
+ #     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
+ #       its contributors may be used to endorse or promote products derived
+ #       from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
+
+ import os
+ import collections
+ import numpy as np
+ import struct
+ import argparse
+
+
+ CameraModel = collections.namedtuple(
+     "CameraModel", ["model_id", "model_name", "num_params"])
+ Camera = collections.namedtuple(
+     "Camera", ["id", "model", "width", "height", "params"])
+ BaseImage = collections.namedtuple(
+     "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
+ Point3D = collections.namedtuple(
+     "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
+
+
+ class Image(BaseImage):
+     def qvec2rotmat(self):
+         return qvec2rotmat(self.qvec)
+
+
+ CAMERA_MODELS = {
+     CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
+     CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
+     CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
+     CameraModel(model_id=3, model_name="RADIAL", num_params=5),
+     CameraModel(model_id=4, model_name="OPENCV", num_params=8),
+     CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
+     CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
+     CameraModel(model_id=7, model_name="FOV", num_params=5),
+     CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
+     CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
+     CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
+ }
+ CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
+                          for camera_model in CAMERA_MODELS])
+ CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
+                            for camera_model in CAMERA_MODELS])
+
+
+ def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
+     """Read and unpack the next bytes from a binary file.
+     :param fid:
+     :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
+     :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
+     :param endian_character: Any of {@, =, <, >, !}
+     :return: Tuple of read and unpacked values.
+     """
+     data = fid.read(num_bytes)
+     return struct.unpack(endian_character + format_char_sequence, data)
+
+
+ def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
+     """Pack and write to a binary file.
+     :param fid:
+     :param data: data to send; if multiple elements are sent at the same time,
+     they should be encapsulated either in a list or a tuple
+     :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q};
+     should be the same length as the data list or tuple
+     :param endian_character: Any of {@, =, <, >, !}
+     """
+     if isinstance(data, (list, tuple)):
+         bytes = struct.pack(endian_character + format_char_sequence, *data)
+     else:
+         bytes = struct.pack(endian_character + format_char_sequence, data)
+     fid.write(bytes)
+
+
+ def read_cameras_text(path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::WriteCamerasText(const std::string& path)
+         void Reconstruction::ReadCamerasText(const std::string& path)
+     """
+     cameras = {}
+     with open(path, "r") as fid:
+         while True:
+             line = fid.readline()
+             if not line:
+                 break
+             line = line.strip()
+             if len(line) > 0 and line[0] != "#":
+                 elems = line.split()
+                 camera_id = int(elems[0])
+                 model = elems[1]
+                 width = int(elems[2])
+                 height = int(elems[3])
+                 params = np.array(tuple(map(float, elems[4:])))
+                 cameras[camera_id] = Camera(id=camera_id, model=model,
+                                             width=width, height=height,
+                                             params=params)
+     return cameras
+
+
+ def read_cameras_binary(path_to_model_file=None, fid=None):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::WriteCamerasBinary(const std::string& path)
+         void Reconstruction::ReadCamerasBinary(const std::string& path)
+     """
+     cameras = {}
+     if fid is None:
+         fid = open(path_to_model_file, "rb")
+     num_cameras = read_next_bytes(fid, 8, "Q")[0]
+     for _ in range(num_cameras):
+         camera_properties = read_next_bytes(
+             fid, num_bytes=24, format_char_sequence="iiQQ")
+         camera_id = camera_properties[0]
+         model_id = camera_properties[1]
+         model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
+         width = camera_properties[2]
+         height = camera_properties[3]
+         num_params = CAMERA_MODEL_IDS[model_id].num_params
+         params = read_next_bytes(fid, num_bytes=8*num_params,
+                                  format_char_sequence="d"*num_params)
+         cameras[camera_id] = Camera(id=camera_id,
+                                     model=model_name,
+                                     width=width,
+                                     height=height,
+                                     params=np.array(params))
+     assert len(cameras) == num_cameras
+     if path_to_model_file is not None:
+         fid.close()
+     return cameras
+
+
+ def write_cameras_text(cameras, path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::WriteCamerasText(const std::string& path)
+         void Reconstruction::ReadCamerasText(const std::string& path)
+     """
+     HEADER = "# Camera list with one line of data per camera:\n" + \
+              "#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \
+              "# Number of cameras: {}\n".format(len(cameras))
+     with open(path, "w") as fid:
+         fid.write(HEADER)
+         for _, cam in cameras.items():
+             to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
+             line = " ".join([str(elem) for elem in to_write])
+             fid.write(line + "\n")
+
+
+ def write_cameras_binary(cameras, path_to_model_file):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::WriteCamerasBinary(const std::string& path)
+         void Reconstruction::ReadCamerasBinary(const std::string& path)
+     """
+     with open(path_to_model_file, "wb") as fid:
+         write_next_bytes(fid, len(cameras), "Q")
+         for _, cam in cameras.items():
+             model_id = CAMERA_MODEL_NAMES[cam.model].model_id
+             camera_properties = [cam.id,
+                                  model_id,
+                                  cam.width,
+                                  cam.height]
+             write_next_bytes(fid, camera_properties, "iiQQ")
+             for p in cam.params:
+                 write_next_bytes(fid, float(p), "d")
+     return cameras
+
+
+ def read_images_text(path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadImagesText(const std::string& path)
+         void Reconstruction::WriteImagesText(const std::string& path)
+     """
+     images = {}
+     with open(path, "r") as fid:
+         while True:
+             line = fid.readline()
+             if not line:
+                 break
+             line = line.strip()
+             if len(line) > 0 and line[0] != "#":
+                 elems = line.split()
+                 image_id = int(elems[0])
+                 qvec = np.array(tuple(map(float, elems[1:5])))
+                 tvec = np.array(tuple(map(float, elems[5:8])))
+                 camera_id = int(elems[8])
+                 image_name = elems[9]
+                 elems = fid.readline().split()
+                 xys = np.column_stack([tuple(map(float, elems[0::3])),
+                                        tuple(map(float, elems[1::3]))])
+                 point3D_ids = np.array(tuple(map(int, elems[2::3])))
+                 images[image_id] = Image(
+                     id=image_id, qvec=qvec, tvec=tvec,
+                     camera_id=camera_id, name=image_name,
+                     xys=xys, point3D_ids=point3D_ids)
+     return images
+
+
+ def read_images_binary(path_to_model_file=None, fid=None):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadImagesBinary(const std::string& path)
+         void Reconstruction::WriteImagesBinary(const std::string& path)
+     """
+     images = {}
+     if fid is None:
+         fid = open(path_to_model_file, "rb")
+     num_reg_images = read_next_bytes(fid, 8, "Q")[0]
+     for _ in range(num_reg_images):
+         binary_image_properties = read_next_bytes(
+             fid, num_bytes=64, format_char_sequence="idddddddi")
+         image_id = binary_image_properties[0]
+         qvec = np.array(binary_image_properties[1:5])
+         tvec = np.array(binary_image_properties[5:8])
+         camera_id = binary_image_properties[8]
+         image_name = ""
+         current_char = read_next_bytes(fid, 1, "c")[0]
+         while current_char != b"\x00":  # look for the ASCII 0 entry
+             image_name += current_char.decode("utf-8")
+             current_char = read_next_bytes(fid, 1, "c")[0]
+         num_points2D = read_next_bytes(fid, num_bytes=8,
+                                        format_char_sequence="Q")[0]
+         x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
+                                    format_char_sequence="ddq"*num_points2D)
+         xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
+                                tuple(map(float, x_y_id_s[1::3]))])
+         point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
+         images[image_id] = Image(
+             id=image_id, qvec=qvec, tvec=tvec,
+             camera_id=camera_id, name=image_name,
+             xys=xys, point3D_ids=point3D_ids)
+     if path_to_model_file is not None:
+         fid.close()
+     return images
+
+
+ def write_images_text(images, path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadImagesText(const std::string& path)
+         void Reconstruction::WriteImagesText(const std::string& path)
+     """
+     if len(images) == 0:
+         mean_observations = 0
+     else:
+         mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
+     HEADER = "# Image list with two lines of data per image:\n" + \
+              "#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \
+              "#   POINTS2D[] as (X, Y, POINT3D_ID)\n" + \
+              "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations)
+
+     with open(path, "w") as fid:
+         fid.write(HEADER)
+         for _, img in images.items():
+             image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
+             first_line = " ".join(map(str, image_header))
+             fid.write(first_line + "\n")
+
+             points_strings = []
+             for xy, point3D_id in zip(img.xys, img.point3D_ids):
+                 points_strings.append(" ".join(map(str, [*xy, point3D_id])))
+             fid.write(" ".join(points_strings) + "\n")
+
+
+ def write_images_binary(images, path_to_model_file):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadImagesBinary(const std::string& path)
+         void Reconstruction::WriteImagesBinary(const std::string& path)
+     """
+     with open(path_to_model_file, "wb") as fid:
+         write_next_bytes(fid, len(images), "Q")
+         for _, img in images.items():
+             write_next_bytes(fid, img.id, "i")
+             write_next_bytes(fid, img.qvec.tolist(), "dddd")
+             write_next_bytes(fid, img.tvec.tolist(), "ddd")
+             write_next_bytes(fid, img.camera_id, "i")
+             for char in img.name:
+                 write_next_bytes(fid, char.encode("utf-8"), "c")
+             write_next_bytes(fid, b"\x00", "c")
+             write_next_bytes(fid, len(img.point3D_ids), "Q")
+             for xy, p3d_id in zip(img.xys, img.point3D_ids):
+                 write_next_bytes(fid, [*xy, p3d_id], "ddq")
+
+
+ def read_points3D_text(path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadPoints3DText(const std::string& path)
+         void Reconstruction::WritePoints3DText(const std::string& path)
+     """
+     points3D = {}
+     with open(path, "r") as fid:
+         while True:
+             line = fid.readline()
+             if not line:
+                 break
+             line = line.strip()
+             if len(line) > 0 and line[0] != "#":
+                 elems = line.split()
+                 point3D_id = int(elems[0])
+                 xyz = np.array(tuple(map(float, elems[1:4])))
+                 rgb = np.array(tuple(map(int, elems[4:7])))
+                 error = float(elems[7])
+                 image_ids = np.array(tuple(map(int, elems[8::2])))
+                 point2D_idxs = np.array(tuple(map(int, elems[9::2])))
+                 points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
+                                                error=error, image_ids=image_ids,
+                                                point2D_idxs=point2D_idxs)
+     return points3D
+
+
+ def read_points3D_binary(path_to_model_file=None, fid=None):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadPoints3DBinary(const std::string& path)
+         void Reconstruction::WritePoints3DBinary(const std::string& path)
+     """
+     points3D = {}
+     if fid is None:
+         fid = open(path_to_model_file, "rb")
+     num_points = read_next_bytes(fid, 8, "Q")[0]
+     for _ in range(num_points):
+         binary_point_line_properties = read_next_bytes(
+             fid, num_bytes=43, format_char_sequence="QdddBBBd")
+         point3D_id = binary_point_line_properties[0]
+         xyz = np.array(binary_point_line_properties[1:4])
+         rgb = np.array(binary_point_line_properties[4:7])
+         error = np.array(binary_point_line_properties[7])
+         track_length = read_next_bytes(
+             fid, num_bytes=8, format_char_sequence="Q")[0]
+         track_elems = read_next_bytes(
+             fid, num_bytes=8*track_length,
+             format_char_sequence="ii"*track_length)
+         image_ids = np.array(tuple(map(int, track_elems[0::2])))
+         point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
+         points3D[point3D_id] = Point3D(
+             id=point3D_id, xyz=xyz, rgb=rgb,
+             error=error, image_ids=image_ids,
+             point2D_idxs=point2D_idxs)
+     if path_to_model_file is not None:
+         fid.close()
+     return points3D
+
+
+ def write_points3D_text(points3D, path):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadPoints3DText(const std::string& path)
+         void Reconstruction::WritePoints3DText(const std::string& path)
+     """
+     if len(points3D) == 0:
+         mean_track_length = 0
+     else:
+         mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
+     HEADER = "# 3D point list with one line of data per point:\n" + \
+              "#   POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \
+              "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length)
+
+     with open(path, "w") as fid:
+         fid.write(HEADER)
+         for _, pt in points3D.items():
+             point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
+             fid.write(" ".join(map(str, point_header)) + " ")
+             track_strings = []
+             for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
+                 track_strings.append(" ".join(map(str, [image_id, point2D])))
+             fid.write(" ".join(track_strings) + "\n")
+
+
+ def write_points3D_binary(points3D, path_to_model_file):
+     """
+     see: src/base/reconstruction.cc
+         void Reconstruction::ReadPoints3DBinary(const std::string& path)
+         void Reconstruction::WritePoints3DBinary(const std::string& path)
+     """
+     with open(path_to_model_file, "wb") as fid:
+         write_next_bytes(fid, len(points3D), "Q")
+         for _, pt in points3D.items():
+             write_next_bytes(fid, pt.id, "Q")
+             write_next_bytes(fid, pt.xyz.tolist(), "ddd")
+             write_next_bytes(fid, pt.rgb.tolist(), "BBB")
+             write_next_bytes(fid, pt.error, "d")
+             track_length = pt.image_ids.shape[0]
+             write_next_bytes(fid, track_length, "Q")
+             for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
+                 write_next_bytes(fid, [image_id, point2D_id], "ii")
+
+
+ def detect_model_format(path, ext):
+     if os.path.isfile(os.path.join(path, "cameras" + ext)) and \
+        os.path.isfile(os.path.join(path, "images" + ext)) and \
+        os.path.isfile(os.path.join(path, "points3D" + ext)):
+         print("Detected model format: '" + ext + "'")
+         return True
+
+     return False
+
+
+ def read_model(path, ext=""):
+     # try to detect the extension automatically
+     if ext == "":
+         if detect_model_format(path, ".bin"):
+             ext = ".bin"
+         elif detect_model_format(path, ".txt"):
+             ext = ".txt"
+         else:
+             print("Provide model format: '.bin' or '.txt'")
+             return
+
+     if ext == ".txt":
+         cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
+         images = read_images_text(os.path.join(path, "images" + ext))
+         points3D = read_points3D_text(os.path.join(path, "points3D" + ext))
+     else:
+         cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
+         images = read_images_binary(os.path.join(path, "images" + ext))
+         points3D = read_points3D_binary(os.path.join(path, "points3D" + ext))
+     return cameras, images, points3D
+
+
+ def write_model(cameras, images, points3D, path, ext=".bin"):
+     if ext == ".txt":
+         write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
+         write_images_text(images, os.path.join(path, "images" + ext))
+         write_points3D_text(points3D, os.path.join(path, "points3D" + ext))
+     else:
+         write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
+         write_images_binary(images, os.path.join(path, "images" + ext))
+         write_points3D_binary(points3D, os.path.join(path, "points3D" + ext))
+     return cameras, images, points3D
+
+
+ def qvec2rotmat(qvec):
+     return np.array([
+         [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
+          2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
+          2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
+         [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
+          1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
+          2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
+         [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
+          2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
+          1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
+
+
+ def rotmat2qvec(R):
+     Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
+     K = np.array([
+         [Rxx - Ryy - Rzz, 0, 0, 0],
+         [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
+         [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
+         [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
+     eigvals, eigvecs = np.linalg.eigh(K)
+     qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
+     if qvec[0] < 0:
+         qvec *= -1
+     return qvec
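A brief sketch (not part of the commit) of reading a reconstruction with the helpers above; the model directory path is hypothetical and should contain cameras/images/points3D in either `.bin` or `.txt` form:

    from hoho2025.read_write_colmap import read_model, qvec2rotmat

    cameras, images, points3D = read_model('path/to/colmap/sparse/0')
    for image_id, image in images.items():
        R = qvec2rotmat(image.qvec)  # world-to-camera rotation (COLMAP convention)
        t = image.tvec               # world-to-camera translation
        print(image.name, R.shape, t)
        break
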
tools2025/hoho2025/vis.py ADDED
@@ -0,0 +1,202 @@
+
+ import matplotlib.pyplot as plt
+ import trimesh
+ import numpy as np
+ from copy import deepcopy
+ from PIL import Image
+
+ from . import color_mappings
+
+
+ def plot_all_modalities(ds_entry, figsize=(8, 15)):
+     modalities_to_plot = ['images', 'depth', 'gestalt', 'ade']
+     modalities_in_entry = [k for k in ds_entry.keys() if k in modalities_to_plot and len(ds_entry[k]) > 0]
+     number_of_columns = len(modalities_in_entry)
+     number_of_images = len(ds_entry['image_ids'])
+     number_of_rows = number_of_images
+     fig, axes = plt.subplots(number_of_rows, number_of_columns, figsize=figsize)
+     for i in range(len(ds_entry[modalities_in_entry[0]])):
+         for j, modality in enumerate(modalities_in_entry):
+             ax = axes[i, j]
+             if modality == 'images':
+                 ax.imshow(ds_entry[modality][i])
+             elif modality == 'depth':
+                 depth_image = np.array(ds_entry[modality][i]) / 1000.0
+                 ax.imshow(depth_image, cmap='rainbow')
+             elif modality == 'gestalt':
+                 ax.imshow(ds_entry[modality][i])
+             elif modality == 'ade':
+                 ax.imshow(ds_entry[modality][i])
+             else:
+                 raise ValueError(f"Unknown modality: {modality}")
+             if i == 0:
+                 ax.set_title(modality)
+             ax.axis('off')
+             if j == 0:
+                 ax.set_ylabel(f"Image {i}")
+     fig.tight_layout()
+     fig.subplots_adjust(wspace=0.05, hspace=0.01)
+     # plt.show()
+     return fig, axes
+
+
+ def line(p1, p2, c=(255, 0, 0), resolution=10, radius=0.05):
+     '''Draws a 3D cylinder along the segment (p1, p2).'''
+     # check colors
+     if len(c) == 1:
+         c = [c[0]] * 4
+     elif len(c) == 3:
+         c = [*c, 255]
+     elif len(c) != 4:
+         raise ValueError(f'{c} is not a valid color (must have 1, 3, or 4 elements).')
+
+     # compute length and direction of the segment
+     p1, p2 = np.asarray(p1), np.asarray(p2)
+     l = np.linalg.norm(p2 - p1)
+
+     direction = (p2 - p1) / l
+
+     # point z along the direction of the segment
+     T = np.eye(4)
+     T[:3, 2] = direction
+     T[:3, 3] = (p1 + p2) / 2
+
+     # re-orthogonalize the basis
+     b0, b1 = T[:3, 0], T[:3, 1]
+     if np.abs(np.dot(b0, direction)) < np.abs(np.dot(b1, direction)):
+         T[:3, 1] = -np.cross(b0, direction)
+     else:
+         T[:3, 0] = np.cross(b1, direction)
+
+     # generate and transform the mesh
+     mesh = trimesh.primitives.Cylinder(radius=radius, height=l, transform=T)
+
+     # apply a uniform color
+     mesh.visual.vertex_colors = np.ones_like(mesh.visual.vertex_colors) * c
+
+     return mesh
+
+ def show_wf(row, radius=10, show_vertices=False, vertex_color=(255, 0, 0, 255)):
+     EDGE_CLASSES = ['eave',
+                     'ridge',
+                     'step_flashing',
+                     'rake',
+                     'flashing',
+                     'post',
+                     'valley',
+                     'hip',
+                     'transition_line']
+     out_meshes = []
+     if show_vertices:
+         out_meshes.extend([trimesh.primitives.Sphere(radius=radius + 5, center=center, color=vertex_color) for center in row['wf_vertices']])
+         for m in out_meshes:
+             m.visual.vertex_colors = np.ones_like(m.visual.vertex_colors) * vertex_color
+     if 'edge_semantics' not in row:
+         print("Warning: 'edge_semantics' is missing, drawing edges without semantic colors")
+         out_meshes.extend([line(a, b, radius=radius, c=(214, 251, 248)) for a, b in np.stack([*row['wf_vertices']])[np.stack(row['wf_edges'])]])
+     elif len(np.stack(row['wf_edges'])) == len(row['edge_semantics']):
+         out_meshes.extend([line(a, b, radius=radius, c=color_mappings.gestalt_color_mapping[EDGE_CLASSES[cls_id]]) for (a, b), cls_id in zip(np.stack([*row['wf_vertices']])[np.stack(row['wf_edges'])], row['edge_semantics'])])
+     else:
+         print("Warning: 'edge_semantics' has a different length than 'wf_edges', skipping semantics")
+         out_meshes.extend([line(a, b, radius=radius, c=(214, 251, 248)) for a, b in np.stack([*row['wf_vertices']])[np.stack(row['wf_edges'])]])
+     return out_meshes
+     # return [line(a, b, radius=radius, c=color_mappings.edge_colors[cls_id]) for (a, b), cls_id in zip(np.stack([*row['wf_vertices']])[np.stack(row['wf_edges'])], row['edge_semantics'])]
+
+
+ def show_grid(edges, meshes=None, row_length=5):
+     '''
+     edges: list of list of meshes
+     meshes: optional corresponding list of meshes
+     row_length: number of meshes per row
+
+     returns trimesh.Scene()
+     '''
+
+     T = np.eye(4)
+     out = []
+     edges = [sum(e[1:], e[0]) for e in edges]
+     row_height = 1.1 * max((e.extents for e in edges), key=lambda e: e[1])[1]
+     col_width = 1.1 * max((e.extents for e in edges), key=lambda e: e[0])[0]
+     # print(row_height, col_width)
+
+     if meshes is None:
+         meshes = [None] * len(edges)
+
+     for i, (gt, mesh) in enumerate(zip(edges, meshes), start=0):
+         mesh = deepcopy(mesh)
+         gt = deepcopy(gt)
+
+         if i % row_length != 0:
+             T[0, 3] += col_width
+         else:
+             T[0, 3] = 0
+             T[1, 3] += row_height
+
+         # print(T[0, 3] / col_width, T[2, 3] / row_height)
+
+         if mesh is not None:
+             mesh.apply_transform(T)
+             out.append(mesh)
+
+         gt.apply_transform(T)
+         out.append(gt)
+
+     return trimesh.Scene(out)
+
+
+ def visualize_order_images(row_order):
+     return create_image_grid(row_order['ade20k'] + row_order['gestalt'] + [visualize_depth(dm) for dm in row_order['depthcm']], num_per_row=len(row_order['ade20k']))
+
+ def create_image_grid(images, target_length=312, num_per_row=2):
+     # Calculate the target size from the first image's aspect ratio
+     first_img = images[0]
+     aspect_ratio = first_img.width / first_img.height
+     new_width = int((target_length ** 2 * aspect_ratio) ** 0.5)
+     new_height = int((target_length ** 2 / aspect_ratio) ** 0.5)
+
+     # Resize all images to the same size
+     resized_images = [img.resize((new_width, new_height), Image.Resampling.LANCZOS) for img in images]
+
+     # Calculate the grid size
+     num_rows = (len(resized_images) + num_per_row - 1) // num_per_row
+     grid_width = new_width * num_per_row
+     grid_height = new_height * num_rows
+
+     # Create a new image for the grid
+     grid_img = Image.new('RGB', (grid_width, grid_height))
+
+     # Paste the images into the grid
+     for i, img in enumerate(resized_images):
+         x_offset = (i % num_per_row) * new_width
+         y_offset = (i // num_per_row) * new_height
+         grid_img.paste(img, (x_offset, y_offset))
+
+     return grid_img
+
+
+ def visualize_depth(depth, min_depth=None, max_depth=None, cmap='rainbow'):
+     depth = np.array(depth)
+
+     if min_depth is None:
+         min_depth = np.min(depth)
+     if max_depth is None:
+         max_depth = np.max(depth)
+
+     # Normalize the depth to be between 0 and 1
+     depth = (depth - min_depth) / (max_depth - min_depth)
+     depth = np.clip(depth, 0, 1)
+
+     # Use the matplotlib colormap to convert the depth to an RGB image
+     cmap = plt.get_cmap(cmap)
+     depth_image = (cmap(depth) * 255).astype(np.uint8)
+
+     # Convert to a PIL image
+     depth_image = Image.fromarray(depth_image)
+
+     return depth_image
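A small sketch (not part of the commit) of the drawing helpers above; it assumes a display-capable environment for `Scene.show()`, and the segment endpoints and the random depth map are made up:

    import numpy as np
    import trimesh
    from hoho2025.vis import line, visualize_depth

    # Two colored cylinders standing in for wireframe edges
    segments = [
        line([0, 0, 0], [100, 0, 0], c=(255, 0, 0), radius=2),
        line([0, 0, 0], [0, 100, 0], c=(0, 255, 0), radius=2),
    ]
    trimesh.Scene(segments).show()

    # Colormap a depth map (here random) into a PIL image and save it
    visualize_depth(np.random.rand(64, 64) * 1000).save('depth_vis.png')
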
tools2025/hoho2025/viz3d.py ADDED
@@ -0,0 +1,287 @@
+ """
+ Copyright [2022] [Paul-Edouard Sarlin and Philipp Lindenberger]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ 3D visualization based on plotly.
+ Works for a small number of points and cameras, might be slow otherwise.
+
+ 1) Initialize a figure with `init_figure`
+ 2) Add 3D points, camera frustums, or both as a pycolmap.Reconstruction
+
+ Written by Paul-Edouard Sarlin and Philipp Lindenberger.
+ Slightly modified by Dmytro Mishkin.
+ """
+ from typing import Optional
+ import numpy as np
+ import pycolmap
+ import plotly.graph_objects as go
+ from hoho2025.color_mappings import edge_color_mapping, EDGE_CLASSES_BY_ID
+
+ def to_homogeneous(points):
+     pad = np.ones((points.shape[:-1] + (1,)), dtype=points.dtype)
+     return np.concatenate([points, pad], axis=-1)
+
+ ### Plotting functions
+
+ def init_figure(height: int = 800) -> go.Figure:
+     """Initialize a 3D figure."""
+     fig = go.FigureWidget()
+     axes = dict(
+         visible=False,
+         showbackground=False,
+         showgrid=False,
+         showline=False,
+         showticklabels=True,
+         autorange=True,
+     )
+     fig.update_layout(
+         template="plotly_dark",
+         height=height,
+         scene_camera=dict(
+             eye=dict(x=0., y=-.1, z=-2),
+             up=dict(x=0, y=-1., z=0),
+             projection=dict(type="orthographic")),
+         scene=dict(
+             xaxis=axes,
+             yaxis=axes,
+             zaxis=axes,
+             aspectmode='data',
+             dragmode='orbit',
+         ),
+         margin=dict(l=0, r=0, b=0, t=0, pad=0),
+         legend=dict(
+             orientation="h",
+             yanchor="top",
+             y=0.99,
+             xanchor="left",
+             x=0.1
+         ),
+     )
+     return fig
+
+
+ def plot_lines_3d(
+         fig: go.Figure,
+         pts: np.ndarray,
+         color: str = 'rgba(255, 255, 255, 1)',
+         ps: int = 2,
+         colorscale: Optional[str] = None,
+         name: Optional[str] = None):
+     """Plot a set of 3D line segments."""
+     x = pts[..., 0]
+     y = pts[..., 1]
+     z = pts[..., 2]
+     if isinstance(color, list):
+         traces = [go.Scatter3d(x=x1, y=y1, z=z1,
+                                mode='lines',
+                                line=dict(color=f"rgb{c}", width=ps)) for x1, y1, z1, c in zip(x, y, z, color)]
+     else:
+         traces = [go.Scatter3d(x=x1, y=y1, z=z1,
+                                mode='lines',
+                                line=dict(color=color, width=ps)) for x1, y1, z1 in zip(x, y, z)]
+     for t in traces:
+         fig.add_trace(t)
+     fig.update_traces(showlegend=False)
+
+
+ def plot_points(
+         fig: go.Figure,
+         pts: np.ndarray,
+         color: str = 'rgba(255, 0, 0, 1)',
+         ps: int = 2,
+         colorscale: Optional[str] = None,
+         name: Optional[str] = None):
+     """Plot a set of 3D points."""
+     x, y, z = pts.T
+     tr = go.Scatter3d(
+         x=x, y=y, z=z, mode='markers', name=name, legendgroup=name,
+         marker=dict(
+             size=ps, color=color, line_width=0.0, colorscale=colorscale))
+     fig.add_trace(tr)
+
+ def plot_camera(
+         fig: go.Figure,
+         R: np.ndarray,
+         t: np.ndarray,
+         K: np.ndarray,
+         color: str = 'rgb(0, 0, 255)',
+         name: Optional[str] = None,
+         legendgroup: Optional[str] = None,
+         size: float = 1.0):
+     """Plot a camera frustum from pose and intrinsic matrix. R and t are
+     the camera-to-world transformation (t is the camera center in world coordinates)."""
+     R = np.array(R)
+     t = np.array(t).reshape(3)
+     K = np.array(K)
+     W, H = K[0, 2] * 2, K[1, 2] * 2
+     corners = np.array([[0, 0], [W, 0], [W, H], [0, H], [0, 0]])
+     if size is not None:
+         image_extent = max(size * W / 1024.0, size * H / 1024.0)
+         world_extent = max(W, H) / (K[0, 0] + K[1, 1]) / 0.5
+         scale = 0.5 * image_extent / world_extent
+     else:
+         scale = 1.0
+     corners = to_homogeneous(corners) @ np.linalg.inv(K).T
+     corners = (corners / 2 * scale) @ R.T + t
+
+     x, y, z = corners.T
+     rect = go.Scatter3d(
+         x=x, y=y, z=z, line=dict(color=color), legendgroup=legendgroup,
+         name=name, marker=dict(size=0.0001), showlegend=False)
+     fig.add_trace(rect)
+
+     x, y, z = np.concatenate(([t], corners)).T
+     i = [0, 0, 0, 0]
+     j = [1, 2, 3, 4]
+     k = [2, 3, 4, 1]
+
+     pyramid = go.Mesh3d(
+         x=x, y=y, z=z, color=color, i=i, j=j, k=k,
+         legendgroup=legendgroup, name=name, showlegend=False)
+     fig.add_trace(pyramid)
+     triangles = np.vstack((i, j, k)).T
+     vertices = np.concatenate(([t], corners))
+     tri_points = np.array([
+         vertices[i] for i in triangles.reshape(-1)
+     ])
+
+     x, y, z = tri_points.T
+     pyramid = go.Scatter3d(
+         x=x, y=y, z=z, mode='lines', legendgroup=legendgroup,
+         name=name, line=dict(color=color, width=1), showlegend=False)
+     fig.add_trace(pyramid)
+
+
+ def plot_camera_colmap(
+         fig: go.Figure,
+         image: pycolmap.Image,
+         camera: pycolmap.Camera,
+         name: Optional[str] = None,
+         **kwargs):
+     """Plot a camera frustum from PyCOLMAP objects."""
+     intr = camera.calibration_matrix()
+     if intr[0][0] > 5000:
+         print("Bad camera")
+         return
+     world_t_camera = image.cam_from_world.inverse()
+     plot_camera(
+         fig,
+         world_t_camera.rotation.matrix(),  # rotation matrix (camera-to-world)
+         world_t_camera.translation,        # camera center in world coordinates
+         intr,
+         name=name or str(image.name),
+         **kwargs)
+
+
+ def plot_cameras(
+         fig: go.Figure,
+         reconstruction: pycolmap.Reconstruction,
+         **kwargs):
+     """Plot the camera frustums of all images in a reconstruction."""
+     for image_id, image in reconstruction.images.items():
+         plot_camera_colmap(
+             fig, image, reconstruction.cameras[image.camera_id], **kwargs)
+
+
+ def plot_reconstruction(
+         fig: go.Figure,
+         rec: pycolmap.Reconstruction,
+         color: str = 'rgb(0, 0, 255)',
+         name: Optional[str] = None,
+         points: bool = True,
+         cameras: bool = True,
+         cs: float = 1.0,
+         single_color_points=False,
+         camera_color='rgba(0, 255, 0, 0.5)',
+         crop_outliers: bool = False):
+     """Plot the 3D points and/or camera frustums of a pycolmap.Reconstruction."""
+     xyzs = []
+     rgbs = []
+     for k, p3D in rec.points3D.items():
+         xyzs.append(p3D.xyz)
+         rgbs.append(p3D.color)
+
+     xyzs = np.array(xyzs)
+     rgbs = np.array(rgbs)
+
+     # Crop outliers if requested: drop the 2% of points furthest from the origin
+     if crop_outliers and len(xyzs) > 0:
+         distances = np.linalg.norm(xyzs, axis=1)
+         threshold = np.percentile(distances, 98)
+         mask = distances <= threshold
+         xyzs = xyzs[mask]
+         rgbs = rgbs[mask]
+         print(f"Cropped outliers: removed {np.sum(~mask)} out of {len(mask)} points ({np.sum(~mask)/len(mask)*100:.2f}%)")
+
+     if points and len(xyzs) > 0:
+         plot_points(fig, xyzs, color=color if single_color_points else rgbs, ps=1, name=name)
+     if cameras:
+         plot_cameras(fig, rec, color=camera_color, legendgroup=name, size=cs)
+
+ def plot_wireframe(
+         fig: go.Figure,
+         vertices: np.ndarray,
+         edges: np.ndarray,
+         classifications: np.ndarray = None,
+         color: str = 'rgb(0, 0, 255)',
+         name: Optional[str] = None,
+         **kwargs):
+     """Plot a wireframe from vertices and edges, optionally colored by edge class."""
+     gt_vertices = np.array(vertices)
+     gt_connections = np.array(edges)
+     if gt_vertices is not None:
+         img_color2 = [color for _ in range(len(gt_vertices))]
+         plot_points(fig, gt_vertices, color=img_color2, ps=10)
+     if gt_connections is not None:
+         gt_lines = []
+         for c in gt_connections:
+             v1 = gt_vertices[c[0]]
+             v2 = gt_vertices[c[1]]
+             gt_lines.append(np.stack([v1, v2], axis=0))
+         if classifications is not None and len(classifications) == len(gt_lines):
+             line_colors = []
+             for c in classifications:
+                 line_colors.append(edge_color_mapping[EDGE_CLASSES_BY_ID[c]])
+             plot_lines_3d(fig, np.array(gt_lines), line_colors, ps=4)
+         else:
+             plot_lines_3d(fig, np.array(gt_lines), color, ps=4)
+
+
+ def plot_bpo_cameras_from_entry(fig: go.Figure, entry: dict, idx=None):
+     def cam2world_to_world2cam(R, t):
+         rt = np.eye(4)
+         rt[:3, :3] = R
+         rt[:3, 3] = t.reshape(-1)
+         rt = np.linalg.inv(rt)
+         return rt[:3, :3], rt[:3, 3]
+
+     for i in range(len(entry['R'])):
+         if idx is not None and i != idx:
+             continue
+         K = np.array(entry['K'][i])
+         R = np.array(entry['R'][i])
+         t = np.array(entry['t'][i])
+         R, t = cam2world_to_world2cam(R, t)
+         plot_camera(fig, R, t, K)
+
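A minimal plotly sketch (not part of the commit) using the helpers above; the single edge is made up, and `fig.show()` assumes a notebook or browser context:

    import numpy as np
    from hoho2025.viz3d import init_figure, plot_wireframe

    vertices = np.array([[0., 0., 0.], [0., 0., 100.]])
    edges = np.array([[0, 1]])

    fig = init_figure()
    plot_wireframe(fig, vertices, edges, color='rgb(0, 255, 0)')
    fig.show()
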
tools2025/notebooks/example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tools2025/pyproject.toml ADDED
@@ -0,0 +1,3 @@
+ [build-system]
+ requires = ["setuptools>=42", "wheel"]
+ build-backend = "setuptools.build_meta"
tools2025/requirements.txt ADDED
@@ -0,0 +1,14 @@
+ datasets
+ huggingface-hub
+ ipywidgets
+ matplotlib
+ numpy
+ opencv-python
+ Pillow
+ plotly
+ pycolmap
+ scipy
+ torch
+ trimesh
+ webdataset
+ manifold3d  # for metric computation
tools2025/setup.py ADDED
@@ -0,0 +1,37 @@
+ from setuptools import setup, find_packages
+ import glob
+ import os
+
+ # Try to read from requirements.txt, but have a fallback
+ try:
+     here = os.path.abspath(os.path.dirname(__file__))
+     with open(os.path.join(here, 'requirements.txt')) as f:
+         required = f.read().splitlines()
+ except FileNotFoundError:
+     # Fall back to hardcoded dependencies
+     required = [
+         'datasets',
+         'huggingface-hub',
+         'ipywidgets',
+         'matplotlib',
+         'numpy',
+         'opencv-python',
+         'Pillow',
+         'plotly',
+         'pycolmap',
+         'scipy',
+         'torch',
+         'trimesh',
+         'webdataset==0.2.111',
+     ]
+
+ setup(name='hoho2025',
+       version='0.1.0',
+       description='Tools and utilities for the HoHo Dataset and S23DR Competition',
+       url='https://github.com/s23dr/hoho2025',
+       author='Jack Langerman, Dmytro Mishkin, S23DR Organizing Team',
+       author_email='hoho@jackml.com',
+       install_requires=required,
+       packages=find_packages(),
+       python_requires='>=3.10',
+       include_package_data=True)