amhalacheva committed on
Commit
44b6733
·
verified ·
1 Parent(s): 66783f1

Upload folder using huggingface_hub

Browse files
config_inference.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = '/home/yli7/projects/gaussian_world/GS_Transformer/exp/lang_pretrainer/base-scannet-fix-xyz-all-w-normal-contrastive-siglip2-voting/model/model_best.pth'
2
+ resume = False
3
+ evaluate = True
4
+ test_only = True
5
+ seed = 58143646
6
+ save_path = 'exp/lang_pretrainer/lang-pretrain-ppv2-and-scannet-fixed-all-w-normal-late-contrastive'
7
+ num_worker = 0
8
+ batch_size = 48
9
+ batch_size_val = 48
10
+ batch_size_test = 1
11
+ epoch = 800
12
+ eval_epoch = 100
13
+ clip_grad = None
14
+ sync_bn = False
15
+ enable_amp = True
16
+ empty_cache = False
17
+ empty_cache_per_epoch = True
18
+ find_unused_parameters = False
19
+ mix_prob = 0.8
20
+ param_dicts = [dict(keyword='block', lr=0.0006)]
21
+ hooks = [
22
+ dict(type='CheckpointLoader'),
23
+ dict(type='IterationTimer', warmup_iter=2),
24
+ dict(type='InformationWriter'),
25
+ dict(
26
+ type='LangPretrainZeroShotSemSegEval',
27
+ class_names=
28
+ '/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_labels.txt',
29
+ text_embeddings=
30
+ '/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_text_embeddings_siglip2.pt',
31
+ excluded_classes=['wall', 'floor', 'ceiling'],
32
+ ignore_index=-1,
33
+ vote_k=25,
34
+ enbale_voting=True,
35
+ confidence_threshold=0.1),
36
+ dict(type='CheckpointSaver', save_freq=None),
37
+ dict(type='PreciseEvaluator', test_last=True)
38
+ ]
39
+ train = dict(type='DefaultTrainer')
40
+
41
+ test = [
42
+ # scannet++
43
+ dict(
44
+ type='ZeroShotSemSegTester',
45
+ verbose=True,
46
+ class_names=
47
+ '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs/metadata/semantic_benchmark/top100.txt',
48
+ text_embeddings=
49
+ '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs/metadata/semantic_benchmark/top100_text_embeddings_siglip2.pt',
50
+ excluded_classes=['wall', 'floor', 'ceiling'],
51
+ enable_voting=True,
52
+ vote_k=25,
53
+ confidence_threshold=0.1,
54
+ save_feat=False,
55
+ skip_eval=True),
56
+
57
+ # scannet20
58
+ # dict(
59
+ # type="ZeroShotSemSegTester",
60
+ # verbose=True,
61
+ # class_names="/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet20_labels.txt",
62
+ # text_embeddings="/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet20_text_embeddings_siglip2.pt",
63
+ # excluded_classes=["wall", "floor", "ceiling"],
64
+ # enable_voting=True,
65
+ # vote_k=25,
66
+ # confidence_threshold=0.1,
67
+ # save_feat=False,
68
+ # skip_eval=False,
69
+ # ),
70
+
71
+ # matterport3d
72
+ # dict(
73
+ # type="ZeroShotSemSegTester",
74
+ # verbose=True,
75
+ # class_names="/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/matterport3d/meta_data/matterport_labels_21.txt",
76
+ # text_embeddings="/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/matterport3d/meta_data/matterport21_text_embeddings_siglip2.pt",
77
+ # excluded_classes=["wall", "floor", "ceiling"],
78
+ # enable_voting=True,
79
+ # vote_k=25,
80
+ # confidence_threshold=0.1,
81
+ # save_feat=False,
82
+ # skip_eval=False,
83
+ # )
84
+
85
+ ]
86
+
87
+ data = dict(
88
+ names=[
89
+ 'wall', 'ceiling', 'floor', 'table', 'door', 'ceiling lamp', 'cabinet',
90
+ 'blinds', 'curtain', 'chair', 'storage cabinet', 'office chair',
91
+ 'bookshelf', 'whiteboard', 'window', 'box', 'window frame', 'monitor',
92
+ 'shelf', 'doorframe', 'pipe', 'heater', 'kitchen cabinet', 'sofa',
93
+ 'windowsill', 'bed', 'shower wall', 'trash can', 'book', 'plant',
94
+ 'blanket', 'tv', 'computer tower', 'kitchen counter', 'refrigerator',
95
+ 'jacket', 'electrical duct', 'sink', 'bag', 'picture', 'pillow',
96
+ 'towel', 'suitcase', 'backpack', 'crate', 'keyboard', 'rack', 'toilet',
97
+ 'paper', 'printer', 'poster', 'painting', 'microwave', 'board',
98
+ 'shoes', 'socket', 'bottle', 'bucket', 'cushion', 'basket',
99
+ 'shoe rack', 'telephone', 'file folder', 'cloth', 'blind rail',
100
+ 'laptop', 'plant pot', 'exhaust fan', 'cup', 'coat hanger',
101
+ 'light switch', 'speaker', 'table lamp', 'air vent', 'clothes hanger',
102
+ 'kettle', 'smoke detector', 'container', 'power strip', 'slippers',
103
+ 'paper bag', 'mouse', 'cutting board', 'toilet paper', 'paper towel',
104
+ 'pot', 'clock', 'pan', 'tap', 'jar', 'soap dispenser', 'binder',
105
+ 'bowl', 'tissue box', 'whiteboard eraser', 'toilet brush',
106
+ 'spray bottle', 'headphones', 'stapler', 'marker'
107
+ ],
108
+ num_classes=100,
109
+ ignore_index=-1,
110
+ train=dict(
111
+ type='ScanNetPPGSDataset',
112
+ split=('train_grid1mm_chunk6x6_stride3x3',
113
+ 'val_v1_grid1mm_chunk6x6_stride3x3', 'train_scannet_fix_xyz',
114
+ 'val_scannet_fix_xyz'),
115
+ data_root=
116
+ '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs',
117
+ sample_tail_classes=False,
118
+ filtered_scene=[
119
+ 'c601466b77', '654a4f341b', '0f25f24a4f', '72f527a47c',
120
+ '2c7c10379b', '5ea3e738c3', '27dd4da69e', '281ba69af1',
121
+ '816e996553'
122
+ ],
123
+ transform=[
124
+ dict(type='CenterShift', apply_z=True),
125
+ dict(
126
+ type='RandomDropout',
127
+ dropout_ratio=0.2,
128
+ dropout_application_ratio=0.2),
129
+ dict(
130
+ type='RandomRotate',
131
+ angle=[-1, 1],
132
+ axis='z',
133
+ center=[0, 0, 0],
134
+ p=0.5),
135
+ dict(
136
+ type='RandomRotate',
137
+ angle=[-0.015625, 0.015625],
138
+ axis='x',
139
+ p=0.5),
140
+ dict(
141
+ type='RandomRotate',
142
+ angle=[-0.015625, 0.015625],
143
+ axis='y',
144
+ p=0.5),
145
+ dict(type='RandomScale', scale=[0.9, 1.1]),
146
+ dict(type='RandomFlip', p=0.5),
147
+ dict(type='RandomJitter', sigma=0.005, clip=0.01),
148
+ dict(
149
+ type='ElasticDistortion',
150
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
151
+ dict(type='ChromaticAutoContrast', p=0.2, blend_factor=None),
152
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
153
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
154
+ dict(
155
+ type='GridSample',
156
+ grid_size=0.02,
157
+ hash_type='fnv',
158
+ mode='train',
159
+ keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
160
+ 'segment', 'lang_feat', 'valid_feat_mask'),
161
+ return_grid_coord=True),
162
+ dict(type='SphereCrop', point_max=192000, mode='random'),
163
+ dict(type='CenterShift', apply_z=False),
164
+ dict(type='NormalizeColor'),
165
+ dict(type='ToTensor'),
166
+ dict(
167
+ type='Collect',
168
+ keys=('coord', 'grid_coord', 'segment', 'lang_feat',
169
+ 'valid_feat_mask'),
170
+ feat_keys=('color', 'opacity', 'quat', 'scale', 'normal'))
171
+ ],
172
+ test_mode=False,
173
+ loop=8),
174
+ val=dict(
175
+ type='ScanNetPPGSDataset',
176
+ split='val_scannet_fix_xyz',
177
+ data_root=
178
+ '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs',
179
+ filtered_scene=[
180
+ 'c601466b77', '654a4f341b', '0f25f24a4f', '72f527a47c',
181
+ '2c7c10379b', '5ea3e738c3', '27dd4da69e', '281ba69af1',
182
+ '816e996553'
183
+ ],
184
+ transform=[
185
+ dict(type='CenterShift', apply_z=True),
186
+ dict(
187
+ type='GridSample',
188
+ grid_size=0.02,
189
+ hash_type='fnv',
190
+ mode='train',
191
+ keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
192
+ 'segment', 'lang_feat', 'valid_feat_mask'),
193
+ return_grid_coord=True),
194
+ dict(type='CenterShift', apply_z=False),
195
+ dict(type='NormalizeColor'),
196
+ dict(type='ToTensor'),
197
+ dict(
198
+ type='Collect',
199
+ keys=('coord', 'grid_coord', 'segment', 'lang_feat',
200
+ 'valid_feat_mask'),
201
+ feat_keys=('color', 'opacity', 'quat', 'scale', 'normal'))
202
+ ],
203
+ test_mode=False),
204
+ test=[
205
+ # scannet++
206
+ dict(
207
+ type='ScanNetPPGSDataset',
208
+ split='val',
209
+ data_root=
210
+ '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs',
211
+ # filtered_scene=[
212
+ # 'c601466b77', '654a4f341b', '0f25f24a4f', '72f527a47c',
213
+ # '2c7c10379b', '5ea3e738c3', '27dd4da69e', '281ba69af1',
214
+ # '816e996553'
215
+ # ],
216
+ transform=[
217
+ dict(type='CenterShift', apply_z=True),
218
+ dict(type='NormalizeColor'),
219
+ dict(
220
+ type='Copy',
221
+ keys_dict=dict(
222
+ segment='origin_segment',
223
+ coord='origin_coord',
224
+ valid_feat_mask='origin_feat_mask')),
225
+ dict(
226
+ type='GridSample',
227
+ grid_size=0.01,
228
+ hash_type='fnv',
229
+ mode='train',
230
+ keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
231
+ 'lang_feat', 'valid_feat_mask', "segment"),
232
+ return_inverse=True)
233
+ ],
234
+ test_mode=True,
235
+ test_cfg=dict(
236
+ voxelize=dict(
237
+ type='GridSample',
238
+ grid_size=0.02,
239
+ hash_type='fnv',
240
+ mode='test',
241
+ keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
242
+ 'lang_feat', 'valid_feat_mask'), # keeping only the keys needed for inference is sufficient here
243
+ return_grid_coord=True),
244
+ crop=None,
245
+ post_transform=[
246
+ dict(type='CenterShift', apply_z=False),
247
+ dict(type='ToTensor'),
248
+ dict(
249
+ type='Collect',
250
+ keys=('coord', 'grid_coord', 'index', 'lang_feat', 'valid_feat_mask'),
251
+ feat_keys=('color', 'opacity', 'quat', 'scale', 'normal')) # only keys for inference
252
+ ],
253
+ aug_transform=[[{
254
+ 'type': 'RandomRotateTargetAngle',
255
+ 'angle': [0],
256
+ 'axis': 'z',
257
+ 'center': [0, 0, 0],
258
+ 'p': 1
259
+ }]])),
260
+
261
+ # scannet20
262
+ # dict(
263
+ # type='ScanNetGSDataset',
264
+ # split='val',
265
+ # data_root=
266
+ # '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannet_default_fix_xyz_gs',
267
+ # transform=[
268
+ # dict(type='CenterShift', apply_z=True),
269
+ # dict(type='NormalizeColor'),
270
+ # dict(
271
+ # type='Copy',
272
+ # keys_dict=dict(
273
+ # segment='origin_segment',
274
+ # coord='origin_coord',
275
+ # valid_feat_mask='origin_feat_mask',
276
+ # instance='origin_instance')),
277
+ # dict(
278
+ # type='GridSample',
279
+ # grid_size=0.01,
280
+ # hash_type='fnv',
281
+ # mode='train',
282
+ # keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
283
+ # 'lang_feat', 'valid_feat_mask', "segment"),
284
+ # return_inverse=True)
285
+ # ],
286
+ # test_mode=True,
287
+ # test_cfg=dict(
288
+ # voxelize=dict(
289
+ # type='GridSample',
290
+ # grid_size=0.02,
291
+ # hash_type='fnv',
292
+ # mode='test',
293
+ # keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
294
+ # 'lang_feat', 'valid_feat_mask'), # keeping only the keys needed for inference is sufficient here
295
+ # return_grid_coord=True),
296
+ # crop=None,
297
+ # post_transform=[
298
+ # dict(type='CenterShift', apply_z=False),
299
+ # dict(type='ToTensor'),
300
+ # dict(
301
+ # type='Collect',
302
+ # keys=('coord', 'grid_coord', 'index', 'lang_feat', 'valid_feat_mask'),
303
+ # feat_keys=('color', 'opacity', 'quat', 'scale', 'normal')) # only keys for inference
304
+ # ],
305
+ # aug_transform=[[{
306
+ # 'type': 'RandomRotateTargetAngle',
307
+ # 'angle': [0],
308
+ # 'axis': 'z',
309
+ # 'center': [0, 0, 0],
310
+ # 'p': 1
311
+ # }]])),
312
+
313
+ # matterport3d
314
+ # dict(
315
+ # type='Matterport3DGSDataset',
316
+ # split='test',
317
+ # data_root=
318
+ # '/home/yli7/scratch/datasets/gaussian_world/preprocessed/matterport3d_region_default_fix_xyz_gs',
319
+ # transform=[
320
+ # dict(type='CenterShift', apply_z=True),
321
+ # dict(type='NormalizeColor'),
322
+ # dict(
323
+ # type='Copy',
324
+ # keys_dict=dict(
325
+ # segment='origin_segment',
326
+ # coord='origin_coord',
327
+ # valid_feat_mask='origin_feat_mask',
328
+ # )),
329
+ # dict(
330
+ # type='GridSample',
331
+ # grid_size=0.01,
332
+ # hash_type='fnv',
333
+ # mode='train',
334
+ # keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
335
+ # 'lang_feat', 'valid_feat_mask', "segment"),
336
+ # return_inverse=True)
337
+ # ],
338
+ # test_mode=True,
339
+ # test_cfg=dict(
340
+ # voxelize=dict(
341
+ # type='GridSample',
342
+ # grid_size=0.02,
343
+ # hash_type='fnv',
344
+ # mode='test',
345
+ # keys=('coord', 'color', 'opacity', 'quat', 'scale', 'normal',
346
+ # 'lang_feat', 'valid_feat_mask'), # keeping only the keys needed for inference is sufficient here
347
+ # return_grid_coord=True),
348
+ # crop=None,
349
+ # post_transform=[
350
+ # dict(type='CenterShift', apply_z=False),
351
+ # dict(type='ToTensor'),
352
+ # dict(
353
+ # type='Collect',
354
+ # keys=('coord', 'grid_coord', 'index', 'lang_feat', 'valid_feat_mask'),
355
+ # feat_keys=('color', 'opacity', 'quat', 'scale', 'normal')) # only keys for inference
356
+ # ],
357
+ # aug_transform=[[{
358
+ # 'type': 'RandomRotateTargetAngle',
359
+ # 'angle': [0],
360
+ # 'axis': 'z',
361
+ # 'center': [0, 0, 0],
362
+ # 'p': 1
363
+ # }]]))
364
+
365
+ ]
366
+
367
+ )
368
+ debug = 0
369
+ gpu_nums = 24
370
+ model = dict(
371
+ type='LangPretrainer',
372
+ backbone=dict(
373
+ type='PT-v3m1',
374
+ in_channels=14,
375
+ order=('z', 'z-trans', 'hilbert', 'hilbert-trans'),
376
+ stride=(2, 2, 2),
377
+ enc_depths=(2, 2, 2, 6),
378
+ enc_channels=(32, 64, 128, 256),
379
+ enc_num_head=(2, 4, 8, 16),
380
+ enc_patch_size=(1024, 1024, 1024, 1024),
381
+ dec_depths=(2, 2, 2),
382
+ dec_channels=(768, 512, 256),
383
+ dec_num_head=(16, 16, 16),
384
+ dec_patch_size=(1024, 1024, 1024),
385
+ mlp_ratio=4,
386
+ qkv_bias=True,
387
+ qk_scale=None,
388
+ attn_drop=0.0,
389
+ proj_drop=0.0,
390
+ drop_path=0.3,
391
+ shuffle_orders=True,
392
+ pre_norm=True,
393
+ enable_rpe=False,
394
+ enable_flash=True,
395
+ upcast_attention=False,
396
+ upcast_softmax=False,
397
+ cls_mode=False,
398
+ pdnorm_bn=False,
399
+ pdnorm_ln=False,
400
+ pdnorm_decouple=True,
401
+ pdnorm_adaptive=False,
402
+ pdnorm_affine=True,
403
+ pdnorm_conditions=('ScanNet', 'S3DIS', 'Structured3D')),
404
+ criteria=[
405
+ dict(type='CosineSimilarity', reduction='mean', loss_weight=1.0),
406
+ dict(type='L2Loss', reduction='mean', loss_weight=1.0),
407
+ dict(
408
+ type='AggregatedContrastiveLoss',
409
+ temperature=0.2,
410
+ reduction='mean',
411
+ loss_weight=0.02,
412
+ schedule='last_75')
413
+ ])
414
+ optimizer = dict(type='AdamW', lr=0.006, weight_decay=0.05)
415
+ scheduler = dict(
416
+ type='OneCycleLR',
417
+ max_lr=[0.006, 0.0006],
418
+ pct_start=0.05,
419
+ anneal_strategy='cos',
420
+ div_factor=10.0,
421
+ final_div_factor=1000.0)
422
+ dataset_type = 'ScanNetPPGSDataset'
423
+ data_root = '/home/yli7/scratch/datasets/gaussian_world/preprocessed/scannetpp_v2_default_fix_xyz_gs'
424
+ class_names_path = '/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_labels.txt'
425
+ text_embeddings_path = '/home/yli7/projects/gaussian_world/GS_Transformer_debug/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_text_embeddings_siglip2.pt'
model_best_lang-pretrain-ppv2-and-scannet-fixed-all-w-normal-late-contrastive.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff775c7d64f9d7f45ec8decd4be6a3e15f26ed48a8e08b45a14030678922ab7
3
+ size 1101204066