TensorBoard
karimknaebel committed on
Commit
6cbc625
·
verified ·
1 Parent(s): a46f8a7

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -41,3 +41,4 @@ nuscenes/semseg-ptv3_dino-S/test.log filter=lfs diff=lfs merge=lfs -text
41
  nuscenes/semseg-ptv3_dino-S/train.log filter=lfs diff=lfs merge=lfs -text
42
  semantic_kitti/semseg-ptv3_dino-L/train.log filter=lfs diff=lfs merge=lfs -text
43
  scannet200/semseg-ptv3_dino-L/train.log filter=lfs diff=lfs merge=lfs -text
 
 
41
  nuscenes/semseg-ptv3_dino-S/train.log filter=lfs diff=lfs merge=lfs -text
42
  semantic_kitti/semseg-ptv3_dino-L/train.log filter=lfs diff=lfs merge=lfs -text
43
  scannet200/semseg-ptv3_dino-L/train.log filter=lfs diff=lfs merge=lfs -text
44
+ scannet200/semseg-ptv3_dinov3-L/train.log filter=lfs diff=lfs merge=lfs -text
scannet200/semseg-ptv3_dinov3-L/config.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 14931494
6
+ save_path = 'exp/scannet200/2025-08-16_005918'
7
+ wandb_project = 'semseg_scannet200'
8
+ num_worker = 24
9
+ batch_size = 12
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 800
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ sync_bn = False
16
+ enable_amp = True
17
+ empty_cache = False
18
+ empty_cache_per_epoch = False
19
+ find_unused_parameters = False
20
+ mix_prob = 0.8
21
+ param_dicts = [dict(keyword='img_enc|block', lr=0.0006)]
22
+ hooks = [
23
+ dict(type='CheckpointLoader'),
24
+ dict(type='IterationTimer', warmup_iter=2),
25
+ dict(type='InformationWriter'),
26
+ dict(type='SemSegEvaluator'),
27
+ dict(type='CheckpointSaver', save_freq=None),
28
+ dict(type='PreciseEvaluator', test_last=False)
29
+ ]
30
+ train = dict(type='DefaultTrainer')
31
+ test = dict(type='SemSegTester', verbose=True)
32
+ CLASS_LABELS_200 = (
33
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
34
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
35
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
36
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel',
37
+ 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion',
38
+ 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard',
39
+ 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard',
40
+ 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave',
41
+ 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench',
42
+ 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair',
43
+ 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person',
44
+ 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard',
45
+ 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container',
46
+ 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand',
47
+ 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar',
48
+ 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder',
49
+ 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin',
50
+ 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat',
51
+ 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board',
52
+ 'fireplace', 'soap dish', 'kitchen counter', 'doorframe',
53
+ 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball',
54
+ 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
55
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
56
+ 'toilet seat cover dispenser', 'furniture', 'cart', 'storage container',
57
+ 'scale', 'tissue box', 'light switch', 'crate', 'power outlet',
58
+ 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner',
59
+ 'candle', 'plunger', 'stuffed animal', 'headphones', 'dish rack', 'broom',
60
+ 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle',
61
+ 'handicap bar', 'purse', 'vent', 'shower floor', 'water pitcher',
62
+ 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand',
63
+ 'projector screen', 'divider', 'laundry detergent', 'bathroom counter',
64
+ 'object', 'bathroom vanity', 'closet wall', 'laundry hamper',
65
+ 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell',
66
+ 'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod',
67
+ 'coffee kettle', 'structure', 'shower head', 'keyboard piano',
68
+ 'case of water bottles', 'coat rack', 'storage organizer', 'folded chair',
69
+ 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant',
70
+ 'luggage', 'mattress')
71
+ model = dict(
72
+ type='DefaultSegmentorV2',
73
+ num_classes=200,
74
+ backbone_out_channels=64,
75
+ backbone=dict(
76
+ type='PT-v3m1-dinov3',
77
+ in_channels=6,
78
+ order=('z', 'z-trans', 'hilbert', 'hilbert-trans'),
79
+ stride=(2, 2, 2, 2),
80
+ enc_depths=(2, 2, 2, 6, 2),
81
+ enc_channels=(32, 64, 128, 256, 512),
82
+ enc_num_head=(2, 4, 8, 16, 32),
83
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
84
+ dec_depths=(2, 2, 2, 2),
85
+ dec_channels=(64, 64, 128, 256),
86
+ dec_num_head=(4, 4, 8, 16),
87
+ dec_patch_size=(1024, 1024, 1024, 1024),
88
+ mlp_ratio=4,
89
+ qkv_bias=True,
90
+ qk_scale=None,
91
+ init_values=None,
92
+ attn_drop=0.0,
93
+ proj_drop=0.0,
94
+ drop_path=0.3,
95
+ shuffle_orders=True,
96
+ pre_norm=True,
97
+ enable_rpe=False,
98
+ enable_flash=True,
99
+ upcast_attention=False,
100
+ upcast_softmax=False,
101
+ cls_mode=False,
102
+ pdnorm_bn=False,
103
+ pdnorm_ln=False,
104
+ pdnorm_decouple=True,
105
+ pdnorm_adaptive=False,
106
+ pdnorm_affine=True,
107
+ pdnorm_conditions=('ScanNet', 'S3DIS', 'Structured3D'),
108
+ dinov2='large',
109
+ image_size=(480, 640)),
110
+ criteria=[
111
+ dict(type='CrossEntropyLoss', loss_weight=1.0, ignore_index=-1),
112
+ dict(
113
+ type='LovaszLoss',
114
+ mode='multiclass',
115
+ loss_weight=1.0,
116
+ ignore_index=-1)
117
+ ])
118
+ optimizer = dict(type='AdamW', lr=0.006, weight_decay=0.05)
119
+ scheduler = dict(
120
+ type='OneCycleLR',
121
+ max_lr=[0.006, 0.0006],
122
+ pct_start=0.05,
123
+ anneal_strategy='cos',
124
+ div_factor=10.0,
125
+ final_div_factor=1000.0)
126
+ dataset_type = 'ScanNet200Dataset'
127
+ data_root = 'data/scannet'
128
+ data = dict(
129
+ num_classes=200,
130
+ ignore_index=-1,
131
+ names=(
132
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
133
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
134
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
135
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet',
136
+ 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool',
137
+ 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table',
138
+ 'keyboard', 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand',
139
+ 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet',
140
+ 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle',
141
+ 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror',
142
+ 'copier', 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop',
143
+ 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds',
144
+ 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail', 'radiator',
145
+ 'recycling bin', 'container', 'wardrobe', 'soap dispenser',
146
+ 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket',
147
+ 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat',
148
+ 'speaker', 'column', 'bicycle', 'ladder', 'bathroom stall',
149
+ 'shower wall', 'cup', 'jacket', 'storage bin', 'coffee maker',
150
+ 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill',
151
+ 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace',
152
+ 'soap dish', 'kitchen counter', 'doorframe', 'toilet paper dispenser',
153
+ 'mini fridge', 'fire extinguisher', 'ball', 'hat',
154
+ 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
155
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
156
+ 'toilet seat cover dispenser', 'furniture', 'cart',
157
+ 'storage container', 'scale', 'tissue box', 'light switch', 'crate',
158
+ 'power outlet', 'decoration', 'sign', 'projector', 'closet door',
159
+ 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 'headphones',
160
+ 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan',
161
+ 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent',
162
+ 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag',
163
+ 'alarm clock', 'music stand', 'projector screen', 'divider',
164
+ 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity',
165
+ 'closet wall', 'laundry hamper', 'bathroom stall door',
166
+ 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube',
167
+ 'bathroom cabinet', 'cd case', 'closet rod', 'coffee kettle',
168
+ 'structure', 'shower head', 'keyboard piano', 'case of water bottles',
169
+ 'coat rack', 'storage organizer', 'folded chair', 'fire alarm',
170
+ 'power strip', 'calendar', 'poster', 'potted plant', 'luggage',
171
+ 'mattress'),
172
+ train=dict(
173
+ type='ScanNet200Dataset',
174
+ split='train',
175
+ data_root='data/scannet',
176
+ with_images=10,
177
+ transform=[
178
+ dict(type='ImageResize', size=[480, 640]),
179
+ dict(
180
+ type='ImageColorJitter',
181
+ brightness=0.4,
182
+ contrast=0.4,
183
+ saturation=0.2,
184
+ hue=0.1),
185
+ dict(type='ImageRandomHorizontalFlip'),
186
+ dict(type='ImageNormalize'),
187
+ dict(type='CenterShift', apply_z=True),
188
+ dict(
189
+ type='RandomDropout',
190
+ dropout_ratio=0.2,
191
+ dropout_application_ratio=0.2),
192
+ dict(
193
+ type='RandomRotate',
194
+ angle=[-1, 1],
195
+ axis='z',
196
+ center=[0, 0, 0],
197
+ p=0.5),
198
+ dict(
199
+ type='RandomRotate',
200
+ angle=[-0.015625, 0.015625],
201
+ axis='x',
202
+ p=0.5),
203
+ dict(
204
+ type='RandomRotate',
205
+ angle=[-0.015625, 0.015625],
206
+ axis='y',
207
+ p=0.5),
208
+ dict(type='RandomScale', scale=[0.9, 1.1]),
209
+ dict(type='RandomFlip', p=0.5),
210
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
211
+ dict(
212
+ type='ElasticDistortion',
213
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
214
+ dict(type='ChromaticAutoContrast', p=0.2, blend_factor=None),
215
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
216
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
217
+ dict(
218
+ type='GridSample',
219
+ grid_size=0.02,
220
+ hash_type='fnv',
221
+ mode='train',
222
+ keys=('coord', 'color', 'normal', 'segment', 'image_coord',
223
+ 'image_mask'),
224
+ return_grid_coord=True),
225
+ dict(type='SphereCrop', point_max=102400, mode='random'),
226
+ dict(type='CenterShift', apply_z=False),
227
+ dict(type='NormalizeColor'),
228
+ dict(type='ToTensor'),
229
+ dict(
230
+ type='Collect',
231
+ keys=('coord', 'grid_coord', 'segment', 'image', 'image_coord',
232
+ 'image_mask'),
233
+ feat_keys=('color', 'normal'))
234
+ ],
235
+ test_mode=False,
236
+ loop=8),
237
+ val=dict(
238
+ type='ScanNet200Dataset',
239
+ split='val',
240
+ data_root='data/scannet',
241
+ with_images=10,
242
+ transform=[
243
+ dict(type='ImageResize', size=[480, 640]),
244
+ dict(type='ImageNormalize'),
245
+ dict(type='CenterShift', apply_z=True),
246
+ dict(
247
+ type='GridSample',
248
+ grid_size=0.02,
249
+ hash_type='fnv',
250
+ mode='train',
251
+ keys=('coord', 'color', 'normal', 'segment', 'image_coord',
252
+ 'image_mask'),
253
+ return_grid_coord=True),
254
+ dict(type='CenterShift', apply_z=False),
255
+ dict(type='NormalizeColor'),
256
+ dict(type='ToTensor'),
257
+ dict(
258
+ type='Collect',
259
+ keys=('coord', 'grid_coord', 'segment', 'image', 'image_coord',
260
+ 'image_mask'),
261
+ feat_keys=('color', 'normal'))
262
+ ],
263
+ test_mode=False),
264
+ test=dict(
265
+ type='ScanNet200Dataset',
266
+ split='val',
267
+ data_root='data/scannet',
268
+ with_images=10,
269
+ transform=[
270
+ dict(type='CenterShift', apply_z=True),
271
+ dict(type='NormalizeColor')
272
+ ],
273
+ test_mode=True,
274
+ test_cfg=dict(
275
+ voxelize=dict(
276
+ type='GridSample',
277
+ grid_size=0.02,
278
+ hash_type='fnv',
279
+ mode='test',
280
+ keys=('coord', 'color', 'normal', 'image_coord', 'image_mask'),
281
+ return_grid_coord=True),
282
+ crop=None,
283
+ post_transform=[
284
+ dict(type='ImageResize', size=[480, 640]),
285
+ dict(type='ImageNormalize'),
286
+ dict(type='CenterShift', apply_z=False),
287
+ dict(type='ToTensor'),
288
+ dict(
289
+ type='Collect',
290
+ keys=('coord', 'grid_coord', 'index', 'image',
291
+ 'image_coord', 'image_mask'),
292
+ feat_keys=('color', 'normal'))
293
+ ],
294
+ aug_transform=[[{
295
+ 'type': 'RandomRotateTargetAngle',
296
+ 'angle': [0],
297
+ 'axis': 'z',
298
+ 'center': [0, 0, 0],
299
+ 'p': 1
300
+ }],
301
+ [{
302
+ 'type': 'RandomRotateTargetAngle',
303
+ 'angle': [0.5],
304
+ 'axis': 'z',
305
+ 'center': [0, 0, 0],
306
+ 'p': 1
307
+ }],
308
+ [{
309
+ 'type': 'RandomRotateTargetAngle',
310
+ 'angle': [1],
311
+ 'axis': 'z',
312
+ 'center': [0, 0, 0],
313
+ 'p': 1
314
+ }],
315
+ [{
316
+ 'type': 'RandomRotateTargetAngle',
317
+ 'angle': [1.5],
318
+ 'axis': 'z',
319
+ 'center': [0, 0, 0],
320
+ 'p': 1
321
+ }],
322
+ [{
323
+ 'type': 'RandomRotateTargetAngle',
324
+ 'angle': [0],
325
+ 'axis': 'z',
326
+ 'center': [0, 0, 0],
327
+ 'p': 1
328
+ }, {
329
+ 'type': 'RandomScale',
330
+ 'scale': [0.95, 0.95]
331
+ }],
332
+ [{
333
+ 'type': 'RandomRotateTargetAngle',
334
+ 'angle': [0.5],
335
+ 'axis': 'z',
336
+ 'center': [0, 0, 0],
337
+ 'p': 1
338
+ }, {
339
+ 'type': 'RandomScale',
340
+ 'scale': [0.95, 0.95]
341
+ }],
342
+ [{
343
+ 'type': 'RandomRotateTargetAngle',
344
+ 'angle': [1],
345
+ 'axis': 'z',
346
+ 'center': [0, 0, 0],
347
+ 'p': 1
348
+ }, {
349
+ 'type': 'RandomScale',
350
+ 'scale': [0.95, 0.95]
351
+ }],
352
+ [{
353
+ 'type': 'RandomRotateTargetAngle',
354
+ 'angle': [1.5],
355
+ 'axis': 'z',
356
+ 'center': [0, 0, 0],
357
+ 'p': 1
358
+ }, {
359
+ 'type': 'RandomScale',
360
+ 'scale': [0.95, 0.95]
361
+ }],
362
+ [{
363
+ 'type': 'RandomRotateTargetAngle',
364
+ 'angle': [0],
365
+ 'axis': 'z',
366
+ 'center': [0, 0, 0],
367
+ 'p': 1
368
+ }, {
369
+ 'type': 'RandomScale',
370
+ 'scale': [1.05, 1.05]
371
+ }],
372
+ [{
373
+ 'type': 'RandomRotateTargetAngle',
374
+ 'angle': [0.5],
375
+ 'axis': 'z',
376
+ 'center': [0, 0, 0],
377
+ 'p': 1
378
+ }, {
379
+ 'type': 'RandomScale',
380
+ 'scale': [1.05, 1.05]
381
+ }],
382
+ [{
383
+ 'type': 'RandomRotateTargetAngle',
384
+ 'angle': [1],
385
+ 'axis': 'z',
386
+ 'center': [0, 0, 0],
387
+ 'p': 1
388
+ }, {
389
+ 'type': 'RandomScale',
390
+ 'scale': [1.05, 1.05]
391
+ }],
392
+ [{
393
+ 'type': 'RandomRotateTargetAngle',
394
+ 'angle': [1.5],
395
+ 'axis': 'z',
396
+ 'center': [0, 0, 0],
397
+ 'p': 1
398
+ }, {
399
+ 'type': 'RandomScale',
400
+ 'scale': [1.05, 1.05]
401
+ }], [{
402
+ 'type': 'RandomFlip',
403
+ 'p': 1
404
+ }]])))
scannet200/semseg-ptv3_dinov3-L/events.out.tfevents.1755298870.n23g0015.hpc.itc.rwth-aachen.de ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f493c0982e44631c551c89a32f9a4dc23a1c7160b7515e5a06ab86f8ea6eb85
3
+ size 7830557
scannet200/semseg-ptv3_dinov3-L/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c583ee37e761b9695297c2a42e27220db4dd2fbc5b2d1c76b32cc9bf2ca61b6
3
+ size 561061040
scannet200/semseg-ptv3_dinov3-L/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e000a839a685c67c1c5d271f1260a10a6eb886b9d106ec4eb2aba4e317d77062
3
+ size 561061040
scannet200/semseg-ptv3_dinov3-L/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03021b0354343cc40c6add98d7929be537a03f81e8e8114e8e3e03e567d2cd6
3
+ size 18715664